@@ -6,7 +6,7 @@ _hypher_ separates words into syllables.
66 efficiently encoded finite automata at build time.
77- Zero load time: Hyphenation automata operate directly over the embedded
88 binary data with no up-front decoding.
9- - No allocations unless when hyphenating very long words (> 41 bytes). You can
9+ - No allocations except when hyphenating very long words (> 45 bytes). You can
1010 disable the `alloc` feature, but then overly long words lead to a panic.
1111- Support for many languages.
1212- No unsafe code, no dependencies, no std.
@@ -60,6 +60,7 @@ extern crate alloc;
6060
6161use core:: fmt:: { self , Debug , Formatter } ;
6262use core:: iter:: FusedIterator ;
63+ use core:: num:: NonZeroU8 ;
6364
6465// Include language data.
6566include ! ( "lang.rs" ) ;
@@ -71,7 +72,7 @@ include!("lang.rs");
7172/// This uses the default [bounds](Lang::bounds) for the language.
7273///
7374/// # Panics
74- /// Panics if the word is more than 41 bytes long and the `alloc` feature is
75+ /// Panics if the word is more than [`MAX_WORD_LEN`] bytes long and the `alloc` feature is
7576/// disabled.
7677///
7778/// # Example
@@ -89,13 +90,13 @@ pub fn hyphenate(word: &str, lang: Lang) -> Syllables<'_> {
8990 hyphenate_bounded ( word, lang, left_min, right_min)
9091}
9192
92- /// Segment a word into syllables, but forbid breaking betwen the given number
93+ /// Segment a word into syllables, but forbid breaking between the given number
9394/// of chars to each side.
9495///
9596/// Returns an iterator over the syllables.
9697///
9798/// # Panics
98- /// Panics if the word is more than 41 bytes long and the `alloc` feature is
99+ /// Panics if the word is more than [`MAX_WORD_LEN`] bytes long and the `alloc` feature is
99100/// disabled.
100101///
101102/// # Example
@@ -262,22 +263,30 @@ impl ExactSizeIterator for Syllables<'_> {}
262263
263264impl FusedIterator for Syllables < ' _ > { }
264265
266+ /// The maximum size (in bytes) of words that may be hyphenated without allocating.
267+ pub const MAX_WORD_LEN : usize = 45 ;
268+ const INLINE_WORD_LEN : usize = MAX_WORD_LEN + 2 ; // +2 for dots
269+
265270/// Storage for and iterator over bytes.
266271#[ derive( Clone ) ]
267272enum Bytes {
268- Array ( core :: array :: IntoIter < u8 , 40 > , usize ) ,
273+ Array ( [ u8 ; INLINE_WORD_LEN ] , NonZeroU8 ) ,
269274 #[ cfg( feature = "alloc" ) ]
270275 Vec ( alloc:: vec:: IntoIter < u8 > ) ,
271276}
272277
273278impl Bytes {
274279 /// Create zero-initialized bytes.
275280 fn zeros ( len : usize ) -> Self {
276- if len <= 40 {
277- Self :: Array ( [ 0 ; 40 ] . into_iter ( ) , len)
281+ if len <= INLINE_WORD_LEN {
282+ // `len <= INLINE_WORD_LEN` here, so `INLINE_WORD_LEN + 1 - len` is at least 1 and the unwrap cannot fail.
283+ let len = NonZeroU8 :: new ( INLINE_WORD_LEN as u8 + 1 - len as u8 ) . unwrap ( ) ;
284+ Self :: Array ( [ 0 ; INLINE_WORD_LEN ] , len)
278285 } else {
279286 #[ cfg( not( feature = "alloc" ) ) ]
280- panic ! ( "hypher: maximum word length is 41 when `alloc` is disabled" ) ;
287+ panic ! (
288+ "hypher: maximum word length is {MAX_WORD_LEN} bytes when `alloc` is disabled"
289+ ) ;
281290
282291 #[ cfg( feature = "alloc" ) ]
283292 Self :: Vec ( alloc:: vec![ 0 ; len] . into_iter ( ) )
@@ -287,7 +296,7 @@ impl Bytes {
287296 /// Access the bytes as a slice.
288297 fn as_slice ( & self ) -> & [ u8 ] {
289298 match self {
290- Self :: Array ( iter , len ) => & iter . as_slice ( ) [ .. * len ] ,
299+ Self :: Array ( arr , start ) => & arr [ start . get ( ) as usize - 1 .. ] ,
291300 #[ cfg( feature = "alloc" ) ]
292301 Self :: Vec ( iter) => iter. as_slice ( ) ,
293302 }
@@ -296,7 +305,7 @@ impl Bytes {
296305 /// Access the bytes as a mutable slice.
297306 fn as_mut_slice ( & mut self ) -> & mut [ u8 ] {
298307 match self {
299- Self :: Array ( iter , len ) => & mut iter . as_mut_slice ( ) [ .. * len ] ,
308+ Self :: Array ( arr , start ) => & mut arr [ start . get ( ) as usize - 1 .. ] ,
300309 #[ cfg( feature = "alloc" ) ]
301310 Self :: Vec ( iter) => iter. as_mut_slice ( ) ,
302311 }
@@ -308,10 +317,11 @@ impl Iterator for Bytes {
308317
309318 fn next ( & mut self ) -> Option < Self :: Item > {
310319 match self {
311- Self :: Array ( iter, len) => {
312- if * len > 0 {
313- * len -= 1 ;
314- iter. next ( )
320+ Self :: Array ( arr, start) => {
321+ let index = start. get ( ) as usize - 1 ;
322+ if index < INLINE_WORD_LEN {
323+ * start = start. saturating_add ( 1 ) ; // Will never reach 255 anyways.
324+ Some ( arr[ index] )
315325 } else {
316326 None
317327 }
@@ -323,7 +333,7 @@ impl Iterator for Bytes {
323333
324334 fn size_hint ( & self ) -> ( usize , Option < usize > ) {
325335 match self {
326- Self :: Array ( _ , len ) => ( * len, Some ( * len) ) ,
336+ Self :: Array ( .. ) => ( self . as_slice ( ) . len ( ) , Some ( self . as_slice ( ) . len ( ) ) ) ,
327337 #[ cfg( feature = "alloc" ) ]
328338 Self :: Vec ( iter) => iter. size_hint ( ) ,
329339 }
@@ -442,7 +452,7 @@ fn is_char_boundary(b: u8) -> bool {
442452
443453#[ cfg( test) ]
444454mod tests {
445- use super :: { hyphenate, Lang } ;
455+ use super :: { hyphenate, Lang , MAX_WORD_LEN } ;
446456
447457 #[ allow( unused) ]
448458 use Lang :: * ;
@@ -457,16 +467,39 @@ mod tests {
457467 #[ test]
458468 #[ cfg( feature = "english" ) ]
459469 fn test_empty ( ) {
460- let mut syllables = hyphenate ( "" , Lang :: English ) ;
470+ let mut syllables = hyphenate ( "" , English ) ;
461471 assert_eq ! ( syllables. next( ) , None ) ;
462472 }
463473
464474 #[ test]
465475 #[ cfg( feature = "english" ) ]
466476 fn test_exact ( ) {
467- assert_eq ! ( hyphenate( "" , Lang :: English ) . len( ) , 0 ) ;
468- assert_eq ! ( hyphenate( "hello" , Lang :: English ) . len( ) , 1 ) ;
469- assert_eq ! ( hyphenate( "extensive" , Lang :: English ) . len( ) , 3 ) ;
477+ assert_eq ! ( hyphenate( "" , English ) . len( ) , 0 ) ;
478+ assert_eq ! ( hyphenate( "hello" , English ) . len( ) , 1 ) ;
479+ assert_eq ! ( hyphenate( "extensive" , English ) . len( ) , 3 ) ;
480+ }
481+
482+ const LONG_WORD : & str = "thisisaverylongstringwithanunrealisticwordlengthforenglishbutitmightbepossibleinanotherlanguage" ;
483+
484+ #[ test]
485+ #[ cfg( all( feature = "english" , feature = "alloc" ) ) ]
486+ fn test_alloc ( ) {
487+ assert_eq ! ( hyphenate( & LONG_WORD [ ..MAX_WORD_LEN - 1 ] , English ) . len( ) , 13 ) ;
488+ assert_eq ! ( hyphenate( & LONG_WORD [ ..MAX_WORD_LEN ] , English ) . len( ) , 13 ) ;
489+ assert_eq ! ( hyphenate( & LONG_WORD [ ..MAX_WORD_LEN + 1 ] , English ) . len( ) , 13 ) ;
490+ assert_eq ! ( hyphenate( LONG_WORD , English ) . len( ) , 26 ) ;
491+ }
492+
493+ #[ test]
494+ #[ cfg( all( feature = "english" , not( feature = "alloc" ) ) ) ]
495+ fn test_nonalloc ( ) {
496+ _ = hyphenate ( & LONG_WORD [ ..MAX_WORD_LEN ] , English ) . count ( ) ;
497+ }
498+ #[ test]
499+ #[ should_panic]
500+ #[ cfg( all( feature = "english" , not( feature = "alloc" ) ) ) ]
501+ fn test_nonalloc_fail ( ) {
502+ _ = hyphenate ( & LONG_WORD [ ..MAX_WORD_LEN + 1 ] , English ) . count ( ) ;
470503 }
471504
472505 #[ test]
0 commit comments