Skip to content

Commit e10e7fd

Browse files
committed
Reduce size of Bytes
1 parent 6b40344 commit e10e7fd

1 file changed

Lines changed: 53 additions & 20 deletions

File tree

src/lib.rs

Lines changed: 53 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ _hypher_ separates words into syllables.
66
efficiently encoded finite automata at build time.
77
- Zero load time: Hyphenation automata operate directly over the embedded
88
binary data with no up-front decoding.
9-
- No allocations unless when hyphenating very long words (> 41 bytes). You can
9+
- No allocations unless hyphenating very long words (> 45 bytes). You can
1010
disable the `alloc` feature, but then overly long words lead to a panic.
1111
- Support for many languages.
1212
- No unsafe code, no dependencies, no std.
@@ -60,6 +60,7 @@ extern crate alloc;
6060

6161
use core::fmt::{self, Debug, Formatter};
6262
use core::iter::FusedIterator;
63+
use core::num::NonZeroU8;
6364

6465
// Include language data.
6566
include!("lang.rs");
@@ -71,7 +72,7 @@ include!("lang.rs");
7172
/// This uses the default [bounds](Lang::bounds) for the language.
7273
///
7374
/// # Panics
74-
/// Panics if the word is more than 41 bytes long and the `alloc` feature is
75+
/// Panics if the word is more than [`MAX_WORD_LEN`] bytes long and the `alloc` feature is
7576
/// disabled.
7677
///
7778
/// # Example
@@ -89,13 +90,13 @@ pub fn hyphenate(word: &str, lang: Lang) -> Syllables<'_> {
8990
hyphenate_bounded(word, lang, left_min, right_min)
9091
}
9192

92-
/// Segment a word into syllables, but forbid breaking betwen the given number
93+
/// Segment a word into syllables, but forbid breaking between the given number
9394
/// of chars to each side.
9495
///
9596
/// Returns an iterator over the syllables.
9697
///
9798
/// # Panics
98-
/// Panics if the word is more than 41 bytes long and the `alloc` feature is
99+
/// Panics if the word is more than [`MAX_WORD_LEN`] bytes long and the `alloc` feature is
99100
/// disabled.
100101
///
101102
/// # Example
@@ -262,22 +263,30 @@ impl ExactSizeIterator for Syllables<'_> {}
262263

263264
impl FusedIterator for Syllables<'_> {}
264265

266+
/// The maximum size (in bytes) of words that may be hyphenated without allocating.
267+
pub const MAX_WORD_LEN: usize = 45;
268+
const INLINE_WORD_LEN: usize = MAX_WORD_LEN + 2; // +2 for dots
269+
265270
/// Storage for and iterator over bytes.
266271
#[derive(Clone)]
267272
enum Bytes {
268-
Array(core::array::IntoIter<u8, 40>, usize),
273+
Array([u8; INLINE_WORD_LEN], NonZeroU8),
269274
#[cfg(feature = "alloc")]
270275
Vec(alloc::vec::IntoIter<u8>),
271276
}
272277

273278
impl Bytes {
274279
/// Create zero-initialized bytes.
275280
fn zeros(len: usize) -> Self {
276-
if len <= 40 {
277-
Self::Array([0; 40].into_iter(), len)
281+
if len <= INLINE_WORD_LEN {
282+
// `len <= INLINE_WORD_LEN` here, so `INLINE_WORD_LEN + 1 - len` is at least 1 and the unwrap cannot fail.
283+
let len = NonZeroU8::new(INLINE_WORD_LEN as u8 + 1 - len as u8).unwrap();
284+
Self::Array([0; INLINE_WORD_LEN], len)
278285
} else {
279286
#[cfg(not(feature = "alloc"))]
280-
panic!("hypher: maximum word length is 41 when `alloc` is disabled");
287+
panic!(
288+
"hypher: maximum word length is {MAX_WORD_LEN} bytes when `alloc` is disabled"
289+
);
281290

282291
#[cfg(feature = "alloc")]
283292
Self::Vec(alloc::vec![0; len].into_iter())
@@ -287,7 +296,7 @@ impl Bytes {
287296
/// Access the bytes as a slice.
288297
fn as_slice(&self) -> &[u8] {
289298
match self {
290-
Self::Array(iter, len) => &iter.as_slice()[..*len],
299+
Self::Array(arr, start) => &arr[start.get() as usize - 1..],
291300
#[cfg(feature = "alloc")]
292301
Self::Vec(iter) => iter.as_slice(),
293302
}
@@ -296,7 +305,7 @@ impl Bytes {
296305
/// Access the bytes as a mutable slice.
297306
fn as_mut_slice(&mut self) -> &mut [u8] {
298307
match self {
299-
Self::Array(iter, len) => &mut iter.as_mut_slice()[..*len],
308+
Self::Array(arr, start) => &mut arr[start.get() as usize - 1..],
300309
#[cfg(feature = "alloc")]
301310
Self::Vec(iter) => iter.as_mut_slice(),
302311
}
@@ -308,10 +317,11 @@ impl Iterator for Bytes {
308317

309318
fn next(&mut self) -> Option<Self::Item> {
310319
match self {
311-
Self::Array(iter, len) => {
312-
if *len > 0 {
313-
*len -= 1;
314-
iter.next()
320+
Self::Array(arr, start) => {
321+
let index = start.get() as usize - 1;
322+
if index < INLINE_WORD_LEN {
323+
*start = start.saturating_add(1); // Will never reach 255 anyways.
324+
Some(arr[index])
315325
} else {
316326
None
317327
}
@@ -323,7 +333,7 @@ impl Iterator for Bytes {
323333

324334
fn size_hint(&self) -> (usize, Option<usize>) {
325335
match self {
326-
Self::Array(_, len) => (*len, Some(*len)),
336+
Self::Array(..) => (self.as_slice().len(), Some(self.as_slice().len())),
327337
#[cfg(feature = "alloc")]
328338
Self::Vec(iter) => iter.size_hint(),
329339
}
@@ -442,7 +452,7 @@ fn is_char_boundary(b: u8) -> bool {
442452

443453
#[cfg(test)]
444454
mod tests {
445-
use super::{hyphenate, Lang};
455+
use super::{hyphenate, Lang, MAX_WORD_LEN};
446456

447457
#[allow(unused)]
448458
use Lang::*;
@@ -457,16 +467,39 @@ mod tests {
457467
#[test]
458468
#[cfg(feature = "english")]
459469
fn test_empty() {
460-
let mut syllables = hyphenate("", Lang::English);
470+
let mut syllables = hyphenate("", English);
461471
assert_eq!(syllables.next(), None);
462472
}
463473

464474
#[test]
465475
#[cfg(feature = "english")]
466476
fn test_exact() {
467-
assert_eq!(hyphenate("", Lang::English).len(), 0);
468-
assert_eq!(hyphenate("hello", Lang::English).len(), 1);
469-
assert_eq!(hyphenate("extensive", Lang::English).len(), 3);
477+
assert_eq!(hyphenate("", English).len(), 0);
478+
assert_eq!(hyphenate("hello", English).len(), 1);
479+
assert_eq!(hyphenate("extensive", English).len(), 3);
480+
}
481+
482+
const LONG_WORD: &str = "thisisaverylongstringwithanunrealisticwordlengthforenglishbutitmightbepossibleinanotherlanguage";
483+
484+
#[test]
485+
#[cfg(all(feature = "english", feature = "alloc"))]
486+
fn test_alloc() {
487+
assert_eq!(hyphenate(&LONG_WORD[..MAX_WORD_LEN - 1], English).len(), 13);
488+
assert_eq!(hyphenate(&LONG_WORD[..MAX_WORD_LEN], English).len(), 13);
489+
assert_eq!(hyphenate(&LONG_WORD[..MAX_WORD_LEN + 1], English).len(), 13);
490+
assert_eq!(hyphenate(LONG_WORD, English).len(), 26);
491+
}
492+
493+
#[test]
494+
#[cfg(all(feature = "english", not(feature = "alloc")))]
495+
fn test_nonalloc() {
496+
_ = hyphenate(&LONG_WORD[..MAX_WORD_LEN], English).count();
497+
}
498+
#[test]
499+
#[should_panic]
500+
#[cfg(all(feature = "english", not(feature = "alloc")))]
501+
fn test_nonalloc_fail() {
502+
_ = hyphenate(&LONG_WORD[..MAX_WORD_LEN + 1], English).count();
470503
}
471504

472505
#[test]

0 commit comments

Comments
 (0)