@@ -2,7 +2,7 @@ use std::sync::LazyLock;
22
33use bpe:: byte_pair_encoding:: BytePairEncoding ;
44use either:: Either ;
5- use regex_automata:: { meta:: Regex , util:: captures:: Captures , Anchored , Input } ;
5+ use regex_automata:: { meta:: { BuildError , Regex } , util:: captures:: Captures , Anchored , Input } ;
66
77static BPE_R50K_BASE : LazyLock < Tokenizer > = LazyLock :: new ( || {
88 let bytes = include_bytes ! ( concat!( env!( "OUT_DIR" ) , "/bpe_r50k_base.dict" ) ) ;
@@ -65,15 +65,15 @@ pub struct Tokenizer {
6565
6666impl Tokenizer {
6767 #[ allow( clippy:: result_large_err) ]
68- pub fn new ( bpe : BytePairEncoding , pat : Option < & str > ) -> Result < Self , ( ) > {
69- let pat = pat. map ( Regex :: new) . transpose ( ) . map_err ( |_| ( ) ) ?;
68+ pub fn new ( bpe : BytePairEncoding , pat : Option < & str > ) -> Result < Self , BuildError > {
69+ let pat = pat. map ( Regex :: new) . transpose ( ) ?;
7070 Ok ( Self { bpe, pat } )
7171 }
7272
7373 /// When using multiple patterns, the second pattern is assumed to be a look-ahead pattern with
7474 /// exactly one look-ahead character!
75- pub fn with_many ( bpe : BytePairEncoding , patterns : & [ & str ] ) -> Result < Self , ( ) > {
76- let pat = Some ( Regex :: new_many ( patterns) . map_err ( |_| ( ) ) ?) ;
75+ pub fn with_many ( bpe : BytePairEncoding , patterns : & [ & str ] ) -> Result < Self , BuildError > {
76+ let pat = Some ( Regex :: new_many ( patterns) ?) ;
7777 Ok ( Self { bpe, pat } )
7878 }
7979
@@ -132,7 +132,7 @@ impl<'a> Iterator for SpecialRegexp<'a> {
132132 let start = self . last ;
133133 let mut end = self . last + m. range ( ) . end ;
134134 if m. pattern ( ) == 1 . into ( ) {
135- let last = self . input [ start..end] . chars ( ) . rev ( ) . next ( ) . unwrap ( ) ;
135+ let last = self . input [ start..end] . chars ( ) . next_back ( ) . expect ( "Expected at least a look-ahead character!" ) ;
136136 end -= last. len_utf8 ( ) ;
137137 assert_ne ! ( end, start, "a look-ahead pattern must ALWAYS consume at least one character excluding the look-ahead character!" ) ;
138138 }
0 commit comments