+
    ~jI                         ^ RI HtHtHtHtHtHt ^RIHtH	t	H
t
HtHt ^RIHt ^RIHtHtHtHt ^RIHt  ! R R]4      tR# )	    )DictIteratorListOptionalTupleUnion)
AddedToken	Tokenizerdecoderspre_tokenizerstrainers)BPE)BertNormalizer	LowercaseSequenceunicode_normalizer_from_str)BaseTokenizerc                      a a ] tR t^	t oRtRV3R lV 3R lllt]V3R lR	 l4       tR
^R.R. RR3V3R lR lltR
^R.R. RRR3V3R lR llt	Rt
VtV ;t# )CharBPETokenizera  Original BPE Tokenizer

Represents the BPE algorithm, as introduced by Rico Sennrich
(https://arxiv.org/abs/1508.07909)

The defaults settings corresponds to OpenAI GPT BPE tokenizers and differs from the original
Sennrich subword-nmt implementation by the following options that you can deactivate:
    - adding a normalizer to clean up the text (deactivate with `bert_normalizer=False`) by:
        * removing any control characters and replacing all whitespaces by the classic one.
        * handle chinese chars by putting spaces around them.
        * strip all accents.
    - spitting on punctuation in addition to whitespaces (deactivate it with
      `split_on_whitespace_only=True`)
N<unk></w>Tc                  < V ^8  d   QhRS[ S[S[S[S[S[3,          3,          ,          RS[ S[S[S[S[S[S[3,          ,          3,          ,          RS[S[S[3,          RS[RS[ S[,          RS[	RS[ S[,          RS[	R	S[	/	# )
   vocabmerges	unk_tokensuffixdropout	lowercaseunicode_normalizerbert_normalizersplit_on_whitespace_only)
r   r   strr   intr   r   r	   floatbool)format__classdict__s   "~/Users/mitch_tango/dev/rabbit-r1-livekit/agent/.venv/lib/python3.14/site-packages/tokenizers/implementations/char_level_bpe.py__annotate__CharBPETokenizer.__annotate__   s     A0 A0c4S>123A0 sDsCx$99:;A0 j)	A0
 A0 %A0 A0 %SMA0 A0 #'A0    c
                "  < Ve(   Ve$   \        \        VVV\        V4      VR7      4      p
M\        \        \        V4      WTR7      4      p
V
P                  \        V4      4      e   V
P	                  \        V4      .4       . pV'       d   V\        V4      .,          pV'       d   V\        RR7      .,          pV'       d   V\        4       .,          p\        V4      ^ 8  d0   \        V4      ^8  d   \        V4      V
n
        MV^ ,          V
n
        V	'       d   \        P                  ! 4       V
n        M\        P                  ! 4       V
n        \        P                   ! VR7      V
n        RRRVR	VR
VRVRVRVRV	/p\$        SV `M  W4       R # )N)r   r   end_of_word_suffix)r   r   r.   F)r   )r   modelr   r   r   r   r   r    r!   r"   )r
   r   r#   token_to_idadd_special_tokensr   r   r   lenr   
normalizerr   WhitespaceSplitpre_tokenizerBertPreTokenizerr   
BPEDecoderdecodersuper__init__)selfr   r   r   r   r   r   r    r!   r"   	tokenizernormalizers
parameters	__class__s   &&&&&&&&&&   r)   r:   CharBPETokenizer.__init__   si    !3!#!)n'-I "#I"klI  Y0<((#i.)9: 78JKLLKNU;<<KIK=(K {a;!#'/'<	$'21~	$#&4&D&D&FI#&4&E&E&GI#$//v>	 Ufw "4&(@	

 	/r,   c                &   < V ^8  d   QhRS[ RS[ /# )r   vocab_filenamemerges_filename)r#   )r'   r(   s   "r)   r*   r+   ]   s     9 9# 9 9r,   c                J    \         P                  ! W4      w  r4\        W43/ VB # )N)r   	read_filer   )rB   rC   kwargsr   r   s   &&,  r)   	from_fileCharBPETokenizer.from_file\   s"    nF888r,   i0u  i  c                   < V ^8  d   QhRS[ S[S[S[,          3,          RS[RS[RS[S[ S[S[3,          ,          RS[RS[S[,          RS[S[,          RS[/# )	r   files
vocab_sizemin_frequencyspecial_tokenslimit_alphabetinitial_alphabetr   show_progress)r   r#   r   r$   r	   r   r&   )r'   r(   s   "r)   r*   r+   a   s~     6 6S$s)^$6 6 	6
 U3
?346 6 s)6 6 6r,   c	           
         \         P                  ! VVVVVVVR7      p	\        V\        4      '       d   V.pV P                  P                  WR7       R# )z%Train the model using the given filesrK   rL   rM   rN   rO   r.   rP   )trainerN)r   
BpeTrainer
isinstancer#   
_tokenizertrain)
r;   rJ   rK   rL   rM   rN   rO   r   rP   rS   s
   &&&&&&&&& r)   rW   CharBPETokenizer.traina   sT     %%!'))-%'
 eS!!GEe5r,   c                   < V ^8  d   QhRS[ S[S[,          S[S[S[,          ,          3,          RS[RS[RS[S[ S[S[3,          ,          RS[RS[S[,          RS[S[,          RS[R	S[S[,          /	# )
r   iteratorrK   rL   rM   rN   rO   r   rP   length)r   r   r#   r$   r   r	   r   r&   )r'   r(   s   "r)   r*   r+   {   s     
 
x'>>?
 
 	

 U3
?34
 
 s)
 
 
 
r,   c
           
     |    \         P                  ! VVVVVVVR7      p
V P                  P                  VV
V	R7       R# )z(Train the model using the given iteratorrR   )rS   r[   N)r   rT   rV   train_from_iterator)r;   rZ   rK   rL   rM   rN   rO   r   rP   r[   rS   s   &&&&&&&&&& r)   r]   $CharBPETokenizer.train_from_iterator{   sK     %%!'))-%'
 	++ 	, 	
r,    )	NNr   r   NFNTF)__name__
__module____qualname____firstlineno____doc__r:   staticmethodrG   rW   r]   __static_attributes____classdictcell____classcell__)r?   r(   s   @@r)   r   r   	   sy     A0 A0F 9 9  8?y"&( &"6 6:  8?y"&( &" $
 
 
r,   r   N)typingr   r   r   r   r   r    r	   r
   r   r   r   modelsr   r=   r   r   r   r   base_tokenizerr   r   r_   r,   r)   <module>rm      s+    ? ? H H  Z Z )M
} M
r,   