+
    ~j                         ^ RI HtHtHtHtHtHt ^ RIHtH	t	H
t
HtHtHt ^ RIHt ^ RIHtHtHt ^RIHt  ! R R]4      tR# )	    )DictIteratorListOptionalTupleUnion)
AddedToken	Tokenizerdecoderspre_tokenizers
processorstrainers)BPE)	LowercaseSequenceunicode_normalizer_from_str)BaseTokenizerc                      a a ] tR t^
t oRtRV3R lV 3R lllt]V3R lR l4       tR^R. 3V3R	 lR
 lltR^R. R3V3R lR llt	Rt
VtV ;t# )ByteLevelBPETokenizerzbByteLevelBPETokenizer

Represents a Byte-level BPE as introduced by OpenAI with their GPT-2 model
Nc                  < V ^8  d   QhRS[ S[S[S[S[S[3,          3,          ,          RS[ S[S[S[S[S[S[3,          ,          3,          ,          RS[RS[RS[ S[,          RS[ S[,          RS[ S[,          RS[ S[,          R	S[/	# )
   vocabmergesadd_prefix_space	lowercasedropoutunicode_normalizercontinuing_subword_prefixend_of_word_suffixtrim_offsets)	r   r   strr   intr   r   boolfloat)format__classdict__s   "~/Users/mitch_tango/dev/rabbit-r1-livekit/agent/.venv/lib/python3.14/site-packages/tokenizers/implementations/byte_level_bpe.py__annotate__"ByteLevelBPETokenizer.__annotate__   s     80 80c4S>12380 sDsCx$99:;80 	80
 80 %80 %SM80 $,C=80 %SM80 80    c
                l  < Ve3   Ve/   \        \        TTTT;'       g    RT;'       g    RR7      4      p
M\        \        4       4      p
. pV'       d   V\        V4      .,          pV'       d   V\        4       .,          p\	        V4      ^ 8  d0   \	        V4      ^8  d   \        V4      V
n        MV^ ,          V
n        \        P                  ! VR7      V
n	        \        P                  ! 4       V
n        \        P                  ! V	R7      V
n        RRRVRVR	VR
VRVRVRV	/p\        SV `=  W4       R # )N )r   r   r   )r   )r    modelByteLevelBPEr   r   r   r   r   r   r    )r
   r   r   r   lenr   
normalizerr   	ByteLevelpre_tokenizerr   decoderr   post_processorsuper__init__)selfr   r   r   r   r   r   r   r   r    	tokenizernormalizers
parameters	__class__s   &&&&&&&&&&   r'   r6   ByteLevelBPETokenizer.__init__   s/    !3!#.G.M.M2'9'?'?RI "#%(I 78JKLLKIK=(K {a;!#'/'<	$'21~	$"0":":L\"]	$..0	#-#7#7\#R	  ^ 0w "4')B "4L	

 	/r*   c                &   < V ^8  d   QhRS[ RS[ /# )r   vocab_filenamemerges_filename)r!   )r%   r&   s   "r'   r(   r)   K   s     > ># > >r*   c                J    \         P                  ! W4      w  r4\        W43/ VB # )N)r   	read_filer   )r>   r?   kwargsr   r   s   &&,  r'   	from_fileByteLevelBPETokenizer.from_fileJ   s"    nF$U=f==r*   i0u  Tc                   < V ^8  d   QhRS[ S[S[S[,          3,          RS[RS[RS[RS[S[ S[S[3,          ,          /# )r   files
vocab_sizemin_frequencyshow_progressspecial_tokens)r   r!   r   r"   r#   r	   )r%   r&   s   "r'   r(   r)   O   sX     6 6S$s)^$6 6 	6
 6 U3
?346r*   c                    \         P                  ! VVVV\        P                  P	                  4       R7      p\        V\        4      '       d   V.pV P                  P                  WR7       R# )z%Train the model using the given filesrG   rH   rI   rJ   initial_alphabet)trainerN)	r   
BpeTrainerr   r1   alphabet
isinstancer!   
_tokenizertrain)r7   rF   rG   rH   rI   rJ   rN   s   &&&&&& r'   rS   ByteLevelBPETokenizer.trainO   s]     %%!'')+55>>@
 eS!!GEe5r*   c                   < V ^8  d   QhRS[ S[S[,          S[S[S[,          ,          3,          RS[RS[RS[RS[S[ S[S[3,          ,          RS[S[,          /# )r   iteratorrG   rH   rI   rJ   length)r   r   r!   r"   r#   r   r	   r   )r%   r&   s   "r'   r(   r)   d   sp     
 
x'>>?
 
 	

 
 U3
?34
 
r*   c                    \         P                  ! VVVV\        P                  P	                  4       R7      pV P
                  P                  VVVR7       R# )z(Train the model using the given iteratorrL   )rN   rW   N)r   rO   r   r1   rP   rR   train_from_iterator)r7   rV   rG   rH   rI   rJ   rW   rN   s   &&&&&&& r'   rY   )ByteLevelBPETokenizer.train_from_iteratord   sT     %%!'')+55>>@
 	++ 	, 	
r*    )	NNFFNNNNF)__name__
__module____qualname____firstlineno____doc__r6   staticmethodrC   rS   rY   __static_attributes____classdictcell____classcell__)r;   r&   s   @@r'   r   r   
   sa     
80 80t > >  "796 60  "79 $
 
 
r*   r   N)typingr   r   r   r   r   r   
tokenizersr	   r
   r   r   r   r   tokenizers.modelsr   tokenizers.normalizersr   r   r   base_tokenizerr   r   r[   r*   r'   <module>rj      s+    ? ? \ \ ! S S )p
M p
r*   