+
    ~j                         ^ RI HtHtHtHtHt ^ RIHtHtH	t	H
t
 ^ RIHt ^ RIHt ^ RIHt ^ RIHt ^RIHt  ! R R	]4      tR
# )    )DictIteratorListOptionalUnion)
AddedToken	Tokenizerdecoderstrainers)	WordPiece)BertNormalizer)BertPreTokenizer)BertProcessing)BaseTokenizerc                      a a ] tR t^t oRtRV3R lV 3R lllt]V3R lR l4       tR	^R
. . RORR3V3R lR lltR	^R
. . RORRR3V3R lR llt	Rt
VtV ;t# )BertWordPieceTokenizerzBert WordPiece TokenizerNT##c                  < V ^8  d   QhRS[ S[S[S[S[S[3,          3,          ,          RS[S[S[3,          RS[S[S[3,          RS[S[S[3,          RS[S[S[3,          RS[S[S[3,          RS[RS[R	S[ S[,          R
S[RS[/# )   vocab	unk_token	sep_token	cls_token	pad_token
mask_token
clean_texthandle_chinese_charsstrip_accents	lowercasewordpieces_prefix)r   r   strr   intr   bool)format__classdict__s   "~/Users/mitch_tango/dev/rabbit-r1-livekit/agent/.venv/lib/python3.14/site-packages/tokenizers/implementations/bert_wordpiece.py__annotate__#BertWordPieceTokenizer.__annotate__   s     @0 @0c4S>123@0 j)@0 j)	@0
 j)@0 j)@0 #z/*@0 @0 #@0  ~@0 @0 @0    c                x  < Ve!   \        \        V\        V4      R7      4      pM\        \        \        V4      R7      4      pVP                  \        V4      4      e   VP	                  \        V4      .4       VP                  \        V4      4      e   VP	                  \        V4      .4       VP                  \        V4      4      e   VP	                  \        V4      .4       VP                  \        V4      4      e   VP	                  \        V4      .4       VP                  \        V4      4      e   VP	                  \        V4      .4       \        VVV	V
R7      Vn        \        4       Vn        Vez   VP                  \        V4      4      pVf   \        R4      hVP                  \        V4      4      pVf   \        R4      h\        \        V4      V3\        V4      V34      Vn        \        P                  ! VR7      Vn        RRRVR	VR
VRVRVRVRVRV	RV
RV/p\        SV `=  W4       R # )N)r   )r   r   r   r   z%sep_token not found in the vocabularyz%cls_token not found in the vocabulary)prefixmodelBertWordPiecer   r   r   r   r   r   r   r   r   r    )r	   r   r!   token_to_idadd_special_tokensr   
normalizerr   pre_tokenizer	TypeErrorr   post_processorr
   decodersuper__init__)selfr   r   r   r   r   r   r   r   r   r   r    	tokenizersep_token_idcls_token_id
parameters	__class__s   &&&&&&&&&&&&    r&   r6   BertWordPieceTokenizer.__init__   s	    !)ES^"LMI!)c)n"EFI   Y0<((#i.)9:  Y0<((#i.)9:  Y0<((#i.)9:  Y0<((#i.)9:  Z1=((#j/):;-!!5'	 
	 #3"4	$00Y@L# GHH$00Y@L# GHH'5s9~|6TWZ[dWegsVt'uI$$..6GH	 _**"$8]!2

 	/r)   c                    < V ^8  d   QhRS[ /# )r   r   )r!   )r$   r%   s   "r&   r'   r(   R   s     7 7 7r)   c                F    \         P                  ! V 4      p \        V 3/ VB # )N)r   	read_filer   )r   kwargss   &,r&   	from_file BertWordPieceTokenizer.from_fileQ   s"    ##E*%e6v66r)   i0u  i  c                   < V ^8  d   QhRS[ S[S[S[,          3,          RS[RS[RS[RS[S[,          RS[S[ S[S[3,          ,          RS[RS[/# )	r   files
vocab_sizemin_frequencylimit_alphabetinitial_alphabetspecial_tokensshow_progressr    )r   r!   r   r"   r   r#   )r$   r%   s   "r&   r'   r(   V   sz     6 6S$s)^$6 6 	6
 6 s)6 U3
?346 6 6r)   c	           
         \         P                  ! VVVVVVVR7      p	\        V\        4      '       d   V.pV P                  P                  WR7       R# )z%Train the model using the given filesrF   rG   rH   rI   rJ   rK   continuing_subword_prefix)trainerN)r   WordPieceTrainer
isinstancer!   
_tokenizertrain)
r7   rE   rF   rG   rH   rI   rJ   rK   r    rO   s
   &&&&&&&&& r&   rS   BertWordPieceTokenizer.trainV   sT    & ++!')-)'&7
 eS!!GEe5r)   c                   < V ^8  d   QhRS[ S[S[,          S[S[S[,          ,          3,          RS[RS[RS[RS[S[,          RS[S[ S[S[3,          ,          RS[RS[R	S[S[,          /	# )
r   iteratorrF   rG   rH   rI   rJ   rK   r    length)r   r   r!   r"   r   r   r#   r   )r$   r%   s   "r&   r'   r(   v   s     !
 !
x'>>?!
 !
 	!

 !
 s)!
 U3
?34!
 !
 !
  !!
r)   c
           
     |    \         P                  ! VVVVVVVR7      p
V P                  P                  VV
V	R7       R# )z(Train the model using the given iteratorrM   )rO   rW   N)r   rP   rR   train_from_iterator)r7   rV   rF   rG   rH   rI   rJ   rK   r    rW   rO   s   &&&&&&&&&& r&   rY   *BertWordPieceTokenizer.train_from_iteratorv   sK    ( ++!')-)'&7
 	++ 	, 	
r)    )N[UNK][SEP][CLS][PAD][MASK]TTNTr   )r_   r\   r^   r]   r`   )__name__
__module____qualname____firstlineno____doc__r6   staticmethodrB   rS   rY   __static_attributes____classdictcell____classcell__)r<   r%   s   @@r&   r   r      s|     "@0 @0D 7 7  "&(8
 #!%6 6F  "&(8
 #!% $!!
 !
 !
r)   r   N)typingr   r   r   r   r   
tokenizersr   r	   r
   r   tokenizers.modelsr   tokenizers.normalizersr   tokenizers.pre_tokenizersr   tokenizers.processorsr   base_tokenizerr   r   r[   r)   r&   <module>rq      s.    8 8 @ @ ' 1 6 0 )K
] K
r)   