+
    ~j+                     "  a ERB t=0 t R t^ RIt^ RIt^ RIt^ RIt^ RIHt ^ RIH	t	 ^ RI
Ht ^RIHt ^RIHtHt ^RIHt ^RIHt ^R	IHtHtHtHtHt ^R
IHt ^RIHt ^RIH t  ^RI!H"t"H#t#H$t$H%t%H&t& ]! 4       '       d   ^RI'H(t( MRt(]! 4       '       d   ^RI)H*t* MRt*]PV                  ! ],4      t-/ t.] ^ k / t/] ^k ]]0]0R,          3,          ! . R]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NERCNR]! 4       '       d   RMR3NR ]! 4       '       d   R!MR3NERDNERENR"]! 4       '       d   R#MR3NR$]! 4       '       d   R%MR3NERFNR&]! 4       '       d   R'MR3NERGNR(]! 4       '       d   RMR3NR)]! 4       '       d   R*MR3NERHNR+]! 4       '       d   RMR3NERINR,]! 4       '       d   R-MR3NERJNR.]! 4       '       d   RMR3NERKNR/]! 4       '       d   RMR3NR0]! 4       '       d   RMR3NERLNR1]! 4       '       d   R2MR3NR3]! 4       '       d   R*MR3NR4]! 4       '       d   RMR3NR5]! 4       '       d   RMR3NR6]! 4       '       d   RMR3NR7]! 4       '       d   RMR3NR8]! 4       '       d   R9MR3NERMNERNNERONERPNR:]! 4       '       d   R*MR3NR;]! 4       '       d   R<MR3NR=]! 4       '       d   R>MR3NERQNR?]! 4       '       d   RMR3NR@]! 4       '       d   RAMR3NRB]! 4       '       d   RMR3NRC]! 4       '       d   R*MR3NRD]! 4       '       d   RMR3NERRNRE]! 4       '       d   RFMR3NRG]! 4       '       d   RHMR3NERSNRI]! 4       '       d   RMR3NRJ]! 4       '       d   R*MR3NRK]! 4       '       d   RLMR3NRM]! 4       '       d   RNMR3NERTNRO]! 4       '       d   RPMR3NRQ]! 4       '       d   RRMR3NRS]! 4       '       d   RRMR3NRT]! 4       '       d   RRMR3NRU]! 4       '       d   RRMR3NRV]! 4       '       d   RRMR3NRW]! 4       '       d   RRMR3NRX]! 4       '       d   RMR3NRY]! 4       '       d   RZMR3NR[]! 4       '       d   RZMR3NR\]! 4       '       d   RZMR3NR]]! 4       '       d   RZMR3NR^]! 4       '       d   RZMR3NR_]! 4       '       d   RZMR3NR`]! 4       '       d   RZMR3NRa]! 4       '       d   RZMR3NRb]! 4       '       d   RZMR3NRc]! 4       '       d   RdMR3NRe]! 4       '       d   R*MR3NRf]! 4       '       d   R*MR3NRg]! 4       '       d   R*MR3NRh]! 4       '       d   RFMR3NERUNRi]! 4       '       d   R*MR3NRj]! 4       '       d   RZMR3NRk]! 4       '       d   RZMR3NRl]! 4       '       d   RZMR3NRm]! 4       '       d   RZMR3NRn]! 4       '       d   RMR3NRo]! 4       '       d   RMR3NRp]! 4       '       d   RqMR3NERVNERWNRr]! 4       '       d   RsMR3NRt]! 4       '       d   RsMR3NRu]! 4       '       d   R*MR3NRv]! 4       '       d   R*MR3NRw]! 4       '       d   RMR3NRx]! 4       '       d   R*MR3NRy]! 4       '       d   RzMR3NR{]! 4       '       d   RzMR3NR|]! 4       '       d   R}MR3NR~]! 4       '       d   R}MR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NERXNR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RFMR3NR]! 4       '       d   RFMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NERYNR]! 4       '       d   RMR3NR]! 4       '       d   RzMR3NERZNR]! 4       '       d   RZMR3NR]! 4       '       d   RM]! 4       '       d   RZMR3NR]! 4       '       d   RM]! 4       '       d   RZMR3NR]! 4       '       d   RM]! 4       '       d   RZMR3NR]! 4       '       d   RM]! 4       '       d   RZMR3NR]! 4       '       d   RM]! 4       '       d   RZMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RFMR3NER[NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NER\NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RFMR3NR]! 4       '       d   RFMR3NR]! 4       '       d   RZMR3NR]! 4       '       d   RZMR3NR]! 4       '       d   RFMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   R*MR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   R%MR3NR]! 4       '       d   R%MR3NER]NR]! 4       '       d   R*MR3NER^NR]! 4       '       d   RMR3NR]! 4       '       d   RM]! 4       '       d   RZMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NER_NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NER`NR]! 4       '       d   RMR3NR]! 4       '       d   RRMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NERaNERbNERcNR]! 4       '       d   RMR3NR]! 4       '       d   RFMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RRMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NERdNR]! 4       '       d   RMR3NR]! 4       '       d   RFMR3NR]! 4       '       d   R*MR3NR]! 4       '       d   RMR3NR]! 4       '       d   RMR3NR]! 4       '       d   RRMR3NEReNER ]! 4       '       d   RzMR3NER]! 4       '       d   RMR3NER]! 4       '       d   ERMR3NER]! 4       '       d   RMR3NERfNERgNER]! 4       '       d   RMR3NER]! 4       '       d   RMR3NERhNER]! 4       '       d   RM]! 4       '       d   RZMR3NER]! 4       '       d   RM]! 4       '       d   RZMR3NERiNERjNERkNERlNER	]! 4       '       d   ER
MR3NER]! 4       '       d   RMR3NER]! 4       '       d   ERMR3NERmNER]! 4       '       d   RzMR3NER]! 4       '       d   RzMR3NER]! 4       '       d   ERMR3NER]! 4       '       d   RFMR3NER]! 4       '       d   RzMR3NER]! 4       '       d   RMR3N4      t10 ERkERkERkERkERkERkERkERkERkERkERkER kER!kER"kER#kER$kER%kER&kER'kER(kER)kER*kER+kER,kER-kER.kER/kER0kER1kER2kER3kER4kRkER5kER6kER7kER8kt2] ^k ]2 F   t3]3]19  g   K  ]! 4       '       d   RZMR]1]3&   K"  	  ] ! ]"]14      t4]"Pj                  ! 4        U Uu/ uF  w  rWbK	  	  upp t6ER9 t7ER: t8ER; ER< lt9ERnER= ER> llt: ! ER? ER@4      t;ERAER@.t<R# u upp i (o  zAuto Tokenizer class.N)OrderedDict)Any)is_mistral_common_available)PreTrainedConfig)get_class_from_dynamic_moduleresolve_trust_remote_code)load_gguf_checkpoint)TOKENIZER_CONFIG_FILE)extract_commit_hashis_g2p_en_availableis_sentencepiece_availableis_tokenizers_availablelogging)cached_file)EncoderDecoderConfig)_LazyAutoMapping)CONFIG_MAPPING_NAMES
AutoConfigconfig_class_to_model_typemodel_type_to_module_name!replace_list_option_in_docstrings)TokenizersBackend)SentencePieceBackendaimv2CLIPTokenizeralbertAlbertTokenizeralignBertTokenizeraudioflamingo3Qwen2Tokenizer
aya_visionCohereTokenizerbarkbartRobertaTokenizerbarthezBarthezTokenizerbertzbert-generationBertGenerationTokenizerbig_birdBigBirdTokenizerbigbird_pegasusPegasusTokenizer
blenderbotBlenderbotTokenizerblipzblip-2GPT2Tokenizerbros	camembertCamembertTokenizerchinese_clipclipclipseg
code_llamaCodeLlamaTokenizercodegencoherecohere2colqwen2convbertcpmCpmTokenizerdbrxdebertaDebertaTokenizerz
deberta-v2DebertaV2Tokenizer
distilbertdprDPRQuestionEncoderTokenizerelectraemu3erniefalcon_mambaGPTNeoXTokenizerfastspeech2_conformerFastSpeech2ConformerTokenizerflava	flex_olmo	florence2BartTokenizerfnetFNetTokenizerfunnelFunnelTokenizergemmaGemmaTokenizergemma2gemma3gemma3_textgemma3ngemma3n_textgitglmr   glm4glm4_moeglm4_moe_liteglm4v	glm4v_moe	glm_imageglmasrgot_ocr2zgpt-sw3GPTSw3Tokenizergpt2gpt_bigcodegpt_neogpt_neoxgptjgranite
granitemoegranitemoehybridgranitemoesharedzgrounding-dinogroupvitherbertHerbertTokenizerideficsLlamaTokenizeridefics2instructblipinstructblipvideointernvljais2jina_embeddings_v3XLMRobertaTokenizerzkosmos-2lasr_ctcLasrTokenizerlasr_encoderlayoutlm
layoutlmv2LayoutLMv2Tokenizer
layoutlmv3LayoutLMv3Tokenizer	layoutxlmLayoutXLMTokenizerledLEDTokenizerlighton_ocrQwen2TokenizerFastlilt
longformerlxmertLxmertTokenizerm2m_100M2M100Tokenizermambamamba2marianMarianTokenizermarkuplmMarkupLMTokenizermbartMBartTokenizermbart50MBart50Tokenizerzmegatron-bert
metaclip_2minicpmv4_6	ministralMistralCommonBackend
ministral3mistralmistral3mixtralmlukeMLukeTokenizerzmm-grounding-dino
mobilebertMobileBertTokenizermpnetMPNetTokenizermptmt5T5Tokenizermusicgenmusicgen_melodymvpMvpTokenizernezhanllbNllbTokenizerznllb-moe
nomic_bertnougatNougatTokenizernystromformerolmoolmo2olmo3olmo_hybridolmoezomdet-turbo	oneformerz
openai-gptOpenAIGPTTokenizeroptovis2owlv2owlvitparakeet_ctcParakeetTokenizerparakeet_tdtpegasus	pegasus_xphi
pix2structpixtralplbartPLBartTokenizerpp_formulanetqdqbertqianfan_ocrqwen2qwen2_5_omni
qwen2_5_vlqwen2_audio	qwen2_moeqwen2_vlqwen3qwen3_5Qwen3_5Tokenizerqwen3_5_moe	qwen3_moe
qwen3_nextqwen3_omni_moeqwen3_vlqwen3_vl_moerealmrecurrent_gemmareformerReformerTokenizerrembertRemBertTokenizer	retribertroformerRoFormerTokenizerrwkvsam3
sam3_videoseamless_m4tSeamlessM4TTokenizerseamless_m4t_v2shieldgemma2siglipSiglipTokenizersiglip2Siglip2Tokenizerspeech_to_textSpeech2TextTokenizerspeecht5SpeechT5Tokenizersqueezebertstablelm
starcoder2switch_transformerst5t5gemmatrocrtvpudopUdopTokenizerumt5viltvisual_bertvoxtralvoxtral_realtimewhisperWhisperTokenizerxclipxglmXGLMTokenizerzxlm-robertazxlm-roberta-xlxlnetXLNetTokenizerxlstmxmodyosoarctic	chameleonchatlmdeepseek_v2deepseek_v3deepseek_v4deepseek_vldeepseek_vl_hybriddeepseek_vl_v2deepseek_ocrdeepseek_ocr2fuyu
h2ovl_chathyperclovax_vlm	internlm2internvl_chatjambajanusllava
llava_nextminicpmv
minimax_m2
modernbertmolmomolmo2nemotronnvfp4opencuaopenvlaphi3phi3_vphimoestep3p5step3_vlvipllava
cohere_asrc                    \        V RRR7      ;_uu_ 4       p\        P                  ! V4      uuRRR4       #   + '       g   i     R# ; i)z*Loads a vocabulary file into a dictionary.rutf-8encodingN)openjsonload)
vocab_filereaders   & /Users/mitch_tango/dev/rabbit-r1-livekit/agent/.venv/lib/python3.14/site-packages/transformers/models/auto/tokenization_auto.py
load_vocabr?    s.    	j#	0	0Fyy  
1	0	0	0s	   8A		c           	     8   . p\        V RRR7      ;_uu_ 4       pV F^  pVP                  4       pV'       g   K  VP                  R4      '       d   K6  VP                  \	        VP                  4       4      4       K`  	  RRR4       V#   + '       g   i     T# ; i)z Loads a merges file into a list.r5  r6  r7  #N)r9  strip
startswithappendtuplesplit)merges_filemergesr=  lines   &   r>  load_mergesrJ    ss    F	k3	1	1VD::<DtDOOC00eDJJL12  
2
 M 
2	1
 Ms   BB,BB	c                T    V ^8  d   QhR\         R\        \        ,          R,          /# )   
class_namereturnN)strtyper   )formats   "r>  __annotate__rR    s#     3 3# 3$s)d2B 3    c                    V R9   d   \         # V \        9   d   \        V ,          # V \        9   d   \        V ,          # V R8X  d   \         # \        P	                  4        F  w  rW 8X  g   K  \        V4      pVR9   d    V R8X  d   \        P                  ! RR4      pM\        P                  ! RV 2R4      p \        W04      p\        VRR4      ;p'       dq   V\        P                  9   d\   \        P                  V,          p\        WdP                  R	,           V4       \        P                  P                  VR
,           V4       Vu # 	  \        P                   P#                  4        F  p\        VRR4      V 8X  g   K  Vu # 	  \        P                  ! R4      p\%        W4      '       d   \        W4      # V P'                  R	4      '       d   \)        V RR 4      # R#   \         d     EK  i ; i)BloomTokenizerr   r   z.tokenization_mistral_commontransformers.ztransformers.models
__module__NFast_fast__name__>   rU  BloomTokenizerFast)r   r   r   r   r   r   r  )r   REGISTERED_FAST_ALIASESREGISTERED_TOKENIZER_CLASSESTOKENIZER_MAPPING_NAMESitemsr   	importlibimport_modulegetattrsysmodulessetattrr[  
setdefaultAttributeErrorTOKENIZER_MAPPING_extra_contentvalueshasattrendswithtokenizer_class_from_name)	rM  module_nametokenizer_classmoduleresultsubmodbase_mod	tokenizermain_modules	   &        r>  ro  ro    s   ==  ,,&z2211+J77((   )@(E(E(G$(3K@Krr"88"001OQ_`"001[M1BDYZ	 4%flDAAFAvQTQ\Q\G\"{{62HHoo&>GKK**6G+;XF# )H* '55<<>	9j$/:= ? )).9K{''{// 6""(CR99% " s   8BG""G21G2c                   V ^8  d   QhR\         \        P                  \         ,          ,          R\         \        P                  \         ,          ,          R,          R\        R\        \         \         3,          R,          R\        \         ,          R,          R\         R,          R\        R	\         R
\        \         \
        3,          /	# )rL  pretrained_model_name_or_path	cache_dirNforce_downloadproxiestokenrevisionlocal_files_only	subfolderrN  )rO  osPathLikebooldictr   )rQ  s   "r>  rR  rR    s     ] ]#&S)9#9]R[[%%,] ] #s(^d"	]
 #:] Dj] ] ] 
#s(^]rS  c                D   VP                  R4      p	\        V \        VVVVVVVRRRV	R7      p
V
f   \        P	                  R4       / # \        W4      p	\        V
RR7      ;_uu_ 4       p\        P                  ! V4      pRRR4       V	XR&   V#   + '       g   i     L; i)a
  
Loads the tokenizer configuration from a pretrained model tokenizer configuration.

Args:
    pretrained_model_name_or_path (`str` or `os.PathLike`):
        This can be either:

        - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
          huggingface.co.
        - a path to a *directory* containing a configuration file saved using the
          [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.

    cache_dir (`str` or `os.PathLike`, *optional*):
        Path to a directory in which a downloaded pretrained model configuration should be cached if the standard
        cache should not be used.
    force_download (`bool`, *optional*, defaults to `False`):
        Whether or not to force to (re-)download the configuration files and override the cached versions if they
        exist.
    proxies (`dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
    token (`str` or *bool*, *optional*):
        The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
        when running `hf auth login` (stored in `~/.huggingface`).
    revision (`str`, *optional*, defaults to `"main"`):
        The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
        git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
        identifier allowed by git.
    local_files_only (`bool`, *optional*, defaults to `False`):
        If `True`, will only try to load the tokenizer configuration from local files.
    subfolder (`str`, *optional*, defaults to `""`):
        In case the tokenizer config is located inside a subfolder of the model repo on huggingface.co, you can
        specify the folder name here.

<Tip>

Passing `token=True` is required when you want to use a private model.

</Tip>

Returns:
    `dict`: The configuration of the tokenizer.

Examples:

```python
# Download configuration from huggingface.co and cache.
tokenizer_config = get_tokenizer_config("google-bert/bert-base-uncased")
# This model does not have a tokenizer config so the result will be an empty dict.
tokenizer_config = get_tokenizer_config("FacebookAI/xlm-roberta-base")

# Save a pretrained tokenizer locally and you can reload its config
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
tokenizer.save_pretrained("tokenizer-test")
tokenizer_config = get_tokenizer_config("tokenizer-test")
```_commit_hashF)rz  r{  r|  r}  r~  r  r   _raise_exceptions_for_gated_repo%_raise_exceptions_for_missing_entries'_raise_exceptions_for_connection_errorsr  Nz\Could not locate the tokenizer configuration file, will try to use the model config instead.r6  r7  )	getr   r	   loggerinfor
   r9  r:  r;  )ry  rz  r{  r|  r}  r~  r  r  kwargscommit_hashresolved_config_filer=  rs  s   &&&&&&&&,    r>  get_tokenizer_configr    s    J **^,K&%%))..305  #rs	%&:HK	"W	5	56" 
6(F>M 
6	5s   )BB	c                   p   a  ] tR tRt o RtR t]]! ]4      V 3R lR l4       4       t	]
R	R l4       tRtV tR# )
AutoTokenizeri6  a  
This is a generic tokenizer class that will be instantiated as one of the tokenizer classes of the library when
created with the [`AutoTokenizer.from_pretrained`] class method.

This class cannot be instantiated directly using `__init__()` (throws an error).
c                    \        R 4      h)z}AutoTokenizer is designed to be instantiated using the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)` method.)OSError)selfs   &r>  __init__AutoTokenizer.__init__>  s    _
 	
rS  c                0   < V ^8  d   QhRS[ S[,          /# )rL  rN  )r   r   )rQ  __classdict__s   "r>  rR  AutoTokenizer.__annotate__F  s     T
 T
	1	1T
rS  c           	     *   VP                  RR4      pRVR&   VP                  RR4      pVP                  RR4      pVP                  RR4      pVP                  R4      pVe{   \        P                  VR4      p	V	f-   \        R	V R
RP	                  R \         4       4       R24      h\        V	4      p
V
f   \        RV	 R24      hV
P                  ! V.VO5/ VB # V'       d8   \        W3/ VB p\        VRR7      R,          p\        P                  ! R'/ VB pMVf    \        P                  ! V3RV/VB pVP                  p\        V3/ VB pVP                  RR4      pRpRV9   dG   \        VR,          \         \"        34      '       d   VR,          pMVR,          P                  RR4      pVEf    Ve   Ve   VR8w  d   \        P                  V4      e   \        P                  V4      P%                  R4      VP%                  R4      8w  d   \        P                  V4      P%                  R4      pVR(9  dF   V\&        9   d   TMTp\        V4      p
V
e(   V
P(                  R(9  d   V
P                  ! V.VO5/ VB # \*        e   \*        P                  ! V.VO5/ VB # \        RV R24      hRV9   d   VR,          VR&   V'       d   VP-                  R4      '       d   VRR) pVRJp\/        V4      \0        9   ;'       g6    VRJ;'       d*    \        V4      RJ;'       g    \        VR,           4      RJpT;'       dj    \/        V4      \0        9  ;'       dP    VRJ;'       dD    \        V4      ;'       g    \        VR,           4      P2                  P5                  R4      '       * pV'       d   V\&        9   d   RpRpV'       dO   V^,          e   V^,          pM	V^ ,          pRV9   d   VP7                  R4      ^ ,          pMRp\9        WqVVV4      pV'       dz   V'       dr   V'       gj   V'       d   \        VP%                  R4      4       \;        XV3/ VB p
VP                  RR4      pV
P=                  4        V
P                  ! V.VO5RV/VB # Vev   Tp\        V4      p
V
f*   VP-                  R4      '       g   \        VR,           4      p
V
e   V
P(                  R8X  d   \*        p
V
f   \*        p
V
P                  ! V.VO5/ VB # \?        VRR4      '       dQ   VP@                  pRV9  d   VP-                  R4      '       d   VRR) p\        V4      p
V
P                  ! V.VO5/ VB # \        V\B        4      '       d}   \/        VPD                  4      \/        VPF                  4      JdE   \H        PK                  R VPF                  PL                   R!VPD                  PL                   R"24       VPF                  p\O        \/        V4      P(                  4      ;'       g    \?        VR#R4      pVe>   \0        P                  \/        V4      \*        4      p
V
e   V
P                  ! V.VO5/ VB # VP                  RR4      pVe   VR8w  d   VP-                  R4      '       d   VRR) p\        V4      p
V
f*   VP-                  R4      '       g   \        VR,           4      p
V
e   V
P(                  R8X  d   \*        p
V
f   \*        p
V
P                  ! V.VO5/ VB # \        R$VPL                   R%RP	                  R& \0         4       4       R24      h  \        \        3 d    \        P                  ! T3/ TB p ELWi ; i)*a  
Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

The tokenizer class to instantiate is selected based on the `model_type` property of the config object (either
passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's missing, by
falling back to using pattern matching on `pretrained_model_name_or_path`:

List options

Params:
    pretrained_model_name_or_path (`str` or `os.PathLike`):
        Can be either:

            - A string, the *model id* of a predefined tokenizer hosted inside a model repo on huggingface.co.
            - A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
              using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
            - a path to a single saved vocabulary file if and only if the tokenizer only requires a
              single vocabulary file (like Bert or XLNet), e.g.: `./my_model_directory/vocab.txt`. (Not
              applicable to all derived classes)
    inputs (additional positional arguments, *optional*):
        Will be passed along to the Tokenizer `__init__()` method.
    config ([`PreTrainedConfig`], *optional*)
        The configuration object used to determine the tokenizer class to instantiate.
    cache_dir (`str` or `os.PathLike`, *optional*):
        Path to a directory in which a downloaded pretrained model configuration should be cached if the
        standard cache should not be used.
    force_download (`bool`, *optional*, defaults to `False`):
        Whether or not to force the (re-)download the model weights and configuration files and override the
        cached versions if they exist.
    proxies (`dict[str, str]`, *optional*):
        A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
        'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
    revision (`str`, *optional*, defaults to `"main"`):
        The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
        git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
        identifier allowed by git.
    subfolder (`str`, *optional*):
        In case the relevant files are located inside a subfolder of the model repo on huggingface.co (e.g. for
        facebook/rag-token-base), specify it here.
    tokenizer_type (`str`, *optional*):
        Tokenizer type to be loaded.
    backend (`str`, *optional*, defaults to `"tokenizers"`):
        Backend to use for tokenization. Valid options are:
        - `"tokenizers"`: Use the HuggingFace tokenizers library backend (default)
        - `"sentencepiece"`: Use the SentencePiece backend
    trust_remote_code (`bool`, *optional*, defaults to `False`):
        Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
        should only be set to `True` for repositories you trust and in which you have read the code, as it will
        execute code present on the Hub on your local machine.
    kwargs (additional keyword arguments, *optional*):
        Will be passed to the Tokenizer `__init__()` method. Can be used to set special tokens like
        `bos_token`, `eos_token`, `unk_token`, `sep_token`, `pad_token`, `cls_token`, `mask_token`,
        `additional_special_tokens`. See parameters in the `__init__()` for more details.

Examples:

```python
>>> from transformers import AutoTokenizer

>>> # Download vocabulary from huggingface.co and cache.
>>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

>>> # Download vocabulary from huggingface.co (user-uploaded) and cache.
>>> tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")

>>> # If vocabulary files are in a directory (e.g. tokenizer was saved using *save_pretrained('./test/saved_model/')*)
>>> # tokenizer = AutoTokenizer.from_pretrained("./test/bert_saved_model/")

>>> # Download vocabulary from huggingface.co and define model-specific arguments
>>> tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base", add_prefix_space=True)

>>> # Explicitly use the tokenizers backend
>>> tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", backend="tokenizers")

>>> # Explicitly use the sentencepiece backend
>>> tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", backend="sentencepiece")
```configNT
_from_autouse_fasttokenizer_typetrust_remote_code	gguf_filezPassed `tokenizer_type` z3 does not exist. `tokenizer_type` should be one of z, c              3   $   "   T F  qx  K  	  R # 5iN .0cs   & r>  	<genexpr>0AutoTokenizer.from_pretrained.<locals>.<genexpr>  s      D,Cq,Cs   rW  zTokenizer class z is not currently imported.F)return_tensorsrq  auto_mapr   rY  r   PythonBackendPreTrainedTokenizerFastzTokenizer class 'zf' specified in the tokenizer config was not found. The tokenizer may need to be converted or re-saved.r  ztransformers.z--code_revisionz The encoder model config class: z3 is different from the decoder model config class: z. It is not recommended to use the `AutoTokenizer.from_pretrained()` method in this case. Please use the encoder and decoder specific tokenizer classes.
model_typez!Unrecognized configuration class z8 to build an AutoTokenizer.
Model type should be one of c              3   8   "   T F  qP                   x  K  	  R # 5ir  )r[  r  s   & r>  r  r  Y  s     4[IZAZZIZs   r  )r   r  r  r]  )(popr  r`  
ValueErrorjoinro  from_pretrainedr   r   r   	for_modelr  r   r  r  
isinstancerE  listremovesuffix)MODELS_WITH_INCORRECT_HUB_TOKENIZER_CLASSr[  r   rn  rP  rj  rX  rC  rF  r   r   register_for_auto_classrd  rq  r   decoderencoderr  warning	__class__r   )clsry  inputsr  r  _r  r  r  tokenizer_class_namerq  	gguf_pathconfig_dictconfig_model_typetokenizer_configtokenizer_config_classtokenizer_auto_mapregistered_class_namerM  has_remote_codehas_local_codeexplicit_local_code	class_refupstream_repotokenizer_class_candidate_classr  s   &&*,                       r>  r  AutoTokenizer.from_pretrainedD  s   d Hd+#| JJz4($4d;"JJ':DAJJ{+	 %#:#>#>~t#T #+ .~.>>qyy D,C DDEQH 
 88LMO& #34H3IId!eff"223PdSYd]cdd#$AWPVWI.yOPXYK))8K8F^c#331EVZ` #-- 00MXQWX!1!5!56G!N "))*:6FF%5j%A"%5j%A%E%EoW[%\"
 &&2!-!R''++,=>J(,,->?LLVT&33F;= %<$?$?@Q$R$_$_`f$g!$,mm ),UU */ 
 #<J"G".?3K3K T 4
 +::;Xl[alekll ,(889VjY_jcijj#$:#; <F G 
 --%5n%EF>"!&<&E&Ef&M&M%;CR%@",D8f):: 
 
"$.  )*@AM Z Z,-Cf-LMUYY	 	   V$55  'd2 9 9-.DE R R01G&1PQ*ZZ89 	 04]]#O!%!!$0.q1	.q1	y  ) 5a 8 $ 9!.Racp! 09L%)*@*M*Mf*UV;IGdohnoO

?D1A335"22-06J[_e  $/(>%78QRO&/H/Q/QRX/Y/Y";<UX^<^"_*/G/G?/Z"3&"3"223PdSYd]cddV.55++F(66??6;R;R7?O"223PdSYd]cdd f233FNN#4+??6v~~7O7O6P Q%%+^^%=%=$> ?22 ^^F/V0E0EFmm'RXZfhlJm
!/33DLBSTO*&667ThW]haghh "2!5!56G!N!-%)<<AWA`A`agAhAh)?)D&78NOO&/E/N/Nv/V/V";<RU[<["\*/G/G?/Z"3&"3"223PdSYd]cdd/0@0@/A B++/994[IZ4[+[*\\]_
 	
} ( c)99:Wb[abcs   !]& &(^^Nc                    Vf   Ve   TpMVe   TpM\        R4      hW#V3 F  pVf   K	  V\        VP                  &   K  	  Ve   Ve   V\        VP                  &   \        P                  WVR7       R# )a|  
Register a new tokenizer in this mapping.

Args:
    config_class ([`PreTrainedConfig`]):
        The configuration corresponding to the model to register.
    tokenizer_class: The tokenizer class to register (V5 - preferred parameter).
    slow_tokenizer_class: (Deprecated) The slow tokenizer to register.
    fast_tokenizer_class: (Deprecated) The fast tokenizer to register.
Nz$You need to pass a `tokenizer_class`)exist_ok)r  r_  r[  r^  rj  register)config_classrq  slow_tokenizer_classfast_tokenizer_classr  	candidates   &&&&& r>  r  AutoTokenizer.register\  s     "#/"6%1"6 !GHH.oVI$CL,Y-?-?@ W  +0D0PEY#$8$A$AB""<8"TrS  r  )NNNF)r[  rX  __qualname____firstlineno____doc__r  classmethodr   r`  r  staticmethodr  __static_attributes____classdictcell__)r  s   @r>  r  r  6  sK     
 &'>?T
 @ T
l U UrS  r  rj  c                   V ^8  d   Qh/ ^ \         9   d&   \        \        \        \        ,          3,          ;R&   ^\         9   d&   \        \        \        \        ,          3,          ;R&   ^\         9   d   \
        \        ,          ;R&   # )rL  r_  r^  r  )__conditional_annotations__r  rO  rP  r   set)rQ  s   "r>  rR  rR     sf      ^ 8 7d3S	>2 7_ ` 3 2c49n- 2a `
& &3s8 &a
 rS  )bartphoBartphoTokenizer)zbert-japaneseBertJapaneseTokenizer)bertweetBertweetTokenizer)biogptBioGptTokenizer)zblenderbot-smallBlenderbotSmallTokenizer)bridgetowerr%   )byt5ByT5Tokenizer)canineCanineTokenizer)clapr%   )clvpClvpTokenizer)cpmantCpmAntTokenizer)ctrlCTRLTokenizer)zdata2vec-audioWav2Vec2CTCTokenizer)zdata2vec-textr%   )diaDiaTokenizer)esmEsmTokenizer)flaubertFlaubertTokenizer)fsmtFSMTTokenizer)gpt_neox_japaneseGPTNeoXJapaneseTokenizer)hubertr  )ibertr%   )lukeLukeTokenizer)megar%   )zmgp-strMgpstrTokenizer)mrar%   )myt5MyT5Tokenizer)	perceiverPerceiverTokenizer)phobertPhobertTokenizer)
prophetnetProphetNetTokenizer)ragRagTokenizer)robertar%   )zroberta-prelayernormr%   )roc_bertRoCBertTokenizer)splinterSplinterTokenizer)tapasTapasTokenizer)	unispeechr  )zunispeech-satr  )vitsVitsTokenizer)wav2vec2r  )zwav2vec2-bertr  )zwav2vec2-conformerr  )wav2vec2_phonemeWav2Vec2PhonemeCTCTokenizer)xlmXLMTokenizer)NFNNNFr  )>r  r  rb  r:  r  re  collectionsr   typingr   transformers.utils.import_utilsr   configuration_utilsr   dynamic_module_utilsr   r   modeling_gguf_pytorch_utilsr   tokenization_utils_baser	   utilsr
   r   r   r   r   	utils.hubr   encoder_decoderr   auto_factoryr   configuration_autor   r   r   r   r   tokenization_utils_tokenizersr    tokenization_utils_sentencepiecer   
get_loggerr[  r  r_  r^  rO  r`  r  r  rj  ra  CONFIG_TO_TYPEr?  rJ  ro  r  r  __all__rR  )kvr  s   00@r>  <module>r7     s      	 
 #  G 3 \ ? <  % 2 *  BH			H	% 68  702  2%c3:o6X	%<%>%>/DIX	(?(A(A$tLX 
%<%>%>/DIX 
/F/H/H+dS	X
 
,C,E,E(4PX 
$;$=$=4HX 
'>'@'@#dKX 
*A*C*C&NX 	(X 
$;$=$=4HX 
9S9U9U5[_`X 	3X 	*X 
+B+D+D'$OX 
2I2K2K.QUVX  	&!X" 
0G0I0I,tT#X$ 	9%X& 
$;$=$=4H'X( 
&=&?&??TJ)X* 	,+X, 
$;$=$=4H-X. 	"/X0 
.E.G.G*TR1X2 	&3X4 
,C,E,E4P5X6 	%7X8 
$;$=$=4H9X: 
'>'@'@OdK;X< 	"=X> 
/F/H/H+dS?X@ 
'>'@'@OdKAXB 
(?(A(A$tLCXD 
)@)B)B%MEXF 
)@)B)B%MGXH 
(?(A(A_tLIXJ 
"9";";FKXL 	&MXN 	"OXP 	3QXR 	.SXT 
$;$=$=4HUXV 
*A*C*C&NWXX 
/F/H/H+dSYXZ 	 [X\ 
*A*C*CN]X^ 
1H1J1J-PTU_X` 
'>'@'@OdKaXb 
$;$=$=4HcXd 
%<%>%>/DIeXf 	 gXh 
/F/H/H+dSiXj 
!EXEZEZ"A`dekXl 	*mXn 
%<%>%>/DIoXp 
)@)B)BoMqXr 
)@)B)BoMsXt 
$;$=$=4HuXv 	"wXx 
(?(A(A$tLyXz 
&=&?&?"TJ{X| 
'>'@'@#dK}X~ 
'>'@'@#dKX@ 
,C,E,E(4PAXB 
(?(A(A$tLCXD 
-D-F-F)DQEXF 
#:#<#<$GGXH 
'>'@'@#dKIXJ 
(?(A(A$tLKXL 
,C,E,E(4PMXN 
1H1J1J-PTUOXP 
)@)B)B%MQXR 
-D-F-F)DQSXT 
-D-F-F)DQUXV 
*A*C*C&NWXX 
,C,E,E(4PYXZ 
)C)E)E%4P[X\ 
$;$=$=4H]X^ 
+B+D+D$O_X` 
'>'@'@OdKaXb 
+B+D+D'$OcXd 	:eXf 
$;$=$=4HgXh 
+B+D+D'$OiXj 
.E.G.G*TRkXl 
4K4M4M0SWXmXn 
4K4M4M0SWXoXp 
.E.G.G?TRqXr 
(?(A(A_tLsXt 
*A*C*C&NuXv 	+wXx 	&yXz 
(?(A(A$tL{X| 
)@)B)B%M}X~ 
,C,E,E4PX@ 
1H1J1JoPTUAXB 
)@)B)B%MCXD 
%<%>%>/DIEXF 
8O8Q8Q4W[\GXH 
.E.G.G*TRIXJ 
(?(A(A_tLKXL 
,C,E,E4PMXN 
(?(A(A_tLOXP 
0G0I0I,tTQXR 
0G0I0I,tTSXT 
.E.G.G*TRUXV 
"9";";FWXX 
0G0I0I,tTYXZ 
'>'@'@#dK[X\ 
-D-F-F)DQ]X^ 	"_X` 
(?(A(A$tLaXb 
)C)E)E%4PcXd 
(?(A(A$tLeXf 
)@)B)B%MgXh 
(B(D(D$$OiXj 
,C,E,E(4PkXl 
&=&?&?"TJmXn 
*A*C*C&NoXp 	%qXr 
-D-F-F/DQsXt 
0G0I0I,tTuXv 	'wXx 
/F/H/H+dSyX| *,, #)@)B)B%		
{XH *,, #)@)B)B%		
GXT *,, #)@)B)B%		
SX` *,, #)@)B)B%		
_Xl *,, #)@)B)B%		
kXv 
&@&B&B"MwXx 
1H1J1JoPTUyXz 
0G0I0I,tT{X| 
&=&?&?"TJ}X~ 
&=&?&?"TJX@ 	$AXB 
!8!:!:ECXD 
&=&?&?]TJEXF 
-D-F-FMDQGXH 
"9";";FIXJ 	"KXL 
%<%>%>/DIMXN 
$;$=$=4HOXP 
(?(A(A_tLQXR 
*A*C*CNSXT 
(?(A(A$tLUXV 
/F/H/H+dSWXX 
'>'@'@#dKYXZ 
(?(A(A$tL[X\ 
)@)B)B%M]X^ 
/F/H/H+dS_X` 
(?(A(A$tLaXb 
+B+D+D$OcXd 
)@)B)BoMeXf 
/F/H/H+dSgXh 
#:#<#<$GiXj 
&=&?&?"TJkXl 
%<%>%>/DImXn 
&=&?&??TJoXp 
0G0I0I,tTqXr 
0G0I0I,tTsXt 
*A*C*C&NuXv 
,C,E,E(4PwXx 	,yXz 
#:#<#<$G{X| 	(}X~ 
(?(A(A}tLXB *,, #)@)B)B%		
AXL 
(?(A(A$tLMXN 
/F/H/H+dSOXP 	.QXR 
'>'@'@OdKSXT 
,C,E,E(4PUXV 
&=&?&?"TJWXX 
-D-F-F)DQYXZ 
+B+D+D'$O[X\ 
,C,E,E(4P]X^ 
*A*C*C&N_X` 
)@)B)B%MaXb 
&=&?&?"TJcXd 
*A*C*C&NeXf 
.E.G.G*TRgXh 
*A*C*C&NiXj 
+B+D+D'$OkXl 
/F/H/H+dSmXn 
)@)B)B%MoXp 
-D-F-F)DQqXr 	 sXt 
%<%>%>/DIuXv 
0G0I0I,tTwXx 
,C,E,E(4PyXz 
*A*C*C&N{X| 
)@)B)BoM}X~ 	(X@ 	5AXB 	)CXD 
,C,E,E(4PEXF 
'>'@'@#dKGXH 
$;$=$=4HIXJ 
*A*C*CNKXL 
3J3L3L/RVWMXN 
6M6O6O2UYZOXP 
-D-F-F)DQQXR 
(B(D(D$$OSXT 
*A*C*C&NUXV 
5O5Q5Q1W[\WXX 
,F,H,H(dSYXZ 	*[X\ 
+B+D+D$O]X^ 
+B+D+D'$O_X` 
*A*C*CNaXb 
1H1J1JPTUcXd 
 7 9 9}tDeXf 
(?(A(A$tLgXh 	$iXj 
+B+D+D'$OkXl 
#:#<#<$GmXn 
$;$=$=4HoXp 
"9";";FqXr 	.sXt 	2uXv 
$;$=$=4HwXx 
+B+D+D$OyXz 	"{X~ *,, #)@)B)B%		
}XJ *,, #)@)B)B%		
IXT 	-UXV 	2WXX 	7YXZ 	<[X\ 
*A*C*C&N]X^ 
%<%>%>/DI_X` 
$;$=$=4HaXb 	 cXd 
1H1J1J-PTUeXf 
4K4M4M0SWXgXh 
&=&?&?"TJiXj 
(?(A(A$tLkXl 
*A*C*C&NmXn 
&=&?&?"TJoXZ |&7&7&7 &7 	&7
 &7 &7 &7 &7 &7 &7 &7 &7 &7 &7 &7  !&7" #&7$ %&7& '&7( )&7* +&7, -&7. /&70 1&72 3&74 5&76 7&78 9&7: ;&7< =&7> ?&7@ A&7B C&7D E&7F G&7H I&7J K&7 ) &P <J00E\E^E^.Adh
+ < %%9;RS #7#=#=#?@#?41!$#?@!3l] ]@DU DUN
 
0c As   D'AE'