/*
 * Webpack chunk 165, module 9046 (minified build output; tokenizer module).
 *
 * Export: the module exposes one binding, `t2`, which resolves to class `e4`.
 * `e4.from_pretrained(...)` loads `tokenizer.json` and `tokenizer_config.json`
 * through helper `c`, strips a trailing "Fast" from `tokenizer_class`, looks
 * the name up in `e4.TOKENIZER_CLASS_MAPPING`, and falls back to the base
 * class `e_` (after a console warning) when the name is not in the mapping.
 *
 * Module-level helpers:
 *   c(e, t)  fetches both JSON files in parallel via `i.yM` and copies
 *            `t.legacy` onto the loaded config when it is not null.
 *   h(e, t)  turns a `{Regex}` or `{String}` pattern config into a RegExp with
 *            the "gu" flags; warns and returns null for any other shape.
 *   u(e)     plain object -> Map.
 *   d(e)     tensor -> id list; throws for a 2-D tensor whose first dim is not 1
 *            and for any rank other than 1 or 2.
 *   _(e)     removes the space before . ? ! , and before 's 'm 've 're n't.
 *   p(e)     removes code points U+0300-U+036F.
 *   m        record class for one added token (content, id, single_word,
 *            lstrip, rstrip, special, normalized).
 *
 * Minified class -> config `type` string, as wired by the `fromConfig` switches:
 *   models (base `k`):         x=WordPiece, w=Unigram, v=BPE,
 *                              z=default branch taken when the config has `vocab`
 *   normalizers (base `A`):    N=BertNormalizer, ea=Precompiled, R=Sequence,
 *                              S=Replace, E=NFC, T=NFKC, C=NFKD, j=Strip,
 *                              M=StripAccents, P=Lowercase, $=Prepend
 *   pre-tokenizers (base `F`): L=BertPreTokenizer, el=Sequence, ec=Whitespace,
 *                              eh=WhitespaceSplit, er=Metaspace, O=ByteLevel,
 *                              U=Split, W=Punctuation, G=Digits, eu=Replace
 *   decoders (base `Z`):       X=WordPiece, eo=Metaspace, ee=ByteLevel,
 *                              V=Replace, H=ByteFallback, J=Fuse, Q=Strip,
 *                              es=Sequence, et=CTC, en=BPEDecoder
 *   post-processors:           built through `I.fromConfig`; the definition of
 *                              `I` is not visible in this text.
 *
 * `e_` is the base tokenizer class. Its constructor builds the normalizer,
 * pre-tokenizer, model, post-processor and decoder from the tokenizer JSON,
 * registers every added token in the model vocabulary, and resolves the
 * mask/pad/sep/unk tokens from the tokenizer config. The model-specific
 * subclasses listed in `TOKENIZER_CLASS_MAPPING` mostly override
 * `return_token_type_ids`, `_default_chat_template`, or `_encode_text`.
 *
 * NOTE(review): several spans do not parse as written, for example
 *   `for(;ithis.tokens_to_ids.get(e)`  (class `k`),
 *   `for(;i0&&(`                        (`x.encode`),
 *   `for(let s=0;s=19968`               (class `N`).
 * It looks as if text between a `<` and a later `>` was dropped from this copy.
 * Confirm against the real build artifact before relying on it.
 *
 * NOTE(review): the physical line breaks fall inside string literals and right
 * after `throw` / `return`, so they are presumably an artifact of wrapping and
 * not part of the original bundle - TODO confirm.
 *
 * NOTE(review): `N._is_control` tests `/^\p{Cc}|\p{Cf}|\p{Co}|\p{Cs}$/u`. The
 * `^` binds only to the first alternative and the `$` only to the last. The
 * visible caller (`_clean_text`) passes one code point at a time, for which the
 * result is the same as a fully anchored pattern.
 *
 * NOTE(review): class `e0` (mapped from "MarianTokenizer") creates
 * `languageRegex` with the `g` flag and then calls `.test()` on it inside
 * `vocab.filter(...)`. `.test()` on a global regex advances `lastIndex`, so a
 * matching entry that directly follows another match may be skipped - verify
 * `supported_language_codes`.
 *
 * NOTE(review): in `eH.get_decoder_prompt_ids` the "not supported" error
 * interpolates `JSON.stringify(...)` of a Map iterator (`eZ.keys()` or
 * `eZ.values()`), which serializes as `{}` and not as the list of languages.
 */
"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[165],{9046:function(e,t,s){s.d(t,{t2:function(){return e4}});var n=s(761),i=s(911),r=s(8709),o=s(2414),a=s(1510),l=s(4087);async function c(e,t){let s=await Promise.all([(0,i.yM)(e,"tokenizer.json",!0,t),(0,i.yM)(e,"tokenizer_config.json",!0,t)]);return null!==t.legacy&&(s[1].legacy=t.legacy),s}function h(e,t=!0){if(void 0!==e.Regex){let t=e.Regex.replace(/\\([#&~])/g,"$1");for(let[e,s]of g)t=t.replaceAll(e,s);return RegExp(t,"gu")}if(void 0===e.String)return console.warn("Unknown pattern type:",e),null;{let s=(0,n.hr)(e.String);return RegExp(t?s:`(${s})`,"gu")}}function u(e){return new Map(Object.entries(e))}function d(e){let t=e.dims;switch(t.length){case 1:return e.tolist();case 2:if(1!==t[0])throw Error("Unable to decode tensor with `batch size !== 1`. Use `tokenizer.batch_decode(...)` for batched inputs.");return e.tolist()[0];default:throw Error(`Expected tensor to have 1-2 dimensions, got ${t.length}.`)}}function _(e){return e.replace(/ \./g,".").replace(/ \?/g,"?").replace(/ \!/g,"!").replace(/ ,/g,",").replace(/ \' /g,"'").replace(/ n\'t/g,"n't").replace(/ \'m/g,"'m").replace(/ \'s/g,"'s").replace(/ \'ve/g,"'ve").replace(/ \'re/g,"'re")}function p(e){return e.replace(/[\u0300-\u036f]/g,"")}let f="\\p{P}\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E",g=new Map([["(?i:'s|'t|'re|'ve|'m|'ll|'d)","(?:'([sS]|[tT]|[rR][eE]|[vV][eE]|[mM]|[lL][lL]|[dD]))"]]);class m{constructor(e){this.content=e.content,this.id=e.id,this.single_word=e.single_word??!1,this.lstrip=e.lstrip??!1,this.rstrip=e.rstrip??!1,this.special=e.special??!1,this.normalized=e.normalized??null}}class k extends n.Ag{constructor(e){super(),this.config=e,this.vocab=[],this.tokens_to_ids=new Map,this.unk_token_id=void 0,this.unk_token=void 0,this.end_of_word_suffix=void 0,this.fuse_unk=this.config.fuse_unk??!1}static fromConfig(e,...t){switch(e.type){case"WordPiece":return new x(e);case"Unigram":return new 
w(e,...t);case"BPE":return new v(e);default:if(e.vocab)return new z(e,...t);throw Error(`Unknown TokenizerModel type: ${e.type}`)}}_call(e){let t=this.encode(e);return this.fuse_unk&&(t=function(e,t,s){let n=[],i=0;for(;ithis.tokens_to_ids.get(e)??this.unk_token_id)}convert_ids_to_tokens(e){return e.map(e=>this.vocab[e]??this.unk_token)}}class x extends k{constructor(e){for(let[t,s]of(super(e),this.tokens_to_ids=u(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.max_input_chars_per_word=e.max_input_chars_per_word??100,this.vocab=Array(this.tokens_to_ids.size),this.tokens_to_ids))this.vocab[s]=t}encode(e){let t=[];for(let s of e){let e=[...s];if(e.length>this.max_input_chars_per_word){t.push(this.unk_token);continue}let n=!1,i=0,r=[];for(;i0&&(n=this.config.continuing_subword_prefix+n),this.tokens_to_ids.has(n)){s=n;break}--t}if(null===s){n=!0;break}r.push(s),i=t}n?t.push(this.unk_token):t.push(...r)}return t}}class w extends k{constructor(e,t){super(e);let s=e.vocab.length;this.vocab=Array(s),this.scores=Array(s);for(let t=0;t[e,t])),this.bosToken=" ",this.bosTokenId=this.tokens_to_ids.get(this.bosToken),this.eosToken=t.eos_token,this.eosTokenId=this.tokens_to_ids.get(this.eosToken),this.unkToken=this.vocab[this.unk_token_id],this.minScore=(0,r.VV)(this.scores)[0],this.unkScore=this.minScore-10,this.scores[this.unk_token_id]=this.unkScore,this.trie=new a.GA,this.trie.extend(this.vocab),this.fuse_unk=!0}populateNodes(e){let t=e.sentence,s=t.length,n=0;for(;n{let e=[...Array.from({length:94},(e,t)=>t+33),...Array.from({length:12},(e,t)=>t+161),...Array.from({length:82},(e,t)=>t+174)],t=e.slice(),s=0;for(let n=0;n<256;++n)e.includes(n)||(e.push(n),t.push(256+s),s+=1);let n=t.map(e=>String.fromCharCode(e));return Object.fromEntries(e.map((e,t)=>[e,n[t]]))})(),b=(0,n.$2)(y);class v extends k{constructor(e){for(let[t,s]of(super(e),this.BPE_SPLIT_TOKEN=" 
",this.tokens_to_ids=u(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.vocab=Array(this.tokens_to_ids.size),this.tokens_to_ids))this.vocab[s]=t;this.bpe_ranks=new Map(e.merges.map((e,t)=>[e,t])),this.merges=e.merges.map(e=>e.split(this.BPE_SPLIT_TOKEN)),this.end_of_word_suffix=e.end_of_word_suffix,this.continuing_subword_suffix=e.continuing_subword_suffix??null,this.byte_fallback=this.config.byte_fallback??!1,this.byte_fallback&&(this.text_encoder=new TextEncoder),this.ignore_merges=this.config.ignore_merges??!1,this.cache=new Map}bpe(e){if(0===e.length)return[];let t=this.cache.get(e);if(void 0!==t)return t;let s=Array.from(e);this.end_of_word_suffix&&(s[s.length-1]+=this.end_of_word_suffix);let n=[];if(s.length>1){let e=new a.Z3((e,t)=>e.score`<0x${e.toString(16).toUpperCase().padStart(2,"0")}>`)):t.push(this.unk_token)}return t}}class z extends k{constructor(e,t){for(let[s,n]of(super(e),this.tokens_to_ids=u(t.target_lang?e.vocab[t.target_lang]:e.vocab),this.bos_token=t.bos_token,this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t.eos_token,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.pad_token=t.pad_token,this.pad_token_id=this.tokens_to_ids.get(this.pad_token),this.unk_token=t.unk_token,this.unk_token_id=this.tokens_to_ids.get(this.unk_token),this.vocab=Array(this.tokens_to_ids.size),this.tokens_to_ids))this.vocab[n]=s}encode(e){return e}}class A extends n.Ag{constructor(e){super(),this.config=e}static fromConfig(e){if(null===e)return null;switch(e.type){case"BertNormalizer":return new N(e);case"Precompiled":return new ea(e);case"Sequence":return new R(e);case"Replace":return new S(e);case"NFC":return new E(e);case"NFKC":return new T(e);case"NFKD":return new C(e);case"Strip":return new j(e);case"StripAccents":return new M(e);case"Lowercase":return new P(e);case"Prepend":return new $(e);default:throw Error(`Unknown Normalizer type: ${e.type}`)}}normalize(e){throw 
Error("normalize should be implemented in subclass.")}_call(e){return this.normalize(e)}}class S extends A{normalize(e){let t=h(this.config.pattern);return null===t?e:e.replaceAll(t,this.config.content)}}class E extends A{normalize(e){return e=e.normalize("NFC")}}class T extends A{normalize(e){return e=e.normalize("NFKC")}}class C extends A{normalize(e){return e=e.normalize("NFKD")}}class j extends A{normalize(e){return this.config.strip_left&&this.config.strip_right?e=e.trim():(this.config.strip_left&&(e=e.trimStart()),this.config.strip_right&&(e=e.trimEnd())),e}}class M extends A{normalize(e){return e=p(e)}}class P extends A{normalize(e){return e=e.toLowerCase()}}class $ extends A{normalize(e){return e=this.config.prepend+e}}class R extends A{constructor(e){super(e),this.normalizers=e.normalizers.map(e=>A.fromConfig(e))}normalize(e){return this.normalizers.reduce((e,t)=>t.normalize(e),e)}}class N extends A{_tokenize_chinese_chars(e){let t=[];for(let s=0;s=19968&&e<=40959||e>=13312&&e<=19903||e>=131072&&e<=173791||e>=173824&&e<=177983||e>=177984&&e<=178207||e>=178208&&e<=183983||e>=63744&&e<=64255||e>=194560&&e<=195103}stripAccents(e){return e.normalize("NFD").replace(/[\u0300-\u036f]/g,"")}_is_control(e){switch(e){case" ":case"\n":case"\r":return!1;default:return/^\p{Cc}|\p{Cf}|\p{Co}|\p{Cs}$/u.test(e)}}_clean_text(e){let t=[];for(let s of e){let e=s.charCodeAt(0);0===e||65533===e||this._is_control(s)||(/^\s$/.test(s)?t.push(" "):t.push(s))}return t.join("")}normalize(e){return this.config.clean_text&&(e=this._clean_text(e)),this.config.handle_chinese_chars&&(e=this._tokenize_chinese_chars(e)),this.config.lowercase?(e=e.toLowerCase(),!1!==this.config.strip_accents&&(e=this.stripAccents(e))):this.config.strip_accents&&(e=this.stripAccents(e)),e}}class F extends n.Ag{static fromConfig(e){if(null===e)return null;switch(e.type){case"BertPreTokenizer":return new L(e);case"Sequence":return new el(e);case"Whitespace":return new ec(e);case"WhitespaceSplit":return new 
eh(e);case"Metaspace":return new er(e);case"ByteLevel":return new O(e);case"Split":return new U(e);case"Punctuation":return new W(e);case"Digits":return new G(e);case"Replace":return new eu(e);default:throw Error(`Unknown PreTokenizer type: ${e.type}`)}}pre_tokenize_text(e,t){throw Error("pre_tokenize_text should be implemented in subclass.")}pre_tokenize(e,t){return(Array.isArray(e)?e.map(e=>this.pre_tokenize_text(e,t)):this.pre_tokenize_text(e,t)).flat()}_call(e,t){return this.pre_tokenize(e,t)}}class L extends F{constructor(e){super(),this.pattern=RegExp(`[^\\s${f}]+|[${f}]`,"gu")}pre_tokenize_text(e,t){return e.trim().match(this.pattern)||[]}}class O extends F{constructor(e){super(),this.config=e,this.add_prefix_space=this.config.add_prefix_space,this.trim_offsets=this.config.trim_offsets,this.use_regex=this.config.use_regex??!0,this.pattern=/'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu,this.byte_encoder=y,this.text_encoder=new TextEncoder}pre_tokenize_text(e,t){return this.add_prefix_space&&!e.startsWith(" ")&&(e=" "+e),(this.use_regex?e.match(this.pattern)||[]:[e]).map(e=>Array.from(this.text_encoder.encode(e),e=>this.byte_encoder[e]).join(""))}}class U extends F{constructor(e){super(),this.config=e,this.pattern=h(this.config.pattern,this.config.invert)}pre_tokenize_text(e,t){return null===this.pattern?[]:this.config.invert?e.match(this.pattern)||[]:function(e,t){let s=[],n=0;for(let i of e.matchAll(t)){let t=i[0];n0&&s.push(t),n=i.index+t.length}return nI.fromConfig(e))}post_process(e,t=null,s={}){let n;for(let i of this.processors)if(i instanceof Y)e=i.post_process(e).tokens,t&&(t=i.post_process(t).tokens);else{let r=i.post_process(e,t,s);e=r.tokens,n=r.token_type_ids}return{tokens:e,token_type_ids:n}}}class Z extends n.Ag{constructor(e){super(),this.config=e,this.added_tokens=[],this.end_of_word_suffix=null,this.trim_offsets=e.trim_offsets}static fromConfig(e){if(null===e)return null;switch(e.type){case"WordPiece":return 
new X(e);case"Metaspace":return new eo(e);case"ByteLevel":return new ee(e);case"Replace":return new V(e);case"ByteFallback":return new H(e);case"Fuse":return new J(e);case"Strip":return new Q(e);case"Sequence":return new es(e);case"CTC":return new et(e);case"BPEDecoder":return new en(e);default:throw Error(`Unknown Decoder type: ${e.type}`)}}_call(e){return this.decode(e)}decode(e){return this.decode_chain(e).join("")}decode_chain(e){throw Error("`decode_chain` should be implemented in subclass.")}}class V extends Z{decode_chain(e){let t=h(this.config.pattern);return null===t?e:e.map(e=>e.replaceAll(t,this.config.content))}}class H extends Z{constructor(e){super(e),this.text_decoder=new TextDecoder}decode_chain(e){let t=[],s=[];for(let n of e){let e=null;if(6===n.length&&n.startsWith("<0x")&&n.endsWith(">")){let t=parseInt(n.slice(3,5),16);isNaN(t)||(e=t)}if(null!==e)s.push(e);else{if(s.length>0){let e=this.text_decoder.decode(Uint8Array.from(s));t.push(e),s=[]}t.push(n)}}if(s.length>0){let e=this.text_decoder.decode(Uint8Array.from(s));t.push(e),s=[]}return t}}class J extends Z{decode_chain(e){return[e.join("")]}}class Q extends Z{constructor(e){super(e),this.content=this.config.content,this.start=this.config.start,this.stop=this.config.stop}decode_chain(e){return e.map(e=>{let t=0;for(let s=0;s(0!==t&&(e=e.startsWith(this.config.prefix)?e.replace(this.config.prefix,""):" "+e),this.cleanup&&(e=_(e)),e))}}class ee extends Z{constructor(e){super(e),this.byte_decoder=b,this.text_decoder=new TextDecoder("utf-8",{fatal:!1,ignoreBOM:!0}),this.end_of_word_suffix=null}convert_tokens_to_string(e){let t=new Uint8Array([...e.join("")].map(e=>this.byte_decoder[e]));return this.text_decoder.decode(t)}decode_chain(e){let t=[],s=[];for(let n of e)void 0!==this.added_tokens.find(e=>e.content===n)?(s.length>0&&(t.push(this.convert_tokens_to_string(s)),s=[]),t.push(n)):s.push(n);return s.length>0&&t.push(this.convert_tokens_to_string(s)),t}}class et extends 
Z{constructor(e){super(e),this.pad_token=this.config.pad_token,this.word_delimiter_token=this.config.word_delimiter_token,this.cleanup=this.config.cleanup}convert_tokens_to_string(e){if(0===e.length)return"";let t=[e[0]];for(let s=1;se!==this.pad_token).join("");return this.cleanup&&(s=_(s).replaceAll(this.word_delimiter_token," ").trim()),s}decode_chain(e){return[this.convert_tokens_to_string(e)]}}class es extends Z{constructor(e){super(e),this.decoders=e.decoders.map(e=>Z.fromConfig(e))}decode_chain(e){return this.decoders.reduce((e,t)=>t.decode_chain(e),e)}}class en extends Z{constructor(e){super(e),this.suffix=this.config.suffix}decode_chain(e){return e.map((t,s)=>t.replaceAll(this.suffix,s===e.length-1?"":" "))}}class ei extends Z{decode_chain(e){let t="";for(let s=1;se.normalize("NFKC")).join("~"):e.normalize("NFKC")}}class el extends F{constructor(e){super(),this.tokenizers=e.pretokenizers.map(e=>F.fromConfig(e))}pre_tokenize_text(e,t){return this.tokenizers.reduce((e,s)=>s.pre_tokenize(e,t),[e])}}class ec extends F{constructor(e){super()}pre_tokenize_text(e,t){return e.match(/\w+|[^\w\s]+/g)||[]}}class eh extends F{constructor(e){super()}pre_tokenize_text(e,t){return e.match(/\S+/g)||[]}}class eu extends F{constructor(e){super(),this.config=e,this.pattern=h(this.config.pattern),this.content=this.config.content}pre_tokenize_text(e,t){return null===this.pattern?[e]:[e.replaceAll(this.pattern,this.config.content)]}}let ed=["bos_token","eos_token","unk_token","sep_token","pad_token","cls_token","mask_token"];class e_ extends n.Ag{return_token_type_ids=!1;_default_chat_template=`{% for message in messages %}{{'<|im_start|>' + message['role'] + ' ' + message['content'] + '<|im_end|>' + ' '}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant ' }}{% endif %}`;constructor(e,t){for(let s 
of(super(),this._tokenizer_config=t,this.normalizer=A.fromConfig(e.normalizer),this.pre_tokenizer=F.fromConfig(e.pre_tokenizer),this.model=k.fromConfig(e.model,t),this.post_processor=I.fromConfig(e.post_processor),this.decoder=Z.fromConfig(e.decoder),this.special_tokens=[],this.all_special_ids=[],this.added_tokens=[],e.added_tokens)){let e=new m(s);this.added_tokens.push(e),this.model.tokens_to_ids.set(e.content,e.id),this.model.vocab[e.id]=e.content,e.special&&(this.special_tokens.push(e.content),this.all_special_ids.push(e.id))}if(this.additional_special_tokens=t.additional_special_tokens??[],this.special_tokens.push(...this.additional_special_tokens),this.special_tokens=[...new Set(this.special_tokens)],this.decoder&&(this.decoder.added_tokens=this.added_tokens,this.decoder.end_of_word_suffix=this.model.end_of_word_suffix),this.added_tokens_regex=this.added_tokens.length>0?new RegExp(this.added_tokens.map(e=>`${e.lstrip?"\\s*":""}(${(0,n.hr)(e.content)})${e.rstrip?"\\s*":""}`).join("|")):null,this.mask_token=this.getToken("mask_token"),this.mask_token_id=this.model.tokens_to_ids.get(this.mask_token),this.pad_token=this.getToken("pad_token","eos_token"),this.pad_token_id=this.model.tokens_to_ids.get(this.pad_token),this.sep_token=this.getToken("sep_token"),this.sep_token_id=this.model.tokens_to_ids.get(this.sep_token),this.unk_token=this.getToken("unk_token"),this.unk_token_id=this.model.tokens_to_ids.get(this.unk_token),this.model_max_length=t.model_max_length,this.remove_space=t.remove_space,this.clean_up_tokenization_spaces=t.clean_up_tokenization_spaces??!0,this.do_lowercase_and_remove_accent=t.do_lowercase_and_remove_accent??!1,this.padding_side="right",this.legacy=!1,this.chat_template=t.chat_template??null,Array.isArray(this.chat_template)){let e=Object.create(null);for(let{name:t,template:s}of this.chat_template){if("string"!=typeof t||"string"!=typeof s)throw Error('Chat template must be a list of objects with "name" and "template" 
properties');e[t]=s}this.chat_template=e}this._compiled_template_cache=new Map}getToken(...e){for(let t of e){let e=this._tokenizer_config[t];if(e){if("object"!=typeof e)return e;if("AddedToken"===e.__type)return e.content;throw Error(`Unknown token: ${e}`)}}return null}static async from_pretrained(e,{progress_callback:t=null,config:s=null,cache_dir:n=null,local_files_only:i=!1,revision:r="main",legacy:o=null}={}){return new this(...await c(e,{progress_callback:t,config:s,cache_dir:n,local_files_only:i,revision:r,legacy:o}))}_call(e,{text_pair:t=null,add_special_tokens:s=!0,padding:i=!1,truncation:a=null,max_length:l=null,return_tensor:c=!0,return_token_type_ids:h=null}={}){let u;let d=Array.isArray(e);if(d){if(0===e.length)throw Error("text array must be non-empty");if(null!==t){if(Array.isArray(t)){if(e.length!==t.length)throw Error("text and text_pair must have the same length")}else throw Error("text_pair must also be an array");u=e.map((e,n)=>this._encode_plus(e,t[n],{add_special_tokens:s,return_token_type_ids:h}))}else u=e.map(e=>this._encode_plus(e,null,{add_special_tokens:s,return_token_type_ids:h}))}else{if(null==e)throw Error("text may not be null or undefined");if(Array.isArray(t))throw Error("When specifying `text_pair`, since `text` is a string, `text_pair` must also be a string (i.e., not an array).");u=[this._encode_plus(e,t,{add_special_tokens:s,return_token_type_ids:h})]}if(null===l?l="max_length"===i?this.model_max_length:(0,r.Fp)(u.map(e=>e.input_ids.length))[0]:a||console.warn("Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=true` to explicitly truncate examples to max length."),l=Math.min(l,this.model_max_length),i||a)for(let e=0;el?a&&function(e,t){for(let s of Object.keys(e))e[s].length=t}(u[e],l):i&&function(e,t,s,i){for(let r of Object.keys(e)){let 
o=t-e[r].length,a=s(r),l=Array(o).fill(a);e[r]="right"===i?(0,n.eG)(e[r],l):(0,n.eG)(l,e[r])}}(u[e],l,e=>"input_ids"===e?this.pad_token_id:0,this.padding_side));let _={};if(c){if(!(i&&a)&&u.some(e=>{for(let t of Object.keys(e))if(e[t].length!==u[0][t]?.length)return!0;return!1}))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=true' and 'truncation=true' to have batched tensors with the same length.");let e=[u.length,u[0].input_ids.length];for(let t of Object.keys(u[0]))_[t]=new o.es("int64",BigInt64Array.from(u.flatMap(e=>e[t]).map(BigInt)),e)}else{for(let e of Object.keys(u[0]))_[e]=u.map(t=>t[e]);if(!d)for(let e of Object.keys(_))_[e]=_[e][0]}return _}_encode_text(e){return null===e?null:(this.added_tokens_regex?e.split(this.added_tokens_regex).filter(e=>e):[e]).map((e,t)=>{if(void 0!==this.added_tokens.find(t=>t.content===e))return e;{if(!0===this.remove_space&&(e=e.trim().split(/\s+/).join(" ")),this.do_lowercase_and_remove_accent&&(e=p(e.toLowerCase())),null!==this.normalizer&&(e=this.normalizer(e)),0===e.length)return[];let s=null!==this.pre_tokenizer?this.pre_tokenizer(e,{section_index:t}):[e];return this.model(s)}}).flat()}_encode_plus(e,t=null,{add_special_tokens:s=!0,return_token_type_ids:i=null}={}){let r=this._encode_text(e),o=this._encode_text(t),a=this.post_processor?this.post_processor(r,o,{add_special_tokens:s}):{tokens:(0,n.eG)(r??[],o??[])},l=this.model.convert_tokens_to_ids(a.tokens),c={input_ids:l,attention_mask:Array(l.length).fill(1)};return(i??this.return_token_type_ids)&&a.token_type_ids&&(c.token_type_ids=a.token_type_ids),c}encode(e,t=null,{add_special_tokens:s=!0,return_token_type_ids:n=null}={}){let{input_ids:i}=this._encode_plus(e,t,{add_special_tokens:s,return_token_type_ids:n});return i}batch_decode(e,t={}){return e instanceof o.es&&(e=e.tolist()),e.map(e=>this.decode(e,t))}decode(e,t={}){if(e instanceof o.es&&(e=d(e)),!Array.isArray(e)||0===e.length||!(0,n.Wy)(e[0]))throw 
Error("token_ids must be a non-empty array of integers.");return this.decode_single(e,t)}decode_single(e,{skip_special_tokens:t=!1,clean_up_tokenization_spaces:s=null}){let n=this.model.convert_ids_to_tokens(e);t&&(n=n.filter(e=>!this.special_tokens.includes(e)));let i=this.decoder?this.decoder(n):n.join(" ");return this.decoder&&this.decoder.end_of_word_suffix&&(i=i.replaceAll(this.decoder.end_of_word_suffix," "),t&&(i=i.trim())),(s??this.clean_up_tokenization_spaces)&&(i=_(i)),i}get default_chat_template(){return this._warned_about_chat_template||(console.warn("No chat template is defined for this tokenizer - using a default chat template that implements the ChatML format. If the default is not appropriate for your model, please set `tokenizer.chat_template` to an appropriate template. See https://huggingface.co/docs/transformers/main/chat_templating for more information."),this._warned_about_chat_template=!0),this._default_chat_template}apply_chat_template(e,{chat_template:t=null,add_generation_prompt:s=!1,tokenize:n=!0,padding:i=!1,truncation:r=!1,max_length:o=null,return_tensor:a=!0,tokenizer_kwargs:c={},...h}={}){if(this.chat_template&&"object"==typeof this.chat_template||null===this.chat_template&&this.default_chat_template&&"object"==typeof this.default_chat_template){let e=this.chat_template??this.default_chat_template;if(null!==t&&Object.hasOwn(e,t))t=e[t];else if(null===t&&"default"in e)t=e.default;else if(null===t)throw Error(`This model has multiple chat templates with no default specified! Please either pass a chat template or the name of the template you wish to use to the 'chat_template' argument. 
Available template names are ${Object.keys(e).sort()}.`)}else t??=this.chat_template??this.default_chat_template;if("string"!=typeof t)throw Error(`chat_template must be a string, but got ${typeof t}`);let u=this._compiled_template_cache.get(t);void 0===u&&(u=new l.YS(t),this._compiled_template_cache.set(t,u));let d=Object.create(null);for(let e of ed){let t=this.getToken(e);t&&(d[e]=t)}let _=u.render({messages:e,add_generation_prompt:s,...d,...h});return n?this._call(_,{add_special_tokens:!1,padding:i,truncation:r,max_length:o,return_tensor:a,...c}).input_ids:_}}class ep extends e_{return_token_type_ids=!0}class ef extends e_{return_token_type_ids=!0}class eg extends e_{return_token_type_ids=!0}class em extends e_{return_token_type_ids=!0}class ek extends e_{return_token_type_ids=!0}class ex extends e_{return_token_type_ids=!0}class ew extends e_{return_token_type_ids=!0}class ey extends e_{return_token_type_ids=!0}class eb extends e_{return_token_type_ids=!0}class ev extends e_{}class ez extends e_{}class eA extends e_{return_token_type_ids=!0;constructor(e,t){super(e,t),console.warn('WARNING: `XLMTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. 
Therefore, you may experience slightly inaccurate results.')}}class eS extends e_{return_token_type_ids=!0}class eE extends e_{}class eT extends e_{_default_chat_template='{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}'}class eC extends e_{}class ej extends e_{constructor(e,t){super(e,t),this.languageRegex=/^[a-z]{2}_[A-Z]{2}$/,this.language_codes=this.special_tokens.filter(e=>this.languageRegex.test(e)),this.lang_to_token=e=>e}_build_translation_inputs(e,t,s){return eD(this,e,t,s)}}class eM extends ej{}class eP extends e_{}class e$ extends eT{constructor(e,t){let s=".,!?…。,、।۔،",n=e.pre_tokenizer?.pretokenizers[0]?.pattern;n&&n.Regex===` ?[^(\\s|[${s}])]+`&&(n.Regex=` ?[^\\s${s}]+`),super(e,t)}}class eR extends e_{_default_chat_template=`{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif USE_DEFAULT_PROMPT == true and not '<>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<> ' + system_message + ' <> ' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<> ' + content.strip() + ' <> ' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}`;DEFAULT_SYSTEM_PROMPT="You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. 
Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.";constructor(e,t){super(e,t),this.use_default_system_prompt=t.use_default_system_prompt??!1,this.legacy=t.legacy??!0,this.legacy||(this.normalizer=null,this.pre_tokenizer=new er({replacement:"▁",add_prefix_space:!0,prepend_scheme:"first"}))}_encode_text(e){if(null===e)return null;if(this.legacy||0===e.length)return super._encode_text(e);let t=super._encode_text("▁"+e.replaceAll("▁"," "));return t.length>1&&"▁"===t[0]&&this.special_tokens.includes(t[1])&&(t=t.slice(1)),t}get default_chat_template(){return super.default_chat_template.replaceAll("USE_DEFAULT_PROMPT",this.use_default_system_prompt?"true":"false").replaceAll("DEFAULT_SYSTEM_MESSAGE",this.DEFAULT_SYSTEM_PROMPT.replaceAll("\n","\\n").replaceAll("'","\\'"))}}class eN extends eR{}class eF extends e_{}class eL extends e_{}class eO extends e_{}class eU extends e_{}class eW extends e_{}class eG extends e_{}class eI extends e_{_default_chat_template="{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}"}class eB extends e_{}function eD(e,t,s,n){if(!("language_codes"in e)||!Array.isArray(e.language_codes))throw Error("Tokenizer must have 
`language_codes` attribute set and it should be an array of language ids.");if(!("languageRegex"in e)||!(e.languageRegex instanceof RegExp))throw Error("Tokenizer must have `languageRegex` attribute set and it should be a regular expression.");if(!("lang_to_token"in e)||"function"!=typeof e.lang_to_token)throw Error("Tokenizer must have `lang_to_token` attribute set and it should be a function.");let i=n.src_lang,r=n.tgt_lang;if(!e.language_codes.includes(r))throw Error(`Target language code "${r}" is not valid. Must be one of: {${e.language_codes.join(", ")}}`);if(void 0!==i){if(!e.language_codes.includes(i))throw Error(`Source language code "${i}" is not valid. Must be one of: {${e.language_codes.join(", ")}}`);for(let t of e.post_processor.config.single)if("SpecialToken"in t&&e.languageRegex.test(t.SpecialToken.id)){t.SpecialToken.id=e.lang_to_token(i);break}}return n.forced_bos_token_id=e.model.convert_tokens_to_ids([e.lang_to_token(r)])[0],e._call(t,s)}class eq extends e_{constructor(e,t){super(e,t),this.languageRegex=/^[a-z]{3}_[A-Z][a-z]{3}$/,this.language_codes=this.special_tokens.filter(e=>this.languageRegex.test(e)),this.lang_to_token=e=>e}_build_translation_inputs(e,t,s){return eD(this,e,t,s)}}class eY extends e_{constructor(e,t){super(e,t),this.languageRegex=/^__[a-z]{2,3}__$/,this.language_codes=this.special_tokens.filter(e=>this.languageRegex.test(e)).map(e=>e.slice(2,-2)),this.lang_to_token=e=>`__${e}__`}_build_translation_inputs(e,t,s){return eD(this,e,t,s)}}let 
eK=[["en","english"],["zh","chinese"],["de","german"],["es","spanish"],["ru","russian"],["ko","korean"],["fr","french"],["ja","japanese"],["pt","portuguese"],["tr","turkish"],["pl","polish"],["ca","catalan"],["nl","dutch"],["ar","arabic"],["sv","swedish"],["it","italian"],["id","indonesian"],["hi","hindi"],["fi","finnish"],["vi","vietnamese"],["he","hebrew"],["uk","ukrainian"],["el","greek"],["ms","malay"],["cs","czech"],["ro","romanian"],["da","danish"],["hu","hungarian"],["ta","tamil"],["no","norwegian"],["th","thai"],["ur","urdu"],["hr","croatian"],["bg","bulgarian"],["lt","lithuanian"],["la","latin"],["mi","maori"],["ml","malayalam"],["cy","welsh"],["sk","slovak"],["te","telugu"],["fa","persian"],["lv","latvian"],["bn","bengali"],["sr","serbian"],["az","azerbaijani"],["sl","slovenian"],["kn","kannada"],["et","estonian"],["mk","macedonian"],["br","breton"],["eu","basque"],["is","icelandic"],["hy","armenian"],["ne","nepali"],["mn","mongolian"],["bs","bosnian"],["kk","kazakh"],["sq","albanian"],["sw","swahili"],["gl","galician"],["mr","marathi"],["pa","punjabi"],["si","sinhala"],["km","khmer"],["sn","shona"],["yo","yoruba"],["so","somali"],["af","afrikaans"],["oc","occitan"],["ka","georgian"],["be","belarusian"],["tg","tajik"],["sd","sindhi"],["gu","gujarati"],["am","amharic"],["yi","yiddish"],["lo","lao"],["uz","uzbek"],["fo","faroese"],["ht","haitian creole"],["ps","pashto"],["tk","turkmen"],["nn","nynorsk"],["mt","maltese"],["sa","sanskrit"],["lb","luxembourgish"],["my","myanmar"],["bo","tibetan"],["tl","tagalog"],["mg","malagasy"],["as","assamese"],["tt","tatar"],["haw","hawaiian"],["ln","lingala"],["ha","hausa"],["ba","bashkir"],["jw","javanese"],["su","sundanese"]],eZ=new Map(eK),eV=new Map([...eK.map(([e,t])=>[t,e]),["burmese","my"],["valencian","ca"],["flemish","nl"],["haitian","ht"],["letzeburgesch","lb"],["pushto","ps"],["panjabi","pa"],["moldavian","ro"],["moldovan","ro"],["sinhalese","si"],["castilian","es"]]);class eH extends 
e_{_default_chat_template='{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}';_decode_asr(e,{return_timestamps:t=!1,return_language:s=!1,time_precision:n=null,force_full_sequences:i=!0}={}){if(null===n)throw Error("Must specify time_precision");let o=null,a="word"===t;function l(){return{language:o,timestamp:[null,null],text:""}}let c=[],h=l(),u=0,d=this.model.convert_tokens_to_ids(["<|notimestamps|>"])[0]+1,_=[],p=[],f=!1,g=null,m=new Set(this.all_special_ids);for(let s of e){let e=s.tokens,i=a?s.token_timestamps:null,k=null,x=d;if("stride"in s){let[t,i,r]=s.stride;if(u-=i,g=t-r,i&&(x=i/n+d),r)for(let t=e.length-1;t>=0;--t){let s=e[t];if(s>=d){if(null!==k&&(s-d)*n=d){let e=(g-d)*n+u,t=(0,r.NM)(e,2);if(null!==k&&g>=k)f=!0;else if(f||_.length>0&&g0?(_.push(w),a&&p.push(y)):_.every(e=>0===e.length)&&(h=l(),_=[],w=[],p=[],y=[])}if(_.length>0){if(i&&t)throw Error("Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. 
Also make sure WhisperTimeStampLogitsProcessor was used during generation.");let[e,s]=this.findLongestCommonSequence(_,p),n=this.decode(e);h.text=n,a&&(h.words=this.collateWordTimestamps(e,s,o)),c.push(h)}let k=Object.create(null),x=c.map(e=>e.text).join("");if(t||s){for(let e=0;e0,o=r?[]:null,a=r?t[0]:null;for(let l=1;le===_[t]).length,f=p/e+t;p>1&&f>h&&(h=f,u=[i,r,a,l])}let[_,p,f,g]=u,m=Math.floor((p+_)/2),k=Math.floor((g+f)/2);i.push(...s.slice(0,m)),n=(s=c.slice(k)).length,r&&(o.push(...a.slice(0,m)),a=t[l].slice(k))}return(i.push(...s),r)?(o.push(...a),[i,o]):[i,[]]}collateWordTimestamps(e,t,s){let[n,i,r]=this.combineTokensIntoWords(e,s),o=[];for(let e=0;e=n){let e=(0,r.NM)((t-n)*s,2);i.push(`<|${e}|>`),i.push([])}else i[i.length-1].push(t);return(i=i.map(e=>"string"==typeof e?e:super.decode(e,t))).join("")}splitTokensOnUnicode(e){let t=this.decode(e,{decode_with_timestamps:!0}),s=[],n=[],i=[],r=[],o=[],a=0;for(let l=0;l=this.model.tokens_to_ids.get("<|endoftext|>"),d=l.startsWith(" "),_=l.trim(),p=a.test(_);if(u||d||p||0===i.length)i.push(l),r.push(c),o.push(h);else{let e=i.length-1;i[e]+=l,r[e].push(...c),o[e].push(...h)}}return[i,r,o]}mergePunctuations(e,t,s,i,r){let o=structuredClone(e),a=structuredClone(t),l=structuredClone(s),c=o.length-2,h=o.length-1;for(;c>=0;)o[c].startsWith(" ")&&i.includes(o[c].trim())?(o[h]=o[c]+o[h],a[h]=(0,n.eG)(a[c],a[h]),l[h]=(0,n.eG)(l[c],l[h]),o[c]="",a[c]=[],l[c]=[]):h=c,--c;for(c=0,h=1;he),a.filter(e=>e.length>0),l.filter(e=>e.length>0)]}get_decoder_prompt_ids({language:e=null,task:t=null,no_timestamps:s=!0}={}){let n=[];if(e){e=e.toLowerCase();let t=eV.get(e);if(void 0===t){if(eZ.has(e))t=e;else{let t=2===e.length?eZ.keys():eZ.values();throw Error(`Language "${e}" is not supported. Must be one of: ${JSON.stringify(t)}`)}}let s=this.model.tokens_to_ids.get(`<|${t}|>`);if(void 0===s)throw Error(`Unable to find language "${t}" in model vocabulary. 
Please report this issue at https://github.com/xenova/transformers.js/issues/new/choose.`);n.push(s)}else n.push(null);if(t){if("transcribe"!==(t=t.toLowerCase())&&"translate"!==t)throw Error(`Task "${t}" is not supported. Must be one of: ["transcribe", "translate"]`);let e=this.model.tokens_to_ids.get(`<|${t}|>`);if(void 0===e)throw Error(`Unable to find task "${t}" in model vocabulary. Please report this issue at https://github.com/xenova/transformers.js/issues/new/choose.`);n.push(e)}else n.push(null);if(s){let e=this.model.tokens_to_ids.get("<|notimestamps|>");if(void 0===e)throw Error('Unable to find "<|notimestamps|>" in model vocabulary. Please report this issue at https://github.com/xenova/transformers.js/issues/new/choose.');n.push(e)}return n.map((e,t)=>[t+1,e]).filter(e=>null!==e[1])}}class eJ extends e_{}class eQ extends e_{}class eX extends e_{}class e0 extends e_{constructor(e,t){super(e,t),this.languageRegex=/^(>>\w+<<)\s*/g,this.supported_language_codes=this.model.vocab.filter(e=>this.languageRegex.test(e)),console.warn('WARNING: `MarianTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. Therefore, you may experience slightly inaccurate results.')}_encode_text(e){if(null===e)return null;let[t,...s]=e.trim().split(this.languageRegex);if(0===s.length)return super._encode_text(t);if(2===s.length){let[e,t]=s;return this.supported_language_codes.includes(e)||console.warn(`Unsupported language code "${e}" detected, which may lead to unexpected behavior. 
Should be one of: ${JSON.stringify(this.supported_language_codes)}`),(0,n.eG)([e],super._encode_text(t))}}}class e1 extends e_{}class e2 extends e_{_default_chat_template="{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ ' ' }}{% endif %}{% endfor %}{{ eos_token }}"}class e3 extends e2{}class e8 extends e_{}class e6 extends e_{}class e7 extends e_{constructor(e,t){super(e,t),this.decoder=new ei({})}}class e9 extends e_{}class e4{static TOKENIZER_CLASS_MAPPING={T5Tokenizer:eE,DistilBertTokenizer:ev,CamembertTokenizer:ez,DebertaTokenizer:ek,DebertaV2Tokenizer:ex,BertTokenizer:ep,HerbertTokenizer:ew,ConvBertTokenizer:ey,RoFormerTokenizer:eb,XLMTokenizer:eA,ElectraTokenizer:eS,MobileBertTokenizer:eg,SqueezeBertTokenizer:em,AlbertTokenizer:ef,GPT2Tokenizer:eT,BartTokenizer:eC,MBartTokenizer:ej,MBart50Tokenizer:eM,RobertaTokenizer:eP,WhisperTokenizer:eH,CodeGenTokenizer:eJ,CLIPTokenizer:eQ,SiglipTokenizer:eX,MarianTokenizer:e0,BloomTokenizer:e$,NllbTokenizer:eq,M2M100Tokenizer:eY,LlamaTokenizer:eR,CodeLlamaTokenizer:eN,XLMRobertaTokenizer:eF,MPNetTokenizer:eL,FalconTokenizer:eO,GPTNeoXTokenizer:eU,EsmTokenizer:eW,Wav2Vec2CTCTokenizer:e1,BlenderbotTokenizer:e2,BlenderbotSmallTokenizer:e3,SpeechT5Tokenizer:e8,NougatTokenizer:e6,VitsTokenizer:e7,Qwen2Tokenizer:eG,GemmaTokenizer:eI,Grok1Tokenizer:eB,CohereTokenizer:e9,PreTrainedTokenizer:e_};static async from_pretrained(e,{quantized:t=!0,progress_callback:s=null,config:n=null,cache_dir:i=null,local_files_only:r=!1,revision:o="main",legacy:a=null}={}){let[l,h]=await c(e,{quantized:t,progress_callback:s,config:n,cache_dir:i,local_files_only:r,revision:o,legacy:a}),u=h.tokenizer_class?.replace(/Fast$/,"")??"PreTrainedTokenizer",d=this.TOKENIZER_CLASS_MAPPING[u];return d||(console.warn(`Unknown tokenizer class "${u}", attempting to construct from base class.`),d=e_),new d(l,h)}}}}]);