File size: 757 Bytes
7026e84
 
 
 
 
 
 
 
 
 
 
 
 
 
4a43f6f
 
 
 
 
 
 
 
7026e84
a2706d6
7026e84
 
 
 
 
 
ae43bf0
7026e84
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/**
 * Names of the special-token attributes handled by this module.
 *
 * The tuple is declared `as const` so that its element literals can be turned
 * into a union type with `(typeof SPECIAL_TOKENS_ATTRIBUTES)[number]`.
 */
export const SPECIAL_TOKENS_ATTRIBUTES = [
	"bos_token",
	"eos_token",
	"unk_token",
	"sep_token",
	"pad_token",
	"cls_token",
	"mask_token",
	// TODO: `additional_special_tokens` is not covered yet
] as const;

/**
 * Object form of a special token, used where a bare string is not enough.
 *
 * Only the `__type` tag is required; every other field may be omitted.
 *
 * NOTE(review): the field names look like the serialized `AddedToken` of
 * Hugging Face tokenizers — confirm against whatever produces this JSON.
 */
export interface AddedToken {
	/** Literal tag marking the object as an added token. */
	__type: "AddedToken";
	/** Token text, when provided. */
	content?: string;
	/** NOTE(review): presumably "match only as a whole word" — confirm. */
	single_word?: boolean;
	/** NOTE(review): presumably "strip whitespace on the left of the token" — confirm. */
	lstrip?: boolean;
	/** NOTE(review): presumably "strip whitespace on the right of the token" — confirm. */
	rstrip?: boolean;
	/** NOTE(review): presumably "match against normalized text" — confirm. */
	normalized?: boolean;
}
/**
 * Mapping from each name listed in `SPECIAL_TOKENS_ATTRIBUTES` to its token.
 *
 * Every key is optional. A value that is present is either the token as a
 * plain string, an `AddedToken` object, or `null`.
 */
export type SpecialTokensMap = Partial<
	Record<(typeof SPECIAL_TOKENS_ATTRIBUTES)[number], string | AddedToken | null>
>;
/**
 * Shape of a tokenizer configuration object.
 *
 * Inherits every optional special-token entry from `SpecialTokensMap` and adds
 * the optional fields declared below.
 */
export interface TokenizerConfig extends SpecialTokensMap {
	/**
	 * Chat template: either a single template string, or a list of named
	 * templates given as `{ name, template }` pairs.
	 */
	chat_template?: string | { name: string; template: string }[];
	/**
	 * NOTE(review): presumably toggles a built-in default system prompt — the
	 * exact semantics are not visible in this file; confirm with the consumer.
	 */
	use_default_system_prompt?: boolean;
}