RefalMachine committed 4 days ago

Commit

0baa152

•

1 Parent(s): b241d44

Upload folder using huggingface_hub

Browse files

Files changed (47) hide show

.gitattributes +4 -0
added_tokens.json +24 -0
config.json +28 -0
generation_config.json +10 -0
llmtf_eval/daru_treewayabstractive.jsonl +0 -0
llmtf_eval/daru_treewayabstractive_params.jsonl +54 -0
llmtf_eval/daru_treewayabstractive_total.jsonl +8 -0
llmtf_eval/daru_treewayextractive.jsonl +3 -0
llmtf_eval/daru_treewayextractive_params.jsonl +54 -0
llmtf_eval/daru_treewayextractive_total.jsonl +7 -0
llmtf_eval/darumeru_MultiQ.jsonl +0 -0
llmtf_eval/darumeru_MultiQ_params.jsonl +54 -0
llmtf_eval/darumeru_MultiQ_total.jsonl +8 -0
llmtf_eval/darumeru_PARus.jsonl +0 -0
llmtf_eval/darumeru_PARus_params.jsonl +54 -0
llmtf_eval/darumeru_PARus_total.jsonl +7 -0
llmtf_eval/darumeru_RCB.jsonl +0 -0
llmtf_eval/darumeru_RCB_params.jsonl +54 -0
llmtf_eval/darumeru_RCB_total.jsonl +8 -0
llmtf_eval/darumeru_RWSD.jsonl +0 -0
llmtf_eval/darumeru_RWSD_params.jsonl +54 -0
llmtf_eval/darumeru_RWSD_total.jsonl +7 -0
llmtf_eval/darumeru_cp_para_ru.jsonl +0 -0
llmtf_eval/darumeru_cp_para_ru_params.jsonl +54 -0
llmtf_eval/darumeru_cp_para_ru_total.jsonl +9 -0
llmtf_eval/darumeru_ruOpenBookQA.jsonl +0 -0
llmtf_eval/darumeru_ruOpenBookQA_params.jsonl +54 -0
llmtf_eval/darumeru_ruOpenBookQA_total.jsonl +8 -0
llmtf_eval/darumeru_ruWorldTree.jsonl +0 -0
llmtf_eval/darumeru_ruWorldTree_params.jsonl +54 -0
llmtf_eval/darumeru_ruWorldTree_total.jsonl +8 -0
llmtf_eval/evaluation_log.txt +251 -0
llmtf_eval/evaluation_results.txt +2 -0
llmtf_eval/nlpcoreteam_enMMLU.jsonl +3 -0
llmtf_eval/nlpcoreteam_enMMLU_params.jsonl +54 -0
llmtf_eval/nlpcoreteam_enMMLU_total.jsonl +7 -0
llmtf_eval/nlpcoreteam_ruMMLU.jsonl +3 -0
llmtf_eval/nlpcoreteam_ruMMLU_params.jsonl +54 -0
llmtf_eval/nlpcoreteam_ruMMLU_total.jsonl +7 -0
merges.txt +0 -0
model-00001-of-00002.safetensors +3 -0
model-00002-of-00002.safetensors +3 -0
model.safetensors.index.json +441 -0
special_tokens_map.json +38 -0
tokenizer.json +3 -0
tokenizer_config.json +207 -0
vocab.json +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+llmtf_eval/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

added_tokens.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "</tool_call>": 131521,
+  "<tool_call>": 131520,
+  "<|box_end|>": 131512,
+  "<|box_start|>": 131511,
+  "<|endoftext|>": 131506,
+  "<|file_sep|>": 131527,
+  "<|fim_middle|>": 131523,
+  "<|fim_pad|>": 131525,
+  "<|fim_prefix|>": 131522,
+  "<|fim_suffix|>": 131524,
+  "<|im_end|>": 131508,
+  "<|im_start|>": 131507,
+  "<|image_pad|>": 131518,
+  "<|object_ref_end|>": 131510,
+  "<|object_ref_start|>": 131509,
+  "<|quad_end|>": 131514,
+  "<|quad_start|>": 131513,
+  "<|repo_name|>": 131526,
+  "<|video_pad|>": 131519,
+  "<|vision_end|>": 131516,
+  "<|vision_pad|>": 131517,
+  "<|vision_start|>": 131515
+}

config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "_name_or_path": "/workdir/data/models/qwen/ruadapt_qwen2.5_3B_ext_u32_lr5e4/sft1",
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "eos_token_id": 131508,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 70,
+  "model_type": "qwen2",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.45.2",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 131528
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "do_sample": true,
+  "eos_token_id": 131508,
+  "pad_token_id": 131506,
+  "repetition_penalty": 1.05,
+  "temperature": 0.7,
+  "top_k": 20,
+  "top_p": 0.8,
+  "transformers_version": "4.45.2"
+}

llmtf_eval/daru_treewayabstractive.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval/daru_treewayabstractive_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "/workdir/data/models/qwen/ruadapt_qwen2.5_3B_ext_u32_lr5e4/kto2",
+        "generation_config": {
+            "bos_token_id": 131506,
+            "do_sample": true,
+            "eos_token_id": [
+                131508
+            ],
+            "max_length": 32768,
+            "max_new_tokens": 512,
+            "pad_token_id": 131506,
+            "stop_strings": [
+                "<|im_end|>"
+            ],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.45.2",
+            "trust_remote_code": false
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "",
+            "suffix": "<|im_start|>assistant\n",
+            "add_special_tokens": false,
+            "eos_token": "<|im_end|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": false,
+        "max_model_len": 32768
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 200,
+        "method": "generate"
+    }
+}

llmtf_eval/daru_treewayabstractive_total.jsonl ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "task_name": "daru/treewayabstractive",
+    "results": {
+        "rouge1": 0.33097672264173833,
+        "rouge2": 0.12022011135293731
+    },
+    "leaderboard_result": 0.22559841699733782
+}

llmtf_eval/daru_treewayextractive.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c99ed436217de5a8eaf9183764f33451cd1fb26dc92816597d85650ee479fa1c
+size 218195331

llmtf_eval/daru_treewayextractive_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "/workdir/data/models/qwen/ruadapt_qwen2.5_3B_ext_u32_lr5e4/kto2",
+        "generation_config": {
+            "bos_token_id": 131506,
+            "do_sample": true,
+            "eos_token_id": [
+                131508
+            ],
+            "max_length": 32768,
+            "max_new_tokens": 1,
+            "pad_token_id": 131506,
+            "stop_strings": [
+                "<|im_end|>"
+            ],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.45.2",
+            "trust_remote_code": false
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "",
+            "suffix": "<|im_start|>assistant\n",
+            "add_special_tokens": false,
+            "eos_token": "<|im_end|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": false,
+        "max_model_len": 32768
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 1000,
+        "method": "calculate_logsoftmax"
+    }
+}

llmtf_eval/daru_treewayextractive_total.jsonl ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "task_name": "daru/treewayextractive",
+    "results": {
+        "r-prec": 0.38688455988455983
+    },
+    "leaderboard_result": 0.38688455988455983
+}

llmtf_eval/darumeru_MultiQ.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval/darumeru_MultiQ_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "/workdir/data/models/qwen/ruadapt_qwen2.5_3B_ext_u32_lr5e4/kto2",
+        "generation_config": {
+            "bos_token_id": 131506,
+            "do_sample": true,
+            "eos_token_id": [
+                131508
+            ],
+            "max_length": 32768,
+            "max_new_tokens": 64,
+            "pad_token_id": 131506,
+            "stop_strings": [
+                "<|im_end|>"
+            ],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.45.2",
+            "trust_remote_code": false
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "",
+            "suffix": "<|im_start|>assistant\n",
+            "add_special_tokens": false,
+            "eos_token": "<|im_end|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": false,
+        "max_model_len": 32768
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "generate"
+    }
+}

llmtf_eval/darumeru_MultiQ_total.jsonl ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "task_name": "darumeru/MultiQ",
+    "results": {
+        "f1": 0.3543006348150236,
+        "em": 0.23996175908221798
+    },
+    "leaderboard_result": 0.2971311969486208
+}

llmtf_eval/darumeru_PARus.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval/darumeru_PARus_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "/workdir/data/models/qwen/ruadapt_qwen2.5_3B_ext_u32_lr5e4/kto2",
+        "generation_config": {
+            "bos_token_id": 131506,
+            "do_sample": true,
+            "eos_token_id": [
+                131508
+            ],
+            "max_length": 32768,
+            "max_new_tokens": 64,
+            "pad_token_id": 131506,
+            "stop_strings": [
+                "<|im_end|>"
+            ],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.45.2",
+            "trust_remote_code": false
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "",
+            "suffix": "<|im_start|>assistant\n",
+            "add_special_tokens": false,
+            "eos_token": "<|im_end|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": false,
+        "max_model_len": 32768
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval/darumeru_PARus_total.jsonl ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "task_name": "darumeru/PARus",
+    "results": {
+        "acc": 0.69
+    },
+    "leaderboard_result": 0.69
+}

llmtf_eval/darumeru_RCB.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval/darumeru_RCB_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "/workdir/data/models/qwen/ruadapt_qwen2.5_3B_ext_u32_lr5e4/kto2",
+        "generation_config": {
+            "bos_token_id": 131506,
+            "do_sample": true,
+            "eos_token_id": [
+                131508
+            ],
+            "max_length": 32768,
+            "max_new_tokens": 64,
+            "pad_token_id": 131506,
+            "stop_strings": [
+                "<|im_end|>"
+            ],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.45.2",
+            "trust_remote_code": false
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "",
+            "suffix": "<|im_start|>assistant\n",
+            "add_special_tokens": false,
+            "eos_token": "<|im_end|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": false,
+        "max_model_len": 32768
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval/darumeru_RCB_total.jsonl ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "task_name": "darumeru/RCB",
+    "results": {
+        "acc": 0.5409090909090909,
+        "f1_macro": 0.4899858481029719
+    },
+    "leaderboard_result": 0.5154474695060314
+}

llmtf_eval/darumeru_RWSD.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval/darumeru_RWSD_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "/workdir/data/models/qwen/ruadapt_qwen2.5_3B_ext_u32_lr5e4/kto2",
+        "generation_config": {
+            "bos_token_id": 131506,
+            "do_sample": true,
+            "eos_token_id": [
+                131508
+            ],
+            "max_length": 32768,
+            "max_new_tokens": 64,
+            "pad_token_id": 131506,
+            "stop_strings": [
+                "<|im_end|>"
+            ],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.45.2",
+            "trust_remote_code": false
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "",
+            "suffix": "<|im_start|>assistant\n",
+            "add_special_tokens": false,
+            "eos_token": "<|im_end|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": false,
+        "max_model_len": 32768
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval/darumeru_RWSD_total.jsonl ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "task_name": "darumeru/RWSD",
+    "results": {
+        "acc": 0.5392156862745098
+    },
+    "leaderboard_result": 0.5392156862745098
+}

llmtf_eval/darumeru_cp_para_ru.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval/darumeru_cp_para_ru_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "/workdir/data/models/qwen/ruadapt_qwen2.5_3B_ext_u32_lr5e4/kto2",
+        "generation_config": {
+            "bos_token_id": 131506,
+            "do_sample": true,
+            "eos_token_id": [
+                131508
+            ],
+            "max_length": 32768,
+            "max_new_tokens": 1024,
+            "pad_token_id": 131506,
+            "stop_strings": [
+                "<|im_end|>"
+            ],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.45.2",
+            "trust_remote_code": false
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "",
+            "suffix": "<|im_start|>assistant\n",
+            "add_special_tokens": false,
+            "eos_token": "<|im_end|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": false,
+        "max_model_len": 32768
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "generate"
+    }
+}

llmtf_eval/darumeru_cp_para_ru_total.jsonl ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "task_name": "darumeru/cp_para_ru",
+    "results": {
+        "symbol_per_token": 3.7695317959683377,
+        "len": 0.9951596967747576,
+        "lcs": 0.9
+    },
+    "leaderboard_result": 0.9
+}

llmtf_eval/darumeru_ruOpenBookQA.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval/darumeru_ruOpenBookQA_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "/workdir/data/models/qwen/ruadapt_qwen2.5_3B_ext_u32_lr5e4/kto2",
+        "generation_config": {
+            "bos_token_id": 131506,
+            "do_sample": true,
+            "eos_token_id": [
+                131508
+            ],
+            "max_length": 32768,
+            "max_new_tokens": 64,
+            "pad_token_id": 131506,
+            "stop_strings": [
+                "<|im_end|>"
+            ],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.45.2",
+            "trust_remote_code": false
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "",
+            "suffix": "<|im_start|>assistant\n",
+            "add_special_tokens": false,
+            "eos_token": "<|im_end|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": false,
+        "max_model_len": 32768
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval/darumeru_ruOpenBookQA_total.jsonl ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "task_name": "darumeru/ruOpenBookQA",
+    "results": {
+        "acc": 0.7152061855670103,
+        "f1_macro": 0.7151629824958838
+    },
+    "leaderboard_result": 0.715184584031447
+}

llmtf_eval/darumeru_ruWorldTree.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval/darumeru_ruWorldTree_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "/workdir/data/models/qwen/ruadapt_qwen2.5_3B_ext_u32_lr5e4/kto2",
+        "generation_config": {
+            "bos_token_id": 131506,
+            "do_sample": true,
+            "eos_token_id": [
+                131508
+            ],
+            "max_length": 32768,
+            "max_new_tokens": 64,
+            "pad_token_id": 131506,
+            "stop_strings": [
+                "<|im_end|>"
+            ],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.45.2",
+            "trust_remote_code": false
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "",
+            "suffix": "<|im_start|>assistant\n",
+            "add_special_tokens": false,
+            "eos_token": "<|im_end|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": false,
+        "max_model_len": 32768
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval/darumeru_ruWorldTree_total.jsonl ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "task_name": "darumeru/ruWorldTree",
+    "results": {
+        "acc": 0.8761904761904762,
+        "f1_macro": 0.8751507751507751
+    },
+    "leaderboard_result": 0.8756706256706257
+}

llmtf_eval/evaluation_log.txt ADDED Viewed

	@@ -0,0 +1,251 @@

+INFO: 2024-10-15 08:03:25,784: llmtf.base.evaluator: Starting eval on ['darumeru/multiq']
+INFO: 2024-10-15 08:03:25,784: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [131508]
+INFO: 2024-10-15 08:03:25,784: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|im_end|>']
+INFO: 2024-10-15 08:03:29,508: llmtf.base.darumeru/MultiQ: Loading Dataset: 3.72s
+INFO: 2024-10-15 08:08:38,771: llmtf.base.darumeru/MultiQ: Processing Dataset: 309.26s
+INFO: 2024-10-15 08:08:38,771: llmtf.base.darumeru/MultiQ: Results for darumeru/MultiQ:
+INFO: 2024-10-15 08:08:38,772: llmtf.base.darumeru/MultiQ: {'f1': 0.3543006348150236, 'em': 0.23996175908221798}
+INFO: 2024-10-15 08:08:38,777: llmtf.base.evaluator: Ended eval
+INFO: 2024-10-15 08:08:38,778: llmtf.base.evaluator:
+mean	darumeru/MultiQ
+0.297	0.297
+INFO: 2024-10-15 08:08:47,368: llmtf.base.evaluator: Starting eval on ['darumeru/parus']
+INFO: 2024-10-15 08:08:47,368: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [131508]
+INFO: 2024-10-15 08:08:47,368: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|im_end|>']
+INFO: 2024-10-15 08:08:49,664: llmtf.base.darumeru/PARus: Loading Dataset: 2.30s
+INFO: 2024-10-15 08:08:54,092: llmtf.base.darumeru/PARus: Processing Dataset: 4.43s
+INFO: 2024-10-15 08:08:54,093: llmtf.base.darumeru/PARus: Results for darumeru/PARus:
+INFO: 2024-10-15 08:08:54,104: llmtf.base.darumeru/PARus: {'acc': 0.69}
+INFO: 2024-10-15 08:08:54,105: llmtf.base.evaluator: Ended eval
+INFO: 2024-10-15 08:08:54,106: llmtf.base.evaluator:
+mean	darumeru/MultiQ	darumeru/PARus
+0.494	0.297	0.690
+INFO: 2024-10-15 08:09:02,805: llmtf.base.evaluator: Starting eval on ['darumeru/rcb']
+INFO: 2024-10-15 08:09:02,805: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [131508]
+INFO: 2024-10-15 08:09:02,805: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|im_end|>']
+INFO: 2024-10-15 08:09:05,232: llmtf.base.darumeru/RCB: Loading Dataset: 2.43s
+INFO: 2024-10-15 08:09:10,833: llmtf.base.darumeru/RCB: Processing Dataset: 5.60s
+INFO: 2024-10-15 08:09:10,834: llmtf.base.darumeru/RCB: Results for darumeru/RCB:
+INFO: 2024-10-15 08:09:10,837: llmtf.base.darumeru/RCB: {'acc': 0.5409090909090909, 'f1_macro': 0.4899858481029719}
+INFO: 2024-10-15 08:09:10,838: llmtf.base.evaluator: Ended eval
+INFO: 2024-10-15 08:09:10,839: llmtf.base.evaluator:
+mean	darumeru/MultiQ	darumeru/PARus	darumeru/RCB
+0.501	0.297	0.690	0.515
+INFO: 2024-10-15 08:09:19,476: llmtf.base.evaluator: Starting eval on ['darumeru/ruopenbookqa']
+INFO: 2024-10-15 08:09:19,476: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [131508]
+INFO: 2024-10-15 08:09:19,476: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|im_end|>']
+INFO: 2024-10-15 08:09:22,959: llmtf.base.darumeru/ruOpenBookQA: Loading Dataset: 3.48s
+INFO: 2024-10-15 08:10:13,472: llmtf.base.darumeru/ruOpenBookQA: Processing Dataset: 50.51s
+INFO: 2024-10-15 08:10:13,473: llmtf.base.darumeru/ruOpenBookQA: Results for darumeru/ruOpenBookQA:
+INFO: 2024-10-15 08:10:13,483: llmtf.base.darumeru/ruOpenBookQA: {'acc': 0.7152061855670103, 'f1_macro': 0.7151629824958838}
+INFO: 2024-10-15 08:10:13,491: llmtf.base.evaluator: Ended eval
+INFO: 2024-10-15 08:10:13,492: llmtf.base.evaluator:
+mean	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/ruOpenBookQA
+0.554	0.297	0.690	0.515	0.715
+INFO: 2024-10-15 08:10:22,100: llmtf.base.evaluator: Starting eval on ['darumeru/ruworldtree']
+INFO: 2024-10-15 08:10:22,100: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [131508]
+INFO: 2024-10-15 08:10:22,100: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|im_end|>']
+INFO: 2024-10-15 08:10:24,588: llmtf.base.darumeru/ruWorldTree: Loading Dataset: 2.49s
+INFO: 2024-10-15 08:10:27,304: llmtf.base.darumeru/ruWorldTree: Processing Dataset: 2.72s
+INFO: 2024-10-15 08:10:27,305: llmtf.base.darumeru/ruWorldTree: Results for darumeru/ruWorldTree:
+INFO: 2024-10-15 08:10:27,309: llmtf.base.darumeru/ruWorldTree: {'acc': 0.8761904761904762, 'f1_macro': 0.8751507751507751}
+INFO: 2024-10-15 08:10:27,310: llmtf.base.evaluator: Ended eval
+INFO: 2024-10-15 08:10:27,310: llmtf.base.evaluator:
+mean	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/ruOpenBookQA	darumeru/ruWorldTree
+0.619	0.297	0.690	0.515	0.715	0.876
+INFO: 2024-10-15 08:10:36,302: llmtf.base.evaluator: Starting eval on ['darumeru/rwsd']
+INFO: 2024-10-15 08:10:36,302: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [131508]
+INFO: 2024-10-15 08:10:36,302: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|im_end|>']
+INFO: 2024-10-15 08:10:39,307: llmtf.base.darumeru/RWSD: Loading Dataset: 3.01s
+INFO: 2024-10-15 08:10:44,723: llmtf.base.darumeru/RWSD: Processing Dataset: 5.42s
+INFO: 2024-10-15 08:10:44,723: llmtf.base.darumeru/RWSD: Results for darumeru/RWSD:
+INFO: 2024-10-15 08:10:44,725: llmtf.base.darumeru/RWSD: {'acc': 0.5392156862745098}
+INFO: 2024-10-15 08:10:44,726: llmtf.base.evaluator: Ended eval
+INFO: 2024-10-15 08:10:44,727: llmtf.base.evaluator:
+mean	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/RWSD	darumeru/ruOpenBookQA	darumeru/ruWorldTree
+0.605	0.297	0.690	0.515	0.539	0.715	0.876
+INFO: 2024-10-15 08:10:53,270: llmtf.base.evaluator: Starting eval on ['daru/treewayextractive']
+INFO: 2024-10-15 08:10:53,270: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [131508]
+INFO: 2024-10-15 08:10:53,270: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|im_end|>']
+INFO: 2024-10-15 08:11:06,662: llmtf.base.daru/treewayextractive: Loading Dataset: 13.39s
+INFO: 2024-10-15 08:13:53,187: llmtf.base.daru/treewayextractive: Processing Dataset: 166.53s
+INFO: 2024-10-15 08:13:53,188: llmtf.base.daru/treewayextractive: Results for daru/treewayextractive:
+INFO: 2024-10-15 08:13:53,422: llmtf.base.daru/treewayextractive: {'r-prec': 0.38688455988455983}
+INFO: 2024-10-15 08:13:53,464: llmtf.base.evaluator: Ended eval
+INFO: 2024-10-15 08:13:53,465: llmtf.base.evaluator:
+mean	daru/treewayextractive	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/RWSD	darumeru/ruOpenBookQA	darumeru/ruWorldTree
+0.574	0.387	0.297	0.690	0.515	0.539	0.715	0.876
+INFO: 2024-10-15 08:14:02,066: llmtf.base.evaluator: Starting eval on ['nlpcoreteam/rummlu']
+INFO: 2024-10-15 08:14:02,067: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [131508]
+INFO: 2024-10-15 08:14:02,067: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|im_end|>']
+INFO: 2024-10-15 08:16:12,217: llmtf.base.nlpcoreteam/ruMMLU: Loading Dataset: 130.15s
+INFO: 2024-10-15 08:22:19,125: llmtf.base.nlpcoreteam/ruMMLU: Processing Dataset: 366.91s
+INFO: 2024-10-15 08:22:19,125: llmtf.base.nlpcoreteam/ruMMLU: Results for nlpcoreteam/ruMMLU:
+INFO: 2024-10-15 08:22:19,191: llmtf.base.nlpcoreteam/ruMMLU:                                        metric
+subject
+abstract_algebra                     0.320000
+anatomy                              0.444444
+astronomy                            0.631579
+business_ethics                      0.570000
+clinical_knowledge                   0.584906
+college_biology                      0.500000
+college_chemistry                    0.340000
+college_computer_science             0.490000
+college_mathematics                  0.360000
+college_medicine                     0.537572
+college_physics                      0.421569
+computer_security                    0.580000
+conceptual_physics                   0.527660
+econometrics                         0.368421
+electrical_engineering               0.524138
+elementary_mathematics               0.507937
+formal_logic                         0.341270
+global_facts                         0.360000
+high_school_biology                  0.670968
+high_school_chemistry                0.477833
+high_school_computer_science         0.640000
+high_school_european_history         0.727273
+high_school_geography                0.707071
+high_school_government_and_politics  0.595855
+high_school_macroeconomics           0.525641
+high_school_mathematics              0.425926
+high_school_microeconomics           0.525210
+high_school_physics                  0.463576
+high_school_psychology               0.704587
+high_school_statistics               0.546296
+high_school_us_history               0.651961
+high_school_world_history            0.717300
+human_aging                          0.565022
+human_sexuality                      0.625954
+international_law                    0.719008
+jurisprudence                        0.638889
+logical_fallacies                    0.527607
+machine_learning                     0.392857
+management                           0.660194
+marketing                            0.722222
+medical_genetics                     0.560000
+miscellaneous                        0.625798
+moral_disputes                       0.575145
+moral_scenarios                      0.262570
+nutrition                            0.617647
+philosophy                           0.633441
+prehistory                           0.543210
+professional_accounting              0.372340
+professional_law                     0.370926
+professional_medicine                0.492647
+professional_psychology              0.506536
+public_relations                     0.509091
+security_studies                     0.653061
+sociology                            0.681592
+us_foreign_policy                    0.710000
+virology                             0.433735
+world_religions                      0.672515
+INFO: 2024-10-15 08:22:19,199: llmtf.base.nlpcoreteam/ruMMLU:                                    metric
+subject
+STEM                             0.490019
+humanities                       0.567778
+other (business, health, misc.)  0.539038
+social sciences                  0.592752
+INFO: 2024-10-15 08:22:19,204: llmtf.base.nlpcoreteam/ruMMLU: {'acc': 0.5473965020204639}
+INFO: 2024-10-15 08:22:19,243: llmtf.base.evaluator: Ended eval
+INFO: 2024-10-15 08:22:19,245: llmtf.base.evaluator:
+mean	daru/treewayextractive	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/RWSD	darumeru/ruOpenBookQA	darumeru/ruWorldTree	nlpcoreteam/ruMMLU
+0.571	0.387	0.297	0.690	0.515	0.539	0.715	0.876	0.547
+INFO: 2024-10-15 08:22:28,449: llmtf.base.evaluator: Starting eval on ['nlpcoreteam/enmmlu']
+INFO: 2024-10-15 08:22:28,449: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [131508]
+INFO: 2024-10-15 08:22:28,449: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|im_end|>']
+INFO: 2024-10-15 08:24:37,142: llmtf.base.nlpcoreteam/enMMLU: Loading Dataset: 128.69s
+INFO: 2024-10-15 08:30:16,279: llmtf.base.nlpcoreteam/enMMLU: Processing Dataset: 339.14s
+INFO: 2024-10-15 08:30:16,280: llmtf.base.nlpcoreteam/enMMLU: Results for nlpcoreteam/enMMLU:
+INFO: 2024-10-15 08:30:16,347: llmtf.base.nlpcoreteam/enMMLU:                                        metric
+subject
+abstract_algebra                     0.370000
+anatomy                              0.600000
+astronomy                            0.703947
+business_ethics                      0.700000
+clinical_knowledge                   0.724528
+college_biology                      0.708333
+college_chemistry                    0.430000
+college_computer_science             0.600000
+college_mathematics                  0.380000
+college_medicine                     0.676301
+college_physics                      0.480392
+computer_security                    0.710000
+conceptual_physics                   0.638298
+econometrics                         0.500000
+electrical_engineering               0.586207
+elementary_mathematics               0.544974
+formal_logic                         0.357143
+global_facts                         0.350000
+high_school_biology                  0.796774
+high_school_chemistry                0.576355
+high_school_computer_science         0.680000
+high_school_european_history         0.763636
+high_school_geography                0.772727
+high_school_government_and_politics  0.844560
+high_school_macroeconomics           0.684615
+high_school_mathematics              0.466667
+high_school_microeconomics           0.756303
+high_school_physics                  0.450331
+high_school_psychology               0.847706
+high_school_statistics               0.643519
+high_school_us_history               0.813725
+high_school_world_history            0.835443
+human_aging                          0.686099
+human_sexuality                      0.763359
+international_law                    0.768595
+jurisprudence                        0.777778
+logical_fallacies                    0.766871
+machine_learning                     0.464286
+management                           0.805825
+marketing                            0.893162
+medical_genetics                     0.740000
+miscellaneous                        0.777778
+moral_disputes                       0.656069
+moral_scenarios                      0.282682
+nutrition                            0.728758
+philosophy                           0.713826
+prehistory                           0.740741
+professional_accounting              0.510638
+professional_law                     0.462842
+professional_medicine                0.672794
+professional_psychology              0.673203
+public_relations                     0.700000
+security_studies                     0.714286
+sociology                            0.805970
+us_foreign_policy                    0.770000
+virology                             0.475904
+world_religions                      0.807018
+INFO: 2024-10-15 08:30:16,355: llmtf.base.nlpcoreteam/enMMLU:                                    metric
+subject
+STEM                             0.568338
+humanities                       0.672798
+other (business, health, misc.)  0.667271
+social sciences                  0.736061
+INFO: 2024-10-15 08:30:16,361: llmtf.base.nlpcoreteam/enMMLU: {'acc': 0.6611166912740201}
+INFO: 2024-10-15 08:30:16,417: llmtf.base.evaluator: Ended eval
+INFO: 2024-10-15 08:30:16,419: llmtf.base.evaluator:
+mean	daru/treewayextractive	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/RWSD	darumeru/ruOpenBookQA	darumeru/ruWorldTree	nlpcoreteam/enMMLU	nlpcoreteam/ruMMLU
+0.581	0.387	0.297	0.690	0.515	0.539	0.715	0.876	0.661	0.547
+INFO: 2024-10-15 08:30:25,792: llmtf.base.evaluator: Starting eval on ['daru/treewayabstractive']
+INFO: 2024-10-15 08:30:25,792: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [131508]
+INFO: 2024-10-15 08:30:25,792: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|im_end|>']
+INFO: 2024-10-15 08:30:29,807: llmtf.base.daru/treewayabstractive: Loading Dataset: 4.01s
+INFO: 2024-10-15 08:34:18,637: llmtf.base.daru/treewayabstractive: Processing Dataset: 228.83s
+INFO: 2024-10-15 08:34:18,637: llmtf.base.daru/treewayabstractive: Results for daru/treewayabstractive:
+INFO: 2024-10-15 08:34:18,638: llmtf.base.daru/treewayabstractive: {'rouge1': 0.33097672264173833, 'rouge2': 0.12022011135293731}
+INFO: 2024-10-15 08:34:18,640: llmtf.base.evaluator: Ended eval
+INFO: 2024-10-15 08:34:18,640: llmtf.base.evaluator:
+mean	daru/treewayabstractive	daru/treewayextractive	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/RWSD	darumeru/ruOpenBookQA	darumeru/ruWorldTree	nlpcoreteam/enMMLU	nlpcoreteam/ruMMLU
+0.545	0.226	0.387	0.297	0.690	0.515	0.539	0.715	0.876	0.661	0.547
+INFO: 2024-10-15 08:34:27,535: llmtf.base.evaluator: Starting eval on ['darumeru/cp_para_ru']
+INFO: 2024-10-15 08:34:27,535: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [131508]
+INFO: 2024-10-15 08:34:27,535: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|im_end|>']
+INFO: 2024-10-15 08:34:30,099: llmtf.base.darumeru/cp_para_ru: Loading Dataset: 2.56s
+INFO: 2024-10-15 08:37:05,943: llmtf.base.darumeru/cp_para_ru: Processing Dataset: 155.84s
+INFO: 2024-10-15 08:37:05,944: llmtf.base.darumeru/cp_para_ru: Results for darumeru/cp_para_ru:
+INFO: 2024-10-15 08:37:05,944: llmtf.base.darumeru/cp_para_ru: {'symbol_per_token': 3.7695317959683377, 'len': 0.9951596967747576, 'lcs': 0.9}
+INFO: 2024-10-15 08:37:05,945: llmtf.base.evaluator: Ended eval
+INFO: 2024-10-15 08:37:05,946: llmtf.base.evaluator:
+mean	daru/treewayabstractive	daru/treewayextractive	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/RWSD	darumeru/cp_para_ru	darumeru/ruOpenBookQA	darumeru/ruWorldTree	nlpcoreteam/enMMLU	nlpcoreteam/ruMMLU
+0.578	0.226	0.387	0.297	0.690	0.515	0.539	0.900	0.715	0.876	0.661	0.547

llmtf_eval/evaluation_results.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/cp_para_ru darumeru/ruOpenBookQA darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU
2	+ 0.578 0.226 0.387 0.297 0.690 0.515 0.539 0.900 0.715 0.876 0.661 0.547

llmtf_eval/nlpcoreteam_enMMLU.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a2f860aadb5644d251e44c9bbea39d71ad5862e64b3f1001a770a086c8822f5b
+size 37133936

llmtf_eval/nlpcoreteam_enMMLU_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "/workdir/data/models/qwen/ruadapt_qwen2.5_3B_ext_u32_lr5e4/kto2",
+        "generation_config": {
+            "bos_token_id": 131506,
+            "do_sample": true,
+            "eos_token_id": [
+                131508
+            ],
+            "max_length": 32768,
+            "max_new_tokens": 64,
+            "pad_token_id": 131506,
+            "stop_strings": [
+                "<|im_end|>"
+            ],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.45.2",
+            "trust_remote_code": false
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "",
+            "suffix": "<|im_start|>assistant\n",
+            "add_special_tokens": false,
+            "eos_token": "<|im_end|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": false,
+        "max_model_len": 32768
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval/nlpcoreteam_enMMLU_total.jsonl ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "task_name": "nlpcoreteam/enMMLU",
+    "results": {
+        "acc": 0.6611166912740201
+    },
+    "leaderboard_result": 0.6611166912740201
+}

llmtf_eval/nlpcoreteam_ruMMLU.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:229441a694e7fb7247232bae6510335448080e5c96d4e78be142a61aecde5787
+size 43054560

llmtf_eval/nlpcoreteam_ruMMLU_params.jsonl ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "/workdir/data/models/qwen/ruadapt_qwen2.5_3B_ext_u32_lr5e4/kto2",
+        "generation_config": {
+            "bos_token_id": 131506,
+            "do_sample": true,
+            "eos_token_id": [
+                131508
+            ],
+            "max_length": 32768,
+            "max_new_tokens": 64,
+            "pad_token_id": 131506,
+            "stop_strings": [
+                "<|im_end|>"
+            ],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.45.2",
+            "trust_remote_code": false
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
+            "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "",
+            "suffix": "<|im_start|>assistant\n",
+            "add_special_tokens": false,
+            "eos_token": "<|im_end|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": false,
+        "max_model_len": 32768
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval/nlpcoreteam_ruMMLU_total.jsonl ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "task_name": "nlpcoreteam/ruMMLU",
+    "results": {
+        "acc": 0.5473965020204639
+    },
+    "leaderboard_result": 0.5473965020204639
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d1e2a488e73bc40e6fb027896a5cf81a6aedd51ef3472a094f11cb7bd2504cba
+size 4964146888

model-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a32a96c51697ece86dc0cac4e9aa80003a560854de238a632755eb4d8db8d7f1
+size 1124188912

model.safetensors.index.json ADDED Viewed

	@@ -0,0 +1,441 @@

+{
+  "metadata": {
+    "total_size": 6088286208
+  },
+  "weight_map": {
+    "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.norm.weight": "model-00002-of-00002.safetensors"
+  }
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9b70f434938f8add6d6ce484dedf2c36f3b4aefde24b4c74b1d3254388487bfc
+size 10610274

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,207 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "131506": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131507": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131508": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131509": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131510": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131511": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131512": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131513": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131514": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131515": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131516": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131517": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131518": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131519": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131520": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131521": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131522": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131523": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131524": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131525": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131526": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "131527": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": "<|endoftext|>",
+  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n 
       {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff