P0x0 committed on
Commit
f041db7
1 Parent(s): 9e5a3a4

Update README.md

Files changed (1)
  1. README.md +30 -117
README.md CHANGED
@@ -55,121 +55,34 @@ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
  ```
  I encourage you to provide feedback on the model's performance. If you'd like to create your own quantizations, feel free to do so and let me know how it works for you!

- model-index:
- - name: P0x0/Astra-v1-12B
-   results:
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: Average
-       type: Aggregated Metrics
-     metrics:
-     - name: Average Score
-       type: average_score
-       value: 19.46
-     source:
-       name: Open LLM Leaderboard
-       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
-
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: IFEval
-       type: HuggingFaceH4/ifeval
-       args:
-         num_few_shot: 0
-     metrics:
-     - name: Strict Accuracy (Instance Level)
-       type: inst_level_strict_acc
-       value: 28.06
-     - name: Strict Accuracy (Prompt Level)
-       type: prompt_level_strict_acc
-       value: 28.06
-     source:
-       name: Open LLM Leaderboard
-       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
-
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: BBH
-       type: BBH
-       args:
-         num_few_shot: 3
-     metrics:
-     - name: Normalized Accuracy
-       type: acc_norm
-       value: 31.81
-     source:
-       name: Open LLM Leaderboard
-       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
-
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: MATH Lvl 5
-       type: hendrycks/competition_math
-       args:
-         num_few_shot: 4
-     metrics:
-     - name: Exact Match
-       type: exact_match
-       value: 9.67
-     source:
-       name: Open LLM Leaderboard
-       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
-
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: GPQA
-       type: Idavidrein/gpqa
-       args:
-         num_few_shot: 0
-     metrics:
-     - name: Normalized Accuracy
-       type: acc_norm
-       value: 8.5
-     source:
-       name: Open LLM Leaderboard
-       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
-
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: MuSR
-       type: TAUR-Lab/MuSR
-       args:
-         num_few_shot: 0
-     metrics:
-     - name: Normalized Accuracy
-       type: acc_norm
-       value: 11.38
-     source:
-       name: Open LLM Leaderboard
-       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
-
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: MMLU-PRO
-       type: TIGER-Lab/MMLU-Pro
-       config: main
-       split: test
-       args:
-         num_few_shot: 5
-     metrics:
-     - name: Accuracy
-       type: acc
-       value: 27.34
-     source:
-       name: Open LLM Leaderboard
-       url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
+ metrics:
+ - name: "Average ⬆️"
+   dataset_name: "Average"
+   metric_type: "average_score"
+   value: 19.46
+ - name: "IFEval"
+   dataset_name: "IFEval"
+   metric_type: "inst_level_strict_acc, prompt_level_strict_acc"
+   value: 28.06
+ - name: "BBH"
+   dataset_name: "BBH"
+   metric_type: "acc_norm"
+   value: 31.81
+ - name: "MATH Lvl 5"
+   dataset_name: "MATH Lvl 5"
+   metric_type: "exact_match"
+   value: 9.67
+ - name: "GPQA"
+   dataset_name: "GPQA"
+   metric_type: "acc_norm"
+   value: 8.5
+ - name: "MUSR"
+   dataset_name: "MuSR"
+   metric_type: "acc_norm"
+   value: 11.38
+ - name: "MMLU-PRO"
+   dataset_name: "MMLU-PRO"
+   metric_type: "acc"
+   value: 27.34
+
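The hunk context above ends with the last line of the README's inference snippet (`print(tokenizer.decode(outputs[0], skip_special_tokens=True))`). For reference, a minimal reconstruction of such a snippet, assuming the standard `transformers` text-generation API; the prompt and generation parameters are illustrative, not taken from the README:

```python
# Illustrative sketch of a transformers inference snippet whose final line
# matches the diff context; prompt and generation settings are assumptions.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "P0x0/Astra-v1-12B"  # repo id from the diff
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

inputs = tokenizer("Hello, Astra!", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))  # line shown in the hunk header
```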