P0x0 committed
Commit d528238
1 Parent(s): 11f1eab

Update README.md

Files changed (1)
  1. README.md +109 -8
README.md CHANGED
@@ -60,18 +60,119 @@ model-index:
   results:
   - task:
       type: text-generation
+      name: Text Generation
+    dataset:
+      name: Average
+      type: Aggregated Metrics
     metrics:
-    - name: Average
+    - type: average_score
       value: 19.46
-    - name: IFEval
+      name: Average Score
+    source:
+      name: Open LLM Leaderboard
+      url: >-
+        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
+
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: IFEval
+      type: HuggingFaceH4/ifeval
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: strict_accuracy
       value: 28.06
-    - name: BBH
+      name: Strict Accuracy
+    source:
+      name: Open LLM Leaderboard
+      url: >-
+        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
+
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: BBH
+      type: BBH
+      args:
+        num_few_shot: 3
+    metrics:
+    - type: normalized_accuracy
       value: 31.81
-    - name: MATH Lvl 5
+      name: Normalized Accuracy
+    source:
+      name: Open LLM Leaderboard
+      url: >-
+        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
+
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MATH Lvl 5
+      type: hendrycks/competition_math
+      args:
+        num_few_shot: 4
+    metrics:
+    - type: exact_match
       value: 9.67
-    - name: GPQA
+      name: Exact Match
+    source:
+      name: Open LLM Leaderboard
+      url: >-
+        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
+
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: GPQA
+      type: Idavidrein/gpqa
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: normalized_accuracy
       value: 8.5
-    - name: MUSR
+      name: Normalized Accuracy
+    source:
+      name: Open LLM Leaderboard
+      url: >-
+        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
+
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MuSR
+      type: TAUR-Lab/MuSR
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: normalized_accuracy
       value: 11.38
-    - name: MMLU-PRO
-      value: 27.34
+      name: Normalized Accuracy
+    source:
+      name: Open LLM Leaderboard
+      url: >-
+        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
+
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU-PRO
+      type: TIGER-Lab/MMLU-Pro
+      config: main
+      split: test
+      args:
+        num_few_shot: 5
+    metrics:
+    - type: accuracy
+      value: 27.34
+      name: Accuracy
+    source:
+      name: Open LLM Leaderboard
+      url: >-
+        https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=P0x0/Astra-v1-12B
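The model-index block added here is the structured metadata the Hub parses to display evaluation results on the model page. A minimal sketch of reading these results back programmatically with huggingface_hub (assuming the library is installed; the repo id P0x0/Astra-v1-12B is taken from the leaderboard query URLs in the diff above):

```python
# Minimal sketch: load the model card and print the eval results
# declared in its model-index front matter (assumes huggingface_hub
# is installed and the repo is publicly readable).
from huggingface_hub import ModelCard

# Repo id taken from the leaderboard URLs in the diff above.
card = ModelCard.load("P0x0/Astra-v1-12B")

# card.data.eval_results holds one EvalResult per task/dataset/metric entry.
for r in card.data.eval_results or []:
    print(f"{r.dataset_name}: {r.metric_name} = {r.metric_value}")
```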