Spaces:

JetBrains-Research
/

commit-message-editing-visualization

Sleeping

App Files Files Community

Petr Tsvetkov committed on Jun 26

Commit

a3d6ea6

•

1 Parent(s): 9d14712

- Fix the grazie api

Browse files

- Compute the average commit message (CM) lengths for the dataset and for the production prompt
- Update the charts

Files changed (3) hide show

api_wrappers/grazie_wrapper.py +1 -1
chart_processing.ipynb +0 -0
generated_message_length_comparison.ipynb +102 -32

api_wrappers/grazie_wrapper.py CHANGED Viewed

@@ -11,7 +11,7 @@ import config
 client = GrazieApiGatewayClient(
     grazie_agent=GrazieAgent("grazie-toolformers", "v1.0"),
     url=GrazieApiGatewayUrls.STAGING,
-    auth_type=AuthType.SERVICE,
     grazie_jwt_token=config.GRAZIE_API_JWT_TOKEN
 )

 client = GrazieApiGatewayClient(
     grazie_agent=GrazieAgent("grazie-toolformers", "v1.0"),
     url=GrazieApiGatewayUrls.STAGING,
+    auth_type=AuthType.APPLICATION,
     grazie_jwt_token=config.GRAZIE_API_JWT_TOKEN
 )

chart_processing.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

generated_message_length_comparison.ipynb CHANGED Viewed

@@ -15,23 +15,28 @@
    "id": "77d51d55b41735cf"
   },
   {
-   "metadata": {},
    "cell_type": "code",
    "source": [
-    "!pip install grazie-api-gateway-client\n",
-    "!pip install tqdm\n",
-    "!pip install pandas\n",
-    "!pip install datasets"
    ],
    "id": "91fa273e8987f6f6",
    "outputs": [],
-   "execution_count": null
   },
   {
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2024-05-31T16:15:11.790663Z",
-     "start_time": "2024-05-31T16:15:11.777607Z"
     }
    },
    "cell_type": "code",
@@ -44,13 +49,13 @@
    ],
    "id": "ce11a4c781c152e",
    "outputs": [],
-   "execution_count": 20
   },
   {
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2024-05-31T16:15:13.223782Z",
-     "start_time": "2024-05-31T16:15:13.207891Z"
     }
    },
    "cell_type": "code",
@@ -62,25 +67,41 @@
     "\treturn PROD_PROMPT.replace(\"$diff\", diff).replace(\"$text\", \"\")\n",
     "\n",
     "def generate_commit_message_prod(diff):\n",
-    "\tgenerate_for_prompt(prod_prompt(diff))"
    ],
    "id": "84a769c8765a7b64",
    "outputs": [],
-   "execution_count": 21
   },
   {
-   "metadata": {},
    "cell_type": "code",
    "source": "generate_commit_message_prod(\"TEST\")",
    "id": "af2f20def94b0490",
-   "outputs": [],
-   "execution_count": null
   },
   {
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2024-05-31T16:15:24.167706Z",
-     "start_time": "2024-05-31T16:15:16.619781Z"
     }
    },
    "cell_type": "code",
@@ -90,6 +111,16 @@
    ],
    "id": "a49cabf576c9d692",
    "outputs": [
     {
      "data": {
       "text/plain": [
@@ -161,26 +192,39 @@
        "</div>"
       ]
      },
-     "execution_count": 22,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 22
   },
   {
-   "metadata": {},
    "cell_type": "code",
    "source": "DATA[\"prediction_prod\"] = DATA.progress_apply(lambda row: generate_commit_message_prod(str(row[\"diff\"])), axis=1)",
    "id": "9ded493e087f991d",
-   "outputs": [],
-   "execution_count": null
   },
   {
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2024-05-31T16:15:24.183660Z",
-     "start_time": "2024-05-31T16:15:24.170584Z"
     }
    },
    "cell_type": "code",
@@ -198,26 +242,52 @@
      ]
     }
    ],
-   "execution_count": 23
   },
   {
-   "metadata": {},
    "cell_type": "code",
    "source": [
     "prod_avg_length = DATA[\"prediction_prod\"].str.len().mean()\n",
     "print(f\"Prod average length: {prod_avg_length}\")"
    ],
    "id": "ec8b4412410794a4",
-   "outputs": [],
-   "execution_count": null
   },
   {
-   "metadata": {},
    "cell_type": "code",
    "source": "print(f\"Length ratio (current / prod): {current_avg_length / prod_avg_length})\")",
    "id": "10f087784896eca3",
-   "outputs": [],
-   "execution_count": null
   }
  ],
  "metadata": {

    "id": "77d51d55b41735cf"
   },
   {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-06-20T16:09:07.968406Z",
+     "start_time": "2024-06-20T16:09:07.955405Z"
+    }
+   },
    "cell_type": "code",
    "source": [
+    "# !pip install grazie-api-gateway-client\n",
+    "# !pip install tqdm\n",
+    "# !pip install pandas\n",
+    "# !pip install datasets"
    ],
    "id": "91fa273e8987f6f6",
    "outputs": [],
+   "execution_count": 1
   },
   {
    "metadata": {
     "ExecuteTime": {
+     "end_time": "2024-06-20T16:09:10.353479Z",
+     "start_time": "2024-06-20T16:09:07.970405Z"
     }
    },
    "cell_type": "code",
    ],
    "id": "ce11a4c781c152e",
    "outputs": [],
+   "execution_count": 2
   },
   {
    "metadata": {
     "ExecuteTime": {
+     "end_time": "2024-06-20T16:09:10.368996Z",
+     "start_time": "2024-06-20T16:09:10.354434Z"
     }
    },
    "cell_type": "code",
     "\treturn PROD_PROMPT.replace(\"$diff\", diff).replace(\"$text\", \"\")\n",
     "\n",
     "def generate_commit_message_prod(diff):\n",
+    "\treturn generate_for_prompt(prod_prompt(diff))"
    ],
    "id": "84a769c8765a7b64",
    "outputs": [],
+   "execution_count": 3
   },
   {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-06-20T16:09:10.384590Z",
+     "start_time": "2024-06-20T16:09:10.371410Z"
+    }
+   },
    "cell_type": "code",
    "source": "generate_commit_message_prod(\"TEST\")",
    "id": "af2f20def94b0490",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\"Certainly! I'll need to see the specific code differences (diffs) you would like to have summarized into a commit message. Please provide the diffs so I can assist you properly.\""
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "execution_count": 4
   },
   {
    "metadata": {
     "ExecuteTime": {
+     "end_time": "2024-06-20T16:09:22.224167Z",
+     "start_time": "2024-06-20T16:09:10.388409Z"
     }
    },
    "cell_type": "code",
    ],
    "id": "a49cabf576c9d692",
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using the latest cached version of the dataset since JetBrains-Research/lca-commit-message-generation couldn't be found on the Hugging Face Hub\n",
+      "Found the latest cached dataset configuration 'commitchronicle-py-long' at cache\\JetBrains-Research___lca-commit-message-generation\\commitchronicle-py-long\\0.0.0\\58dcef83a63cccebacd3e786afd73181cc9175e5 (last modified on Sun Apr  7 11:16:22 2024).\n",
+      "Using the latest cached version of the dataset since JetBrains-Research/lca-results couldn't be found on the Hugging Face Hub\n",
+      "Found the latest cached dataset configuration 'cmg_gpt_4_0613' at cache\\JetBrains-Research___lca-results\\cmg_gpt_4_0613\\0.0.0\\4b56bbf7243da371b3e0a42a0c9db1f37af98c39 (last modified on Fri May 31 16:00:33 2024).\n"
+     ]
+    },
     {
      "data": {
       "text/plain": [
        "</div>"
       ]
      },
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
+   "execution_count": 5
   },
   {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-06-20T16:21:20.410778Z",
+     "start_time": "2024-06-20T16:09:22.227258Z"
+    }
+   },
    "cell_type": "code",
    "source": "DATA[\"prediction_prod\"] = DATA.progress_apply(lambda row: generate_commit_message_prod(str(row[\"diff\"])), axis=1)",
    "id": "9ded493e087f991d",
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 163/163 [11:58<00:00,  4.41s/it]\n"
+     ]
+    }
+   ],
+   "execution_count": 6
   },
   {
    "metadata": {
     "ExecuteTime": {
+     "end_time": "2024-06-20T16:21:20.426781Z",
+     "start_time": "2024-06-20T16:21:20.414781Z"
     }
    },
    "cell_type": "code",
      ]
     }
    ],
+   "execution_count": 7
   },
   {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-06-20T16:21:20.442017Z",
+     "start_time": "2024-06-20T16:21:20.429913Z"
+    }
+   },
    "cell_type": "code",
    "source": [
     "prod_avg_length = DATA[\"prediction_prod\"].str.len().mean()\n",
     "print(f\"Prod average length: {prod_avg_length}\")"
    ],
    "id": "ec8b4412410794a4",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Prod average length: 352.88957055214723\n"
+     ]
+    }
+   ],
+   "execution_count": 8
   },
   {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-06-20T16:21:20.457884Z",
+     "start_time": "2024-06-20T16:21:20.444852Z"
+    }
+   },
    "cell_type": "code",
    "source": "print(f\"Length ratio (current / prod): {current_avg_length / prod_avg_length})\")",
    "id": "10f087784896eca3",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Length ratio (current / prod): 1.772691712591923)\n"
+     ]
+    }
+   ],
+   "execution_count": 9
   }
  ],
  "metadata": {