Petr Tsvetkov
committed on
Commit
•
4017643
1
Parent(s):
f26a894
# of deletions rel to initial message length
Browse files
- change_visualizer.py +1 -1
- statistics.py +6 -4
change_visualizer.py
CHANGED
@@ -82,7 +82,7 @@ if __name__ == '__main__':
|
|
82 |
def layout_for_statistics(statistics_group_name):
|
83 |
gr.Markdown(f"### {statistics_group_name}")
|
84 |
stats = STATISTICS[statistics_group_name]
|
85 |
-
gr.Number(label="Average deletions number (rel to the
|
86 |
value=stats['deletions'].mean().item(), precision=3)
|
87 |
gr.Number(label="Average insertions number (rel to the result length)", interactive=False,
|
88 |
value=stats['insertions'].mean().item(), precision=3)
|
|
|
82 |
def layout_for_statistics(statistics_group_name):
|
83 |
gr.Markdown(f"### {statistics_group_name}")
|
84 |
stats = STATISTICS[statistics_group_name]
|
85 |
+
gr.Number(label="Average deletions number (rel to the initial message length)", interactive=False,
|
86 |
value=stats['deletions'].mean().item(), precision=3)
|
87 |
gr.Number(label="Average insertions number (rel to the result length)", interactive=False,
|
88 |
value=stats['insertions'].mean().item(), precision=3)
|
statistics.py
CHANGED
@@ -2,7 +2,7 @@ import numpy as np
|
|
2 |
import pandas as pd
|
3 |
|
4 |
|
5 |
-
def get_statistics_for_df(df: pd.DataFrame, end_col, annotated_col):
|
6 |
relative_deletions = []
|
7 |
relative_insertions = []
|
8 |
relative_changes = []
|
@@ -18,8 +18,9 @@ def get_statistics_for_df(df: pd.DataFrame, end_col, annotated_col):
|
|
18 |
|
19 |
sum_changes = sum_deletions + sum_insertions
|
20 |
end_length = len(row[end_col])
|
|
|
21 |
|
22 |
-
relative_deletions.append(sum_deletions /
|
23 |
relative_insertions.append(sum_insertions / end_length)
|
24 |
relative_changes.append(sum_changes / end_length)
|
25 |
|
@@ -31,8 +32,9 @@ def get_statistics_for_df(df: pd.DataFrame, end_col, annotated_col):
|
|
31 |
|
32 |
|
33 |
def get_statistics_for_manual_df(df):
|
34 |
-
return get_statistics_for_df(df, end_col='commit_msg_end',
|
|
|
35 |
|
36 |
|
37 |
def get_statistics_for_synthetic_df(df):
|
38 |
-
return get_statistics_for_df(df, end_col='reference', annotated_col='annotated_diff')
|
|
|
2 |
import pandas as pd
|
3 |
|
4 |
|
5 |
+
def get_statistics_for_df(df: pd.DataFrame, start_col, end_col, annotated_col):
|
6 |
relative_deletions = []
|
7 |
relative_insertions = []
|
8 |
relative_changes = []
|
|
|
18 |
|
19 |
sum_changes = sum_deletions + sum_insertions
|
20 |
end_length = len(row[end_col])
|
21 |
+
start_length = len(row[start_col])
|
22 |
|
23 |
+
relative_deletions.append(sum_deletions / start_length)
|
24 |
relative_insertions.append(sum_insertions / end_length)
|
25 |
relative_changes.append(sum_changes / end_length)
|
26 |
|
|
|
32 |
|
33 |
|
34 |
def get_statistics_for_manual_df(df):
|
35 |
+
return get_statistics_for_df(df, start_col="commit_msg_start", end_col='commit_msg_end',
|
36 |
+
annotated_col='annotated_diff')
|
37 |
|
38 |
|
39 |
def get_statistics_for_synthetic_df(df):
|
40 |
+
return get_statistics_for_df(df, start_col="commit_msg_start", end_col='reference', annotated_col='annotated_diff')
|