Petr Tsvetkov committed on
Commit
6503e4e
•
1 Parent(s): aab3281

WIP on annotated diffs generation

Browse files
Files changed (1) hide show
  1. generate_annotated_diffs.py +45 -5
generate_annotated_diffs.py CHANGED
@@ -1,19 +1,22 @@
 
1
  from datetime import datetime
2
 
 
 
3
  import hf_data_loader
4
 
5
 
6
  def group_changes(changes):
7
  groups = {}
8
  for change in changes:
9
- group = datetime.fromisoformat(change.ts)
10
  if group not in groups:
11
  groups[group] = []
12
  groups[group].append(change)
13
 
14
  grouped_changes = []
15
  for group in sorted(groups.keys()):
16
- grouped_changes.sort(key=lambda x: x.p)
17
  grouped_changes.append(groups[group])
18
 
19
  return grouped_changes
@@ -21,12 +24,49 @@ def group_changes(changes):
21
 
22
  def get_annotated_diff(initial_text, changes):
23
  grouped_changes = group_changes(changes)
24
- text = [((c, " ") for c in initial_text)]
25
  for change_group in grouped_changes:
 
26
  text_pointer = 0
 
27
  change_pointer = 0
28
  while text_pointer < len(text):
29
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
 
32
- df = hf_data_loader.load_raw_dataset_as_pandas()
 
 
 
 
 
 
 
 
1
+ import json
2
  from datetime import datetime
3
 
4
+ import gradio as gr
5
+
6
  import hf_data_loader
7
 
8
 
9
def group_changes(changes):
    """Bucket changes by timestamp and return the buckets in time order.

    Each change is a mapping with at least a ``'ts'`` entry (an ISO-format
    timestamp string) and a ``'p'`` entry used as the in-bucket sort key.

    Returns a list of lists: one inner list per distinct timestamp, ordered
    by ascending timestamp, each inner list ordered by ascending ``'p'``.
    """
    by_timestamp = {}
    for change in changes:
        moment = datetime.fromisoformat(change['ts'])
        by_timestamp.setdefault(moment, []).append(change)

    # sorted() is stable, so changes sharing the same 'p' keep input order.
    return [
        sorted(by_timestamp[moment], key=lambda item: item['p'])
        for moment in sorted(by_timestamp)
    ]
 
24
 
25
def get_annotated_diff(initial_text, changes):
    """Replay ``changes`` over ``initial_text`` and return a per-character annotation.

    The text is held as a list of ``(character, tag)`` tuples, where ``tag`` is
    ``None`` for characters taken from ``initial_text``, ``'+'`` for characters
    emitted by a ``'+'`` change and ``'-'`` for characters marked by a ``'-'``
    change. Changes are grouped and ordered by ``group_changes`` and each group
    is applied in one left-to-right pass that builds a fresh list.

    Each change is read through the keys ``'p'`` (compared against the running
    index), ``'t'`` (``'+'`` or ``'-'``) and ``'c'`` (a character).
    NOTE(review): presumably position / type / character -- confirm against
    the dataset schema.

    Returns:
        The list of ``(character, tag)`` tuples after the last group.

    Raises:
        RuntimeError: if a change's ``'t'`` is neither ``'+'`` nor ``'-'``.
    """
    grouped_changes = group_changes(changes)
    text = [(c, None) for c in initial_text]
    for change_group in grouped_changes:
        next_text = []
        text_pointer = 0  # index into the current annotated list
        real_text_ind = 0  # running index compared against each change's 'p'
        change_pointer = 0  # index of the next unapplied change in this group
        # NOTE(review): the pass stops once `text` is exhausted, so any changes
        # still pending in the group at that point are not applied.
        while text_pointer < len(text):
            if change_pointer >= len(change_group) or real_text_ind < change_group[change_pointer]['p']:
                # No change is due at this index: copy the entry through as-is.
                next_text.append(text[text_pointer])
                real_text_ind += 1
                text_pointer += 1
            elif change_group[change_pointer]['t'] == '+':
                # Emit the change's character tagged '+', unless the current
                # entry is the same character tagged '-'; in that case the
                # existing entry is consumed and nothing is emitted.
                if not (text[text_pointer][1] == '-' and text[text_pointer][0] == change_group[change_pointer]['c']):
                    next_text.append((change_group[change_pointer]['c'], '+'))
                else:
                    text_pointer += 1

                real_text_ind += 1
                change_pointer += 1
            elif change_group[change_pointer]['t'] == '-':
                # Re-tag the current entry as '-', unless it is the same
                # character tagged '+'; in that case it is dropped entirely.
                if not (text[text_pointer][1] == '+' and text[text_pointer][0] == change_group[change_pointer]['c']):
                    next_text.append((text[text_pointer][0], '-'))
                # NOTE(review): indentation was lost in this copy of the file;
                # this advance is assumed to run for every '-' change -- confirm.
                text_pointer += 1

                real_text_ind += 1
                change_pointer += 1
            else:
                raise RuntimeError("Unexpected branch")
        text = next_text
    return text
57
+
58
+
59
def annotated_diff_for_row(row):
    """Return the annotated diff for one dataset row.

    Reads the starting text from ``row['commit_msg_start']`` and the
    JSON-encoded list of changes from ``row['commit_msg_history']``, then
    delegates to ``get_annotated_diff``.
    """
    return get_annotated_diff(
        row['commit_msg_start'],
        json.loads(row['commit_msg_history']),
    )
63
 
64
 
65
if __name__ == '__main__':
    # Demo entry point: load the raw dataset, compute the annotated diff for
    # every row, and show the first row in a small Gradio app together with
    # its start and end commit messages.
    df = hf_data_loader.load_raw_dataset_as_pandas()
    annotated = df.apply(annotated_diff_for_row, axis=1)

    # Use positional access for both the frame and the derived series so the
    # same row is shown even if the index is not a default 0..n-1 range.
    first_row = df.iloc[0]
    first_annotated = annotated.iloc[0]

    with gr.Blocks(theme=gr.themes.Soft()) as application:
        # `HighlightedText` is the supported component name; the lowercase-t
        # `Highlightedtext` spelling is a deprecated alias.
        gr.HighlightedText(
            value=first_annotated,
            combine_adjacent=True,
            color_map={'+': "green", '-': "red"},
        )
        gr.Markdown(value=first_row['commit_msg_start'])
        gr.Markdown(value=first_row['commit_msg_end'])
    application.launch()