Fix error when force_tokens includes a multi-word sequence to preserve

#2
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -47,7 +47,7 @@ def compress(original_prompt, compression_rate, base_model="xlm-roberta-large",
47
  lines = results["fn_labeled_original_prompt"].split(word_sep)
48
  preserved_tokens = []
49
  for line in lines:
50
- word, label = line.split(label_sep)
51
  preserved_tokens.append((word, '+') if label == '1' else (word, None))
52
 
53
  return compressed_prompt, preserved_tokens, n_word_compressed
 
47
  lines = results["fn_labeled_original_prompt"].split(word_sep)
48
  preserved_tokens = []
49
  for line in lines:
50
+ word, label = line.rsplit(label_sep, 1)
51
  preserved_tokens.append((word, '+') if label == '1' else (word, None))
52
 
53
  return compressed_prompt, preserved_tokens, n_word_compressed