Plim committed on
Commit
3aea4ea
1 Parent(s): 0c54d4b

repush model but with git lfs tracking

Browse files
Files changed (2) hide show
  1. eval.py +3 -7
  2. pytorch_model.bin +3 -0
eval.py CHANGED
@@ -48,20 +48,16 @@ def log_results(result: Dataset, args: Dict[str, str]):
48
 
49
  def normalize_text(text: str) -> str:
50
  """DO ADAPT FOR YOUR USE CASE. this function normalizes the target text."""
51
-
52
- chars_to_ignore_regex = '[^a-zàâäçéèêëîïôöùûüÿ\'’ ]' # noqa: W605 IMPORTANT: this should correspond to the chars that were ignored during training
53
-
54
- text = re.sub(chars_to_ignore_regex, "", text.lower()).replace('’', "'")
55
-
56
  # In addition, we can normalize the target text, e.g. removing new lines characters etc...
57
  # note that order is important here!
58
  token_sequences_to_ignore = ["\n\n", "\n", " ", " "]
59
-
60
  for t in token_sequences_to_ignore:
61
  text = " ".join(text.split(t))
62
 
63
- return text
 
64
 
 
65
 
66
  def main(args):
67
  # load dataset
 
48
 
49
  def normalize_text(text: str) -> str:
50
  """DO ADAPT FOR YOUR USE CASE. this function normalizes the target text."""
 
 
 
 
 
51
  # In addition, we can normalize the target text, e.g. removing new lines characters etc...
52
  # note that order is important here!
53
  token_sequences_to_ignore = ["\n\n", "\n", " ", " "]
 
54
  for t in token_sequences_to_ignore:
55
  text = " ".join(text.split(t))
56
 
57
+ chars_to_ignore_regex = '[^a-zàâäçéèêëîïôöùûüÿ\'’ ]' # noqa: W605 IMPORTANT: this should correspond to the chars that were ignored during training
58
+ text = re.sub(chars_to_ignore_regex, "", text.lower()).replace('’', "'")
59
 
60
+ return text
61
 
62
  def main(args):
63
  # load dataset
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a7ac9a4075231a9b1f2ef054fe1161fdf7235b6c7bd018f7505d44da3332960
3
+ size 3850548401