hynky HF staff commited on
Commit
b4f0c17
1 Parent(s): 89bd84c

I hope I added using lfs

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. datasets/allenai_c4_en/fqdn/avg_line_length/metric.json +3 -0
  3. datasets/allenai_c4_en/fqdn/avg_sentence_length/metric.json +3 -0
  4. datasets/allenai_c4_en/fqdn/avg_word_length/metric.json +3 -0
  5. datasets/allenai_c4_en/fqdn/avg_words_per_line/metric.json +3 -0
  6. datasets/allenai_c4_en/fqdn/capitalized_ratio/metric.json +3 -0
  7. datasets/allenai_c4_en/fqdn/ccnet_perplexity_wikipedia_en/metric.json +3 -0
  8. datasets/allenai_c4_en/fqdn/digit_ratio/metric.json +3 -0
  9. datasets/allenai_c4_en/fqdn/elipsis_ratio/metric.json +3 -0
  10. datasets/allenai_c4_en/fqdn/fasttext_en/metric.json +3 -0
  11. datasets/allenai_c4_en/fqdn/length/metric.json +3 -0
  12. datasets/allenai_c4_en/fqdn/line_char_duplicates/metric.json +3 -0
  13. datasets/allenai_c4_en/fqdn/line_duplicates/metric.json +3 -0
  14. datasets/allenai_c4_en/fqdn/lines_ending_with_terminal_mark_ratio/metric.json +3 -0
  15. datasets/allenai_c4_en/fqdn/long_line_ratio_chars_10000/metric.json +3 -0
  16. datasets/allenai_c4_en/fqdn/long_line_ratio_chars_2000/metric.json +3 -0
  17. datasets/allenai_c4_en/fqdn/long_sentence_ratio_75/metric.json +3 -0
  18. datasets/allenai_c4_en/fqdn/long_word_ratio_7/metric.json +3 -0
  19. datasets/allenai_c4_en/fqdn/n_lines/metric.json +3 -0
  20. datasets/allenai_c4_en/fqdn/n_sentences/metric.json +3 -0
  21. datasets/allenai_c4_en/fqdn/n_words/metric.json +3 -0
  22. datasets/allenai_c4_en/fqdn/non_alpha_digit_ratio/metric.json +3 -0
  23. datasets/allenai_c4_en/fqdn/punctuation_ratio/metric.json +3 -0
  24. datasets/allenai_c4_en/fqdn/short_line_ratio_chars_10/metric.json +3 -0
  25. datasets/allenai_c4_en/fqdn/short_line_ratio_chars_30/metric.json +3 -0
  26. datasets/allenai_c4_en/fqdn/short_sentence_ratio_20/metric.json +3 -0
  27. datasets/allenai_c4_en/fqdn/short_word_ratio_3/metric.json +3 -0
  28. datasets/allenai_c4_en/fqdn/stop_word_ratio/metric.json +3 -0
  29. datasets/allenai_c4_en/fqdn/type_token_ratio/metric.json +3 -0
  30. datasets/allenai_c4_en/fqdn/uppercase_ratio/metric.json +3 -0
  31. datasets/allenai_c4_en/fqdn/white_space_ratio/metric.json +3 -0
  32. datasets/allenai_c4_en/histogram/avg_line_length/metric.json +3 -0
  33. datasets/allenai_c4_en/histogram/avg_sentence_length/metric.json +3 -0
  34. datasets/allenai_c4_en/histogram/avg_word_length/metric.json +3 -0
  35. datasets/allenai_c4_en/histogram/avg_words_per_line/metric.json +3 -0
  36. datasets/allenai_c4_en/histogram/capitalized_ratio/metric.json +3 -0
  37. datasets/allenai_c4_en/histogram/ccnet_perplexity_wikipedia_en/metric.json +3 -0
  38. datasets/allenai_c4_en/histogram/digit_ratio/metric.json +3 -0
  39. datasets/allenai_c4_en/histogram/elipsis_ratio/metric.json +3 -0
  40. datasets/allenai_c4_en/histogram/fasttext_en/metric.json +3 -0
  41. datasets/allenai_c4_en/histogram/length/metric.json +3 -0
  42. datasets/allenai_c4_en/histogram/line_char_duplicates/metric.json +3 -0
  43. datasets/allenai_c4_en/histogram/line_duplicates/metric.json +3 -0
  44. datasets/allenai_c4_en/histogram/lines_ending_with_terminal_mark_ratio/metric.json +3 -0
  45. datasets/allenai_c4_en/histogram/long_line_ratio_chars_10000/metric.json +3 -0
  46. datasets/allenai_c4_en/histogram/long_line_ratio_chars_2000/metric.json +3 -0
  47. datasets/allenai_c4_en/histogram/long_sentence_ratio_75/metric.json +3 -0
  48. datasets/allenai_c4_en/histogram/long_word_ratio_5/metric.json +3 -0
  49. datasets/allenai_c4_en/histogram/long_word_ratio_7/metric.json +3 -0
  50. datasets/allenai_c4_en/histogram/n_lines/metric.json +3 -0
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  datasets/* filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  datasets/* filter=lfs diff=lfs merge=lfs -text
37
+ datasets/** filter=lfs diff=lfs merge=lfs -text
datasets/allenai_c4_en/fqdn/avg_line_length/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13e295d246a9b709aebd04e178ba14a42d004148b45359f865b342bbb8c283a1
3
+ size 1623484
datasets/allenai_c4_en/fqdn/avg_sentence_length/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:844455b9c507a8b176149482b26b3520f96b39e126b983fb7143a326c8932bb8
3
+ size 1674861
datasets/allenai_c4_en/fqdn/avg_word_length/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:929c07146bbe8a77500af69f52414a97026d8cdb2a7ce277cbf2282437b4b1b8
3
+ size 1761053
datasets/allenai_c4_en/fqdn/avg_words_per_line/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:524677a4969079244caf66ac8b92326b86d0733f24accd4c01abcb98e3318901
3
+ size 1616865
datasets/allenai_c4_en/fqdn/capitalized_ratio/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eaa4cffbf0cab6b0a12feaaf383d4e827c2b658872017c0cbe32cb5a06573f5
3
+ size 1794500
datasets/allenai_c4_en/fqdn/ccnet_perplexity_wikipedia_en/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffbbe7122e820473190be77a92836a26690f2a6f0bd54eb7e40b520aaddac755
3
+ size 1569467
datasets/allenai_c4_en/fqdn/digit_ratio/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a637bfb0478881101ea1a4fd471e286c7e97c673875fdffd6ce359b23583447
3
+ size 1746356
datasets/allenai_c4_en/fqdn/elipsis_ratio/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fdec8ce0fbe952a130d4874e14686a724da961ae26dcc3f6a9458b8eb0ba9c5
3
+ size 1592486
datasets/allenai_c4_en/fqdn/fasttext_en/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0e57979bbc0ad4ab11760fef7b675ca5d3a2bd891926ffad327f0e9860622a7
3
+ size 1805069
datasets/allenai_c4_en/fqdn/length/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ba340b60da923dbf4425d4cea30d5277c3d618d6eb13e2075b0e78c2845dac8
3
+ size 1468886
datasets/allenai_c4_en/fqdn/line_char_duplicates/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d375b628e2f34bc839a1124ae6ffab4957c275ad3335271e6bde2a49de652a5
3
+ size 201296
datasets/allenai_c4_en/fqdn/line_duplicates/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d375b628e2f34bc839a1124ae6ffab4957c275ad3335271e6bde2a49de652a5
3
+ size 201296
datasets/allenai_c4_en/fqdn/lines_ending_with_terminal_mark_ratio/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fddfac9f4d3f26ff639798cadf388b3f7066c6b1ebd79ce49112181a93d12d9
3
+ size 457088
datasets/allenai_c4_en/fqdn/long_line_ratio_chars_10000/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36bf4f9d8ea410a63b9bafcd5309e8a3c02131f72c38dccf9809bf5e0f18c0dd
3
+ size 273594
datasets/allenai_c4_en/fqdn/long_line_ratio_chars_2000/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7870d486f759a4281d64777842f1b2ffc9000ab9fbbc156b2f060aa186fb905d
3
+ size 1007208
datasets/allenai_c4_en/fqdn/long_sentence_ratio_75/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17ecfb49ba00c0b76beb322e54b276ec687e32d95ad376801b68264e5c9faaae
3
+ size 1605361
datasets/allenai_c4_en/fqdn/long_word_ratio_7/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ed493dfe25ed5ca3fe2b84088c55f217697a01456b00c8a1c17b44c04641a71
3
+ size 1792368
datasets/allenai_c4_en/fqdn/n_lines/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd437b80fe4f2a4e7d717cd8868a9f779a723e0e988691fac8e841b106990a8c
3
+ size 1401311
datasets/allenai_c4_en/fqdn/n_sentences/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a6589be3e061a255ed88dc51c6b068c7d4c30211c636aad36b635a31bef3a1b
3
+ size 1416594
datasets/allenai_c4_en/fqdn/n_words/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19b6aebbb119d8dc9524917b223e023a91f6e6f7c04e230e257212f52903813a
3
+ size 1451188
datasets/allenai_c4_en/fqdn/non_alpha_digit_ratio/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56a6ed5050c14e6db1d8919c9ae4b1c990485389f748881cd707d78bab938262
3
+ size 1846055
datasets/allenai_c4_en/fqdn/punctuation_ratio/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d30a82db9526d7cb87c7aea1497b1cc2e9c372058720e4572738a9f3c04ada07
3
+ size 1863971
datasets/allenai_c4_en/fqdn/short_line_ratio_chars_10/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa3aa96062ab15f0a5ee70c48fa5417c5bec9f6ed56f38959a50f004fcc554e6
3
+ size 224470
datasets/allenai_c4_en/fqdn/short_line_ratio_chars_30/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06aa0809dbedf7bad9ad3aef7e6a8dd7a63eee2121addd38cc4a641dd5918d34
3
+ size 1516332
datasets/allenai_c4_en/fqdn/short_sentence_ratio_20/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c3af2c2ebc293af79d69f17d9a959a45afffd5a8b87ddc8a32224f6c25f2b2c
3
+ size 1619393
datasets/allenai_c4_en/fqdn/short_word_ratio_3/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd76ee88aefd796ee002ff0bbe19f0e534633f1c6ebae120e0a96409cfd2d53b
3
+ size 1807004
datasets/allenai_c4_en/fqdn/stop_word_ratio/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4fcf0909a13e9b5fa4f2474e23e2ed6c933cdeb8442ec39165be187edf1df2e
3
+ size 1758157
datasets/allenai_c4_en/fqdn/type_token_ratio/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82fbe81626621522e1844b6f0db3d1c91bf049bdcac627bfd98e71401a7dcd2a
3
+ size 1796474
datasets/allenai_c4_en/fqdn/uppercase_ratio/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d25f8fc76974e04ca7f1b18e5bcbf32c04d726e7e718f89ec9ad93ed21a06711
3
+ size 1836030
datasets/allenai_c4_en/fqdn/white_space_ratio/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b481dc7f412871a09f9ae24bbaa1015f90e620efca35763930a0b326d3497c41
3
+ size 1846756
datasets/allenai_c4_en/histogram/avg_line_length/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64706836547dd92505c462d14b288aa45eb1237b0f62b8d54dc5ead6e68577ce
3
+ size 32776755
datasets/allenai_c4_en/histogram/avg_sentence_length/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58fbbcd53e5e6edc22dd7e1f145e3d30d494bc534615a00c30aa989e49c3a648
3
+ size 12712015
datasets/allenai_c4_en/histogram/avg_word_length/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49fec58610045ca6c70cc840b4ee5063dce64127d4b0b8ee99e682681050b380
3
+ size 492624
datasets/allenai_c4_en/histogram/avg_words_per_line/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2675b3b6c52e8f6cc4e002c7f2256e40618bae71c2d5e718f6f78c4a5a7cce15
3
+ size 8711193
datasets/allenai_c4_en/histogram/capitalized_ratio/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:635f1112a515a1215a03d501cc3a36718202959b8a9258b3df1afd3547223c8e
3
+ size 41710
datasets/allenai_c4_en/histogram/ccnet_perplexity_wikipedia_en/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4fe377ec9f760fd50fdb88b2fb0d1f82366a059099a64575195cd6cfd4b8b3e
3
+ size 6708687
datasets/allenai_c4_en/histogram/digit_ratio/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46c808cf4362d15b756fa6adb4156c86234d11607e6ef06433896da8027469fe
3
+ size 28476
datasets/allenai_c4_en/histogram/elipsis_ratio/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3706de37bdeae4d16468d6334f23929e78099faa71e8ee6372af38c5ac1f47f
3
+ size 22912
datasets/allenai_c4_en/histogram/fasttext_en/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ab49879c8588e296a222ade352f6a08ac11fbb12eb33e86d6ffa451889c9ed7
3
+ size 40664
datasets/allenai_c4_en/histogram/length/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1e5f6915cd97e5eb1cfb366aabd5f877e40778b9636ddeca69bfaefd9496181
3
+ size 3984610
datasets/allenai_c4_en/histogram/line_char_duplicates/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9f35e4d190e53a300f84d066f45da136febaf7f900d0a264fbedc24e616e053
3
+ size 45
datasets/allenai_c4_en/histogram/line_duplicates/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9f35e4d190e53a300f84d066f45da136febaf7f900d0a264fbedc24e616e053
3
+ size 45
datasets/allenai_c4_en/histogram/lines_ending_with_terminal_mark_ratio/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37c7c78e254802853ca66bc3bb163f1436285eacdc2bb06b4fc11ea00ad2438a
3
+ size 45
datasets/allenai_c4_en/histogram/long_line_ratio_chars_10000/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adf0a27248386ac8ecbc79e1e722370833d647aa16ef3794e7400404f7627fd1
3
+ size 5864
datasets/allenai_c4_en/histogram/long_line_ratio_chars_2000/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e10d45e9aa9f9549bde1bfc627905e1e8984706525df1b5605c3ca2794435616
3
+ size 28442
datasets/allenai_c4_en/histogram/long_sentence_ratio_75/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5013be4054e7fb0f0c3c7b128fc83be203a26e3ec1bfe934f2946346d0c2918
3
+ size 42874
datasets/allenai_c4_en/histogram/long_word_ratio_5/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c4f984de0c13737617248d94c1370fdd82cf7a275d56df051d40e188c227cbb
3
+ size 4946
datasets/allenai_c4_en/histogram/long_word_ratio_7/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:127e2c2c254335993754dcc3ffd3f42c411d9dc510319d546d2d4d263c538d67
3
+ size 37781
datasets/allenai_c4_en/histogram/n_lines/metric.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce2f177430bb3b565a7bd822bcf28707ace7f0d3dab095a8a5f6f736178903b6
3
+ size 51276