Updates
Browse files
app.py
CHANGED
@@ -129,7 +129,7 @@ def perform_deduplication(
|
|
129 |
f"**Total documents:** {len(texts1)}\n\n"
|
130 |
f"**Duplicates found:** {num_duplicates}\n\n"
|
131 |
f"**Unique documents after deduplication:** {len(deduplicated_indices)}\n\n"
|
132 |
-
"-" * 50 + "\n\n"
|
133 |
)
|
134 |
|
135 |
if num_duplicates > 0:
|
@@ -167,6 +167,7 @@ def perform_deduplication(
|
|
167 |
f"**Total documents in {dataset2_name}/{dataset2_split}:** {len(texts2)}\n\n"
|
168 |
f"**Duplicates found in Dataset 2:** {num_duplicates}\n\n"
|
169 |
f"**Unique documents after deduplication:** {len(texts2) - num_duplicates}\n\n"
|
|
|
170 |
)
|
171 |
|
172 |
if num_duplicates > 0:
|
|
|
129 |
f"**Total documents:** {len(texts1)}\n\n"
|
130 |
f"**Duplicates found:** {num_duplicates}\n\n"
|
131 |
f"**Unique documents after deduplication:** {len(deduplicated_indices)}\n\n"
|
132 |
+
+ "-" * 50 + "\n\n"
|
133 |
)
|
134 |
|
135 |
if num_duplicates > 0:
|
|
|
167 |
f"**Total documents in {dataset2_name}/{dataset2_split}:** {len(texts2)}\n\n"
|
168 |
f"**Duplicates found in Dataset 2:** {num_duplicates}\n\n"
|
169 |
f"**Unique documents after deduplication:** {len(texts2) - num_duplicates}\n\n"
|
170 |
+
+ "-" * 50 + "\n\n"
|
171 |
)
|
172 |
|
173 |
if num_duplicates > 0:
|