Spaces:
Running
Running
Update data/hf_spark_utils.py
Browse files
- data/hf_spark_utils.py +1 -1
data/hf_spark_utils.py
CHANGED
@@ -177,7 +177,7 @@ def write_parquet(df: DataFrame, path: str, **kwargs) -> None:
|
|
177 |
df.mapInArrow(
|
178 |
partial(_preupload, path=path, schema=to_arrow_schema(df.schema), filesystem=filesystem, **kwargs),
|
179 |
from_arrow_schema(pa.schema({"addition": pa.binary()})),
|
180 |
-
).
|
181 |
partial(_commit, path=path, filesystem=filesystem),
|
182 |
from_arrow_schema(pa.schema({"path": pa.string()})),
|
183 |
).collect()
|
|
|
177 |
df.mapInArrow(
|
178 |
partial(_preupload, path=path, schema=to_arrow_schema(df.schema), filesystem=filesystem, **kwargs),
|
179 |
from_arrow_schema(pa.schema({"addition": pa.binary()})),
|
180 |
+
).repartition(1).mapInArrow(
|
181 |
partial(_commit, path=path, filesystem=filesystem),
|
182 |
from_arrow_schema(pa.schema({"path": pa.string()})),
|
183 |
).collect()
|