"""Gradio demo stub: one text box in, one text box out.

The handler is a placeholder that always answers ``"wip"``.
"""

from itertools import islice

import gradio as gr
from datatrove.executor.local import LocalPipelineExecutor
from datatrove.pipeline.base import PipelineStep
from datatrove.pipeline.extractors import Trafilatura
from datatrove.pipeline.filters import (
    C4QualityFilter,
    FineWebQualityFilter,
    GopherQualityFilter,
    GopherRepetitionFilter,
    LanguageFilter,
    URLFilter,
)
from datatrove.pipeline.readers import WarcReader
from datatrove.pipeline.writers.jsonl import JsonlWriter

# NOTE(review): none of the datatrove names (nor ``islice``) are referenced by
# the code visible here; presumably they are staged for the real pipeline
# behind ``run`` -- confirm before pruning.


def run(input):
    """Handle one request from the interface.

    Args:
        input: Value of the input text box. It is currently ignored.

    Returns:
        The literal string ``"wip"``.
    """
    return "wip"


# A single text box on each side of the interface.
input_components = [gr.Textbox()]
output_components = [gr.Textbox()]

demo = gr.Interface(fn=run, inputs=input_components, outputs=output_components)

# Launch happens at module load, so importing this file also starts the app.
demo.launch()