"""Gradio app that collects user-submitted documents into a Hugging Face dataset.

Users must accept the upload agreement first; afterwards they can submit a
document via webcam capture or file upload. Every uploaded image is renamed
to ``<sha256>_<original name>`` and pushed to the dataset repository.
"""
import hashlib
import logging
import subprocess  # noqa: F401  (no longer used here; kept so the file-level import set is unchanged)
import tempfile
from os import environ
from pathlib import Path

import gradio as gr
from huggingface_hub import HfApi
from pdf2image import convert_from_path

logger = logging.getLogger(__name__)

API = HfApi()
TOKEN = environ.get("TOKEN")

# Dataset repository that receives every uploaded document.
DATASET_REPO_ID = "Felix92/docTR-multilingual-data-collection"
# Only the first pages of a PDF are converted and uploaded.
MAX_PDF_PAGES = 10
# Read size used while hashing, so large files are never loaded at once.
_HASH_CHUNK_SIZE = 1 << 20

# NOTE: the text below is rendered as markdown, so it must stay unindented.
AGREEMENT_TEXT = """

Document Upload Agreement

This is a Hugging Face space for the docTR/OnnxTR community to collect multilingual data for the following project/s:

docTR

OnnxTR

You can upload PDF (up to 10 sites), JPG, or PNG files to this space via file upload or webcam / from mobile phone. The uploaded documents will be used to train and evaluate models for the docTR/OnnxTR projects.

All uploaded files can be found here: Hugging Face dataset



By uploading a document, you explicitly agree to the following terms:

1. You affirm that you are the owner or have the necessary rights to upload and share the document.

2. You agree that the uploaded document will be made publicly available to everyone.

3. You agree that the uploaded document can be used for any purpose, including commercial use, by any third party.

"""


def _sha256sum_creation(file_path: str) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``file_path``.

    The digest is computed in-process with ``hashlib`` instead of shelling
    out to ``sha256sum``, so it does not depend on an external binary and a
    missing file raises ``OSError`` instead of an unrelated ``IndexError``.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as handle:
        for chunk in iter(lambda: handle.read(_HASH_CHUNK_SIZE), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _rename_file(file_path: str, sha_hash: str) -> str:
    """Rename the file to ``<sha_hash>_<original name>`` and return the new path.

    Only the final path component is changed. The previous implementation
    used ``str.replace`` on the whole path, which also rewrote directory
    components that happened to contain the file name.
    """
    source = Path(file_path)
    target = source.with_name(f"{sha_hash}_{source.name}")
    # Same directory, so this is a plain rename; like ``mv`` it overwrites
    # an existing target.
    source.replace(target)
    return str(target)


def _upload_hub(file_path: str) -> None:
    """Prefix the file name with its SHA-256 hash and upload it to the dataset repo."""
    sha256_hash = _sha256sum_creation(file_path)
    new_file_path = _rename_file(file_path, sha256_hash)
    API.upload_file(
        path_or_fileobj=new_file_path,
        path_in_repo=Path(new_file_path).name,
        token=TOKEN,
        repo_type="dataset",
        repo_id=DATASET_REPO_ID,
    )


def upload_to_hub(file_upload, camera_upload, agree):
    """Handle a click on "Submit".

    Args:
        file_upload: Path of the file chosen in the file input, or a falsy
            value when nothing was selected.
        camera_upload: Path of the captured webcam image, or a falsy value.
        agree: Whether the user accepted the upload agreement.

    Returns:
        A 4-tuple of updates matching the outputs wired to the submit button:
        ``(agree_button, success_message, file_upload, camera_upload)``.
        Every code path returns exactly four values.
    """
    try:
        if not agree:
            # Leave the inputs untouched and only surface the message.
            return (
                gr.update(),
                gr.Markdown("You must agree to the terms and conditions before proceeding."),
                gr.update(),
                gr.update(),
            )

        if not file_upload and not camera_upload:
            # Nothing to upload: do not claim success.
            return (
                gr.update(visible=False),
                gr.Markdown("\n\nPlease provide a document via camera capture or file upload.\n\n"),
                gr.update(value=None),
                gr.update(value=None),
            )

        if file_upload:
            if file_upload.lower().endswith(".pdf"):
                with tempfile.TemporaryDirectory() as path:
                    # ``last_page`` stops rendering after MAX_PDF_PAGES pages;
                    # the slice is a defensive second bound.
                    page_paths = convert_from_path(
                        file_upload,
                        output_folder=path,
                        paths_only=True,
                        fmt="png",
                        last_page=MAX_PDF_PAGES,
                    )[:MAX_PDF_PAGES]
                    for file_path in page_paths:
                        _upload_hub(file_path)
            else:
                # JPG / PNG files are uploaded as they are.
                _upload_hub(file_upload)

        if camera_upload:
            _upload_hub(camera_upload)

        return (
            gr.update(visible=False),
            gr.Markdown("\n\nUpload was successful! You can upload another document.\n\n"),
            gr.update(value=None),
            gr.update(value=None),
        )
    except Exception as e:
        # Top-level UI boundary: keep the traceback in the logs and show the
        # error to the user instead of crashing the event handler.
        logger.exception("Document upload failed")
        return (
            gr.update(visible=False),
            gr.Markdown(f"\n\nAn error occured: {e}\n\n"),
            gr.update(value=None),
            gr.update(value=None),
        )


with gr.Blocks(fill_height=True) as demo:
    agreement_markdown = gr.Markdown(AGREEMENT_TEXT)
    agree_button = gr.Button("I Agree to the Terms and Conditions")
    agree_state = gr.State(value=False)  # State to store agreement status

    # The upload widgets stay hidden until the agreement is accepted.
    with gr.Column(visible=False) as upload_section:
        success_message = gr.Markdown(visible=True)
        gr.Markdown("Upload a document via camera capture.")
        camera_upload = gr.Image(
            label="Upload Image [JPG | PNG] via camera",
            type="filepath",
            sources=["webcam"],
            mirror_webcam=False,
        )
        gr.Markdown("Upload a document via file upload.")
        file_upload = gr.File(
            label="Upload File [JPG | PNG | PDF]",
            # Extensions carry a leading dot, as in the Gradio documentation.
            file_types=[".pdf", ".jpg", ".png"],
            type="filepath",
        )
        submit_button = gr.Button("Submit")

    def toggle_agreement_visibility():
        """Hide the agreement text and button, record consent, show the upload section."""
        return gr.update(visible=False), gr.update(visible=False), True, gr.update(visible=True)

    # Clicking the "I Agree" button hides the agreement and shows the upload section
    agree_button.click(
        fn=toggle_agreement_visibility,
        inputs=None,
        outputs=[agreement_markdown, agree_button, agree_state, upload_section],
    )
    submit_button.click(
        fn=upload_to_hub,
        inputs=[file_upload, camera_upload, agree_state],
        outputs=[agree_button, success_message, file_upload, camera_upload],
    )

if __name__ == "__main__":
    demo.launch()