import hashlib
import os
import subprocess
import tempfile
from os import environ

import gradio as gr
from huggingface_hub import HfApi
from pdf2image import convert_from_path
# Shared Hugging Face Hub client; _upload_hub calls API.upload_file on it.
API = HfApi()
# Access token read from the "TOKEN" environment variable (None when unset);
# it is passed as the `token` argument of API.upload_file.
TOKEN = environ.get("TOKEN")
def _sha256sum_creation(file_path: str) -> str:
    """Return the hex-encoded SHA-256 digest of the file at *file_path*.

    The digest is computed in-process with ``hashlib`` instead of shelling
    out to the ``sha256sum`` binary, so it works on platforms without that
    tool and is not affected by the way ``sha256sum`` formats its output.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The lowercase hexadecimal SHA-256 digest of the file contents.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        # Read in 1 MiB chunks so large uploads are never held fully in memory.
        for chunk in iter(lambda: stream.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()
def _rename_file(file_path: str, sha_hash: str) -> str:
    """Rename a file in place so its name is prefixed with ``<sha_hash>_``.

    Only the final path component is changed; the directory part is kept
    as is, even when it happens to contain the file's base name.

    Args:
        file_path: Path of the existing file.
        sha_hash: Prefix to put in front of the file's base name.

    Returns:
        The path of the renamed file.

    Raises:
        OSError: If the file cannot be renamed (e.g. it does not exist).
    """
    directory, base_name = os.path.split(file_path)
    new_file_path = os.path.join(directory, f"{sha_hash}_{base_name}")
    # The target lives in the same directory, so an atomic replace is enough;
    # unlike an unchecked `mv` call, a failure surfaces as an exception.
    os.replace(file_path, new_file_path)
    return new_file_path
def _upload_hub(file_path: str) -> None:
    """Hash, rename and push a single file to the data-collection dataset.

    The file is first renamed with its SHA-256 digest as a prefix, then
    uploaded under that new base name to the dataset repository.

    Args:
        file_path: Path of the local file to upload.
    """
    renamed_path = _rename_file(file_path, _sha256sum_creation(file_path))
    # The name inside the repo is the last "/"-separated path component.
    *_, remote_name = renamed_path.split("/")
    API.upload_file(
        repo_id="Felix92/docTR-multilingual-data-collection",
        repo_type="dataset",
        path_in_repo=remote_name,
        path_or_fileobj=renamed_path,
        token=TOKEN,
    )
def upload_to_hub(file_upload, camera_upload, agree):
    """Upload the submitted document(s) to the Hub and report the outcome.

    PDF uploads are rasterised to PNG (first 10 pages only) and each page is
    uploaded separately; any other uploaded file and the camera capture are
    uploaded as they are.

    Args:
        file_upload: Path of the uploaded file, or a falsy value if none.
        camera_upload: Path of the webcam capture, or a falsy value if none.
        agree: Whether the user accepted the terms and conditions.

    Returns:
        A 4-tuple, one entry per output wired to the submit button:
        (agree button update, status message, file input update,
        camera input update).
    """
    max_pdf_pages = 10
    try:
        if not agree:
            # Return one value per wired output and leave the inputs untouched.
            return (
                gr.update(),
                gr.Markdown("You must agree to the terms and conditions before proceeding."),
                gr.update(),
                gr.update(),
            )
        if file_upload:
            if file_upload.lower().endswith(".pdf"):
                with tempfile.TemporaryDirectory() as path:
                    # `last_page` stops the conversion early instead of
                    # rendering every page and discarding all but the first 10.
                    page_paths = convert_from_path(
                        file_upload,
                        output_folder=path,
                        paths_only=True,
                        fmt="png",
                        last_page=max_pdf_pages,
                    )
                    for file_path in page_paths:
                        _upload_hub(file_path)
            else:
                # Image files (JPG / PNG) are uploaded directly.
                _upload_hub(file_upload)
        if camera_upload:
            _upload_hub(camera_upload)
        return (
            gr.update(visible=False),
            gr.Markdown("\nUpload was successful! You can upload another document.\n"),
            gr.update(value=None),
            gr.update(value=None),
        )
    except Exception as e:
        # UI boundary: surface any failure to the user and reset both inputs.
        return (
            gr.update(visible=False),
            gr.Markdown(f"An error occured: {e}\n"),
            gr.update(value=None),
            gr.update(value=None),
        )
# Agreement text displayed until the user accepts the terms.
_AGREEMENT_TEXT = """
Document Upload Agreement
This is a Hugging Face space for the docTR/OnnxTR community to collect multilingual data for the following project/s:
You can upload PDF (up to 10 sites), JPG, or PNG files to this space via file upload or webcam / from mobile phone. The uploaded documents will be used to train and evaluate models for the docTR/OnnxTR projects.
By uploading a document, you explicitly agree to the following terms:
1. You affirm that you are the owner or have the necessary rights to upload and share the document.
2. You agree that the uploaded document will be made publicly available to everyone.
3. You agree that the uploaded document can be used for any purpose, including commercial use, by any third party.
"""


def toggle_agreement_visibility():
    """Hide the agreement text and button, set the agreement flag, show the upload section."""
    return (
        gr.update(visible=False),  # agreement_markdown
        gr.update(visible=False),  # agree_button
        True,  # agree_state
        gr.update(visible=True),  # upload_section
    )


with gr.Blocks(fill_height=True) as demo:
    agreement_markdown = gr.Markdown(_AGREEMENT_TEXT)
    agree_button = gr.Button("I Agree to the Terms and Conditions")
    # State to store agreement status
    agree_state = gr.State(value=False)

    # The upload widgets stay hidden until the agreement has been accepted.
    with gr.Column(visible=False) as upload_section:
        success_message = gr.Markdown(visible=True)
        gr.Markdown("Upload a document via camera capture.")
        camera_upload = gr.Image(
            label="Upload Image [JPG | PNG] via camera",
            type="filepath",
            sources=["webcam"],
            mirror_webcam=False,
        )
        gr.Markdown("Upload a document via file upload.")
        file_upload = gr.File(
            label="Upload File [JPG | PNG | PDF]",
            file_types=["pdf", "jpg", "png"],
            type="filepath",
        )
        submit_button = gr.Button("Submit")

    # Clicking the "I Agree" button hides the agreement and shows the upload section
    agree_button.click(
        fn=toggle_agreement_visibility,
        inputs=None,
        outputs=[agreement_markdown, agree_button, agree_state, upload_section],
    )
    # Submitting runs the upload and refreshes the status message and inputs.
    submit_button.click(
        fn=upload_to_hub,
        inputs=[file_upload, camera_upload, agree_state],
        outputs=[agree_button, success_message, file_upload, camera_upload],
    )

if __name__ == "__main__":
    demo.launch()