Spaces:

Felix92
/

docTR-multilingual-Datacollector

Running

App Files Files Community

docTR-multilingual-Datacollector / app.py

Felix92

init

5b025ea 28 days ago

raw

history blame

4.56 kB

	from os import environ
	import gradio as gr
	from huggingface_hub import HfApi
	import subprocess
	from pdf2image import convert_from_path
	import tempfile

	# Access token for the Hub, taken from the TOKEN environment variable
	# (None when the variable is not set).
	TOKEN = environ.get("TOKEN")
	# Shared Hugging Face Hub client used for every upload in this module.
	API = HfApi()

	def _sha256sum_creation(file_path: str) -> str:
	    """Return the SHA-256 hex digest of the file at ``file_path``.

	    The digest is computed in-process with ``hashlib`` instead of calling the
	    external ``sha256sum`` program, so it works on systems without that
	    binary, always yields the plain 64-character digest regardless of the
	    characters in the file name, and raises ``OSError`` if the file cannot be
	    read rather than failing later on empty command output.

	    Args:
	        file_path: Path of the file to hash.

	    Returns:
	        The lowercase hexadecimal SHA-256 digest of the file contents.

	    Raises:
	        OSError: If the file cannot be opened or read.
	    """
	    # Local import keeps this function self-contained; the module's import
	    # block does not need to change.
	    import hashlib

	    digest = hashlib.sha256()
	    with open(file_path, "rb") as stream:
	        # Read in 1 MiB chunks so large scans are never held fully in memory.
	        for chunk in iter(lambda: stream.read(1024 * 1024), b""):
	            digest.update(chunk)
	    return digest.hexdigest()

	def _rename_file(file_path: str, sha_hash: str) -> str:
	    """Rename a file so its name is prefixed with ``sha_hash`` and ``_``.

	    Only the final path component is changed; the file stays in the same
	    directory. Building the new path from the split directory/name pair
	    (rather than a string replace over the whole path) keeps the result
	    correct when a parent directory contains the file name as a substring.

	    Args:
	        file_path: Path of the existing file.
	        sha_hash: Prefix to put in front of the file name.

	    Returns:
	        The path of the renamed file.

	    Raises:
	        OSError: If the file cannot be renamed (e.g. it does not exist).
	    """
	    # Local import keeps this function self-contained; the module's import
	    # block does not need to change.
	    import os

	    directory, file_name = os.path.split(file_path)
	    new_file_path = os.path.join(directory, f"{sha_hash}_{file_name}")
	    # os.replace overwrites an existing destination like `mv` does, but
	    # raises on failure instead of silently continuing.
	    os.replace(file_path, new_file_path)
	    return new_file_path

	def _upload_hub(file_path: str) -> None:
	    """Push one local file to the data-collection dataset repository.

	    The file is hashed and renamed on disk first (hash-prefixed name), then
	    uploaded under that new name; the name used inside the repository is the
	    part of the renamed path after the last ``/``.

	    Args:
	        file_path: Path of the local file to upload.
	    """
	    hashed_path = _rename_file(file_path, _sha256sum_creation(file_path))
	    repo_file_name = hashed_path.rpartition("/")[2]
	    API.upload_file(
	        repo_id="Felix92/docTR-multilingual-data-collection",
	        repo_type="dataset",
	        token=TOKEN,
	        path_in_repo=repo_file_name,
	        path_or_fileobj=hashed_path,
	    )

	def upload_to_hub(file_upload, camera_upload, agree):
	    """Upload the submitted document(s) and report the outcome to the UI.

	    PDF uploads are converted to PNG pages (at most 10) and each page is
	    uploaded; any other uploaded file and the webcam capture are uploaded
	    as they are.

	    Args:
	        file_upload: Path of the file chosen in the file widget, or a falsy
	            value when nothing was selected.
	        camera_upload: Path of the webcam capture, or a falsy value when no
	            picture was taken.
	        agree: Whether the user accepted the terms and conditions.

	    Returns:
	        A 4-tuple with one entry per output wired to the submit button, in
	        this order: agree button, status message, file input, camera input.
	    """
	    max_pdf_pages = 10

	    def _finished(html):
	        # Keep the agree button hidden, show the message, clear both inputs.
	        return gr.update(visible=False), gr.Markdown(html), gr.update(value=None), gr.update(value=None)

	    try:
	        if not agree:
	            # Four values are required here as well: the event listener maps
	            # the returned values one-to-one onto its four output components.
	            return (
	                gr.update(),
	                gr.Markdown("You must agree to the terms and conditions before proceeding."),
	                gr.update(),
	                gr.update(),
	            )

	        if not file_upload and not camera_upload:
	            # Nothing was submitted, so do not claim a successful upload.
	            return _finished("""<div style="text-align: center;"><h3>No document was provided. Please upload a file or take a picture first.</h3></div>""")

	        if file_upload:
	            # Case-insensitive check so ".PDF" files are converted too.
	            if file_upload.lower().endswith(".pdf"):
	                with tempfile.TemporaryDirectory() as path:
	                    # last_page stops the conversion early instead of
	                    # rendering every page of a long PDF and discarding most.
	                    page_paths = convert_from_path(
	                        file_upload,
	                        output_folder=path,
	                        paths_only=True,
	                        fmt="png",
	                        last_page=max_pdf_pages,
	                    )
	                    for file_path in page_paths[:max_pdf_pages]:
	                        _upload_hub(file_path)
	            else:
	                # Image files (JPG / PNG) are uploaded directly.
	                _upload_hub(file_upload)
	        if camera_upload:
	            _upload_hub(camera_upload)

	        return _finished("""<div style="text-align: center;"><h3>Upload was successful! You can upload another document.</h3></div>""")
	    except Exception as e:
	        # UI boundary: surface any failure to the user instead of crashing
	        # the event handler.
	        return _finished(f"""<div style="text-align: center;"><h3>An error occurred: {e}</h3></div>""")

	# Single-page UI: an agreement text with an "I Agree" button, and an upload
	# section (webcam capture + file upload + submit) that stays hidden until the
	# user has agreed.
	with gr.Blocks(fill_height=True) as demo:
	    agreement_markdown = gr.Markdown(
	        """
	<div style="text-align: center;">
	<h1>Document Upload Agreement</h1>

	<h3>This is a Hugging Face space for the docTR/OnnxTR community to collect multilingual data for the following project/s:</h3>

	<h3><a href="https://github.com/mindee/doctr">docTR</a></h3>

	<h3><a href="https://github.com/felixdittrich92/OnnxTR">OnnxTR</a></h3>
	</div>

	<h3>You can upload PDF (up to 10 pages), JPG, or PNG files to this space via file upload or webcam / from mobile phone. The uploaded documents will be used to train and evaluate models for the docTR/OnnxTR projects.</h3>

	<h3>All uploaded files can be found here: <a href="https://huggingface.co/datasets/Felix92/docTR-multilingual-data-collection">Hugging Face dataset</a></h3>

	<br>
	<br>

	<h3>By uploading a document, you explicitly agree to the following terms:</h3>

	<h3>1. You affirm that you are the owner or have the necessary rights to upload and share the document.</h3>

	<h3>2. You agree that the uploaded document will be made publicly available to everyone.</h3>

	<h3>3. You agree that the uploaded document can be used for any purpose, including commercial use, by any third party.</h3>
	"""
	    )

	    agree_button = gr.Button("I Agree to the Terms and Conditions")
	    agree_state = gr.State(value=False)  # State to store agreement status

	    with gr.Column(visible=False) as upload_section:
	        success_message = gr.Markdown(visible=True)
	        gr.Markdown("Upload a document via camera capture.")
	        # Plain "|" in the labels: "\|" is not a valid Python escape sequence
	        # and would show a literal backslash.
	        camera_upload = gr.Image(
	            label="Upload Image [JPG | PNG] via camera",
	            type="filepath",
	            sources=["webcam"],
	            mirror_webcam=False,
	        )
	        gr.Markdown("Upload a document via file upload.")
	        # File extensions are given with a leading dot, the form Gradio
	        # documents for extension filters.
	        file_upload = gr.File(
	            label="Upload File [JPG | PNG | PDF]",
	            file_types=[".pdf", ".jpg", ".jpeg", ".png"],
	            type="filepath",
	        )
	        submit_button = gr.Button("Submit")

	    def toggle_agreement_visibility():
	        """Hide the agreement text and button, record consent, show the upload section."""
	        return gr.update(visible=False), gr.update(visible=False), True, gr.update(visible=True)

	    # Clicking the "I Agree" button hides the agreement and shows the upload section
	    agree_button.click(
	        fn=toggle_agreement_visibility,
	        inputs=None,
	        outputs=[agreement_markdown, agree_button, agree_state, upload_section],
	    )

	    # Submitting uploads the documents and refreshes the status message and inputs
	    submit_button.click(
	        fn=upload_to_hub,
	        inputs=[file_upload, camera_upload, agree_state],
	        outputs=[agree_button, success_message, file_upload, camera_upload],
	    )

	# Start the Gradio app only when this file is run as a script, not on import.
	if __name__ == "__main__":
	    demo.launch()