Spaces:

diffusers
/

pr-test-analyzer

Sleeping

App Files Files Community

sayakpaul HF staff committed on Mar 26

Commit

bc69905

•

1 Parent(s): 226e982

Upload 3 files

Browse files

Files changed (3) hide show

app.py +32 -0
requirements.txt +3 -0
utils.py +133 -0

app.py ADDED Viewed

	@@ -0,0 +1,32 @@

+import gradio as gr
+import utils
+# Passed to gr.Interface as `description`.
+DESCRIPTION = """
+This Space helps you find out the top-n slow tests from a particular GitHub Action run step. It also buckets the tests w.r.t their durations.
+"""
+# Passed to gr.Interface as `article`; tells the user where to find the artifact name.
+ARTICLE = """
+To obtain the artifact name you're looking for, you need to scroll down the run page (for [example](https://github.com/huggingface/diffusers/actions/runs/8430950874/)) and select one from the 'Artifacts' section.
+"""
+# Inputs are listed in the same order as the parameters of utils.analyze_tests:
+# repo_id, token, run_id, artifact_name, top_n.
+with gr.Interface(
+    fn=utils.analyze_tests,
+    inputs=[
+        gr.Textbox(info="GitHub repository ID", placeholder="huggingface/diffusers"),
+        gr.Textbox(placeholder="GitHub token", type="password"),
+        gr.Textbox(placeholder="GitHub Action run ID"),
+        gr.Textbox(info="Artifact name", placeholder="pr_flax_cpu_test_reports"),
+        # The default must lie inside [minimum, maximum]; step=1 keeps the value
+        # a whole number because it ends up as a slice bound.
+        gr.Slider(1, 20, value=1, step=1, label="top-n", info="Top-n slow tests."),
+    ],
+    outputs=gr.Markdown(label="output"),
+    examples=[
+        ['huggingface/diffusers', 'ghp_XXX', '8430950874', 'pr_torch_cpu_pipelines_test_reports', 5],
+    ],
+    title="Short analysis of PR tests!",
+    description=DESCRIPTION,
+    article=ARTICLE,
+    allow_flagging="never",
+    cache_examples=False,
+) as demo:
+    demo.queue()
+    demo.launch(show_error=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+zipfile
+tempfile
+requests

utils.py ADDED Viewed

	@@ -0,0 +1,133 @@

+import requests
+import zipfile
+import tempfile
+def group_tests_by_duration(file_path: str) -> dict:
+    """Sort the timed entries of a durations report into fixed time buckets.
+
+    A line is considered when it has at least three whitespace-separated
+    fields and its first field contains an ``s``; that field, minus trailing
+    ``s`` characters, must parse as a float. Everything from the third field
+    onwards is taken as the test name.
+
+    Args:
+        file_path: Path of the text file to read.
+
+    Returns:
+        A dict mapping each bucket label to a list of ``(duration, test_name)``
+        tuples, in file order. Durations that fit no bucket are dropped.
+    """
+    # Half-open intervals [low, high) keyed by the label shown to the user.
+    bounds_by_label = {
+        "0-5s": (0, 5),
+        "5-10s": (5, 10),
+        "10-15s": (10, 15),
+        "15-20s": (15, 20),
+        ">20s": (20, float('inf')),
+    }
+    grouped = {label: [] for label in bounds_by_label}
+    with open(file_path, 'r') as report:
+        for raw_line in report:
+            fields = raw_line.split()
+            # Ignore lines that do not look like "<duration>s <phase> <test name>".
+            if len(fields) < 3 or 's' not in fields[0]:
+                continue
+            try:
+                seconds = float(fields[0].rstrip('s'))
+            except ValueError:
+                # First field was not a number after all; skip the line.
+                continue
+            name = ' '.join(fields[2:])
+            matching_label = next(
+                (label for label, (low, high) in bounds_by_label.items() if low <= seconds < high),
+                None,
+            )
+            if matching_label is not None:
+                grouped[matching_label].append((seconds, name))
+    return grouped
+def extract_top_n_tests(file_path, n=10):
+    """Return the ``n`` slowest ``call``-phase entries of a durations report.
+
+    Only lines with at least three whitespace-separated fields whose second
+    field is exactly ``call`` are considered. The first field, minus trailing
+    ``s`` characters, must parse as a float; other lines are skipped.
+
+    Args:
+        file_path: Path of the text file to read.
+        n: Maximum number of entries to return. It is truncated to an int and
+            clamped at zero, so a float (for instance from a UI slider) or a
+            negative value cannot break or distort the slice.
+
+    Returns:
+        A dict mapping test name to ``"<duration>s"``, ordered from slowest
+        to fastest.
+    """
+    limit = max(int(n), 0)
+    test_durations = []
+    with open(file_path, 'r') as file:
+        for line in file:
+            parts = line.split()
+            if len(parts) < 3 or parts[1] != 'call':
+                continue
+            try:
+                duration = float(parts[0].rstrip('s'))
+            except ValueError:
+                # Skip lines whose first field is not a number.
+                continue
+            test_durations.append((duration, ' '.join(parts[2:])))
+    # Slowest first; entries with equal durations keep their file order.
+    test_durations.sort(reverse=True, key=lambda x: x[0])
+    return {name: f"{duration}s" for duration, name in test_durations[:limit]}
+def fetch_test_duration_artifact(repo_id, token, run_id, artifact_name):
+    """Download a workflow-run artifact and extract its test-durations file.
+
+    Args:
+        repo_id: Repository in ``owner/name`` form.
+        token: GitHub token sent in the ``Authorization`` header.
+        run_id: ID of the GitHub Actions run whose artifacts are listed.
+        artifact_name: Exact name of the artifact to download.
+
+    Returns:
+        Path of the extracted archive member whose name contains
+        ``"duration"``. The file lives in a freshly created temporary
+        directory that is not removed by this function.
+
+    Raises:
+        ValueError: If ``repo_id`` is malformed, no artifact has the given
+            name, or the archive contains no durations file.
+        requests.HTTPError: If GitHub answers with an error status.
+    """
+    request_timeout = 30  # seconds; avoids hanging forever on a stalled connection
+    owner_repo = repo_id.strip().split("/")
+    if len(owner_repo) < 2 or not owner_repo[0] or not owner_repo[1]:
+        raise ValueError(f"Expected a repository ID like 'owner/name', got {repo_id!r}.")
+    artifacts_url = f'https://api.github.com/repos/{owner_repo[0]}/{owner_repo[1]}/actions/runs/{run_id}/artifacts'
+    headers = {'Authorization': f'token {token}'}
+    # Ask for the largest page so runs with many artifacts are less likely to be truncated.
+    response = requests.get(artifacts_url, headers=headers, params={'per_page': 100}, timeout=request_timeout)
+    response.raise_for_status()
+    download_url = next(
+        (
+            artifact['archive_download_url']
+            for artifact in response.json().get('artifacts', [])
+            if artifact['name'] == artifact_name
+        ),
+        None,
+    )
+    if not download_url:
+        raise ValueError(f"No artifact named {artifact_name!r} was found for run {run_id} 🥲")
+    # Extract into a private directory rather than the working directory so the
+    # user-supplied artifact name never becomes a file path and concurrent
+    # requests cannot overwrite each other's files.
+    work_dir = tempfile.mkdtemp(prefix="test_durations_")
+    with requests.get(download_url, headers=headers, stream=True, timeout=request_timeout) as download_response:
+        download_response.raise_for_status()
+        # The archive itself is only needed until extraction, so keep it in an
+        # anonymous temporary file that disappears when closed.
+        with tempfile.TemporaryFile() as zip_buffer:
+            for chunk in download_response.iter_content(chunk_size=8192):
+                zip_buffer.write(chunk)
+            zip_buffer.seek(0)
+            with zipfile.ZipFile(zip_buffer, 'r') as zip_ref:
+                for member in zip_ref.namelist():
+                    # Directory entries end with "/" and cannot be the report file.
+                    if "duration" in member and not member.endswith("/"):
+                        return zip_ref.extract(member, work_dir)
+    raise ValueError(f"Artifact {artifact_name!r} does not contain a test durations file 🥲")
+def format_to_markdown_str(test_bucket_map, top_n_slow_tests, repo_id, run_id, artifact_name):
+    """Render the analysis results as a Markdown report.
+
+    Args:
+        test_bucket_map: Mapping of bucket label to number of tests in it.
+        top_n_slow_tests: Mapping of test name to its duration string.
+        repo_id: Repository in ``owner/name`` form, used to build the run URL.
+        run_id: GitHub Actions run ID, used to build the run URL.
+        artifact_name: Artifact name shown in the first heading.
+
+    Returns:
+        A Markdown string with the slowest tests, the per-bucket counts and a
+        link to the run.
+    """
+    run_url = f"https://github.com/{repo_id}/actions/runs/{run_id}/"
+    pieces = [f"\n## Top {len(top_n_slow_tests)} slow test for {artifact_name}\n\n"]
+    # Only the part after the last "/" of each test name is shown.
+    pieces.extend(
+        f"* {test.split('/')[-1]}: {duration}\n"
+        for test, duration in top_n_slow_tests.items()
+    )
+    pieces.append("\n## Bucketed durations of the tests\n\n")
+    for bucket, num_tests in test_bucket_map.items():
+        # Prefix labels containing ">" with a literal backslash so Markdown
+        # does not interpret the character; "\\" is the valid spelling of
+        # that backslash inside a string literal.
+        label = f"\\{bucket}" if ">" in bucket else bucket
+        pieces.append(f"* {label}: {num_tests} tests\n")
+    pieces.append(f"\nRun URL: [{run_url}]({run_url}).")
+    return "".join(pieces)
+def analyze_tests(repo_id, token, run_id, artifact_name, top_n):
+    """Fetch a run's durations report and summarise it as Markdown.
+
+    Downloads the durations file for the given artifact, counts the tests in
+    each duration bucket, picks the ``top_n`` slowest tests, prints both
+    intermediate results, and returns the formatted Markdown report.
+    """
+    durations_path = fetch_test_duration_artifact(
+        repo_id=repo_id,
+        token=token,
+        run_id=run_id,
+        artifact_name=artifact_name,
+    )
+    # Reduce each bucket's list of tests to a simple count.
+    bucket_counts = {}
+    for bucket, tests in group_tests_by_duration(durations_path).items():
+        bucket_counts[bucket] = len(tests)
+    print(bucket_counts)
+    slowest = extract_top_n_tests(durations_path, n=top_n)
+    print(slowest)
+    return format_to_markdown_str(bucket_counts, slowest, repo_id, run_id, artifact_name)