Spaces:
Runtime error
Runtime error
NimaBoscarino
committed on
Commit
•
7a3d7a6
1
Parent(s):
998362d
For Ezi
Browse files- .idea/.gitignore +8 -0
- .idea/inspectionProfiles/Project_Default.xml +30 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/misc.xml +4 -0
- .idea/model-card-regulatory-check.iml +8 -0
- .idea/modules.xml +8 -0
- .idea/vcs.xml +6 -0
- Dockerfile +27 -0
- __pycache__/main.cpython-310.pyc +0 -0
- __pycache__/server.cpython-310.pyc +0 -0
- main.py +154 -0
- requirements.txt +5 -0
- server.py +25 -0
.idea/.gitignore
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default ignored files
|
2 |
+
/shelf/
|
3 |
+
/workspace.xml
|
4 |
+
# Editor-based HTTP Client requests
|
5 |
+
/httpRequests/
|
6 |
+
# Datasource local storage ignored files
|
7 |
+
/dataSources/
|
8 |
+
/dataSources.local.xml
|
.idea/inspectionProfiles/Project_Default.xml
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<component name="InspectionProjectProfileManager">
|
2 |
+
<profile version="1.0">
|
3 |
+
<option name="myName" value="Project Default" />
|
4 |
+
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
5 |
+
<option name="ignoredPackages">
|
6 |
+
<value>
|
7 |
+
<list size="1">
|
8 |
+
<item index="0" class="java.lang.String" itemvalue="pytest-runner" />
|
9 |
+
</list>
|
10 |
+
</value>
|
11 |
+
</option>
|
12 |
+
</inspection_tool>
|
13 |
+
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
14 |
+
<option name="ignoredErrors">
|
15 |
+
<list>
|
16 |
+
<option value="N801" />
|
17 |
+
<option value="N806" />
|
18 |
+
</list>
|
19 |
+
</option>
|
20 |
+
</inspection_tool>
|
21 |
+
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
22 |
+
<option name="ignoredIdentifiers">
|
23 |
+
<list>
|
24 |
+
<option value="dict.labels" />
|
25 |
+
<option value="geograpy.places.countries" />
|
26 |
+
</list>
|
27 |
+
</option>
|
28 |
+
</inspection_tool>
|
29 |
+
</profile>
|
30 |
+
</component>
|
.idea/inspectionProfiles/profiles_settings.xml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<component name="InspectionProjectProfileManager">
|
2 |
+
<settings>
|
3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
4 |
+
<version value="1.0" />
|
5 |
+
</settings>
|
6 |
+
</component>
|
.idea/misc.xml
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (sandbox)" project-jdk-type="Python SDK" />
|
4 |
+
</project>
|
.idea/model-card-regulatory-check.iml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<module type="PYTHON_MODULE" version="4">
|
3 |
+
<component name="NewModuleRootManager">
|
4 |
+
<content url="file://$MODULE_DIR$" />
|
5 |
+
<orderEntry type="jdk" jdkName="Python 3.10 (sandbox)" jdkType="Python SDK" />
|
6 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
7 |
+
</component>
|
8 |
+
</module>
|
.idea/modules.xml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="ProjectModuleManager">
|
4 |
+
<modules>
|
5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/model-card-regulatory-check.iml" filepath="$PROJECT_DIR$/.idea/model-card-regulatory-check.iml" />
|
6 |
+
</modules>
|
7 |
+
</component>
|
8 |
+
</project>
|
.idea/vcs.xml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="VcsDirectoryMappings">
|
4 |
+
<mapping directory="" vcs="Git" />
|
5 |
+
</component>
|
6 |
+
</project>
|
Dockerfile
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.11-slim-bullseye

# Set the working directory to /code
WORKDIR /code

# Copy only requirements.txt, so dependencies are installed before the rest of the source is added
COPY ./requirements.txt /code/requirements.txt

# Install the dependencies listed in requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user
# Switch to the "user" user; every later instruction and the server itself run as this user
USER user
# Set home to the user's home directory and put its local bin directory on PATH
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the app directory inside the user's home
WORKDIR $HOME/app

# Copy the current directory contents into the container at $HOME/app setting the owner to the user
COPY --chown=user . $HOME/app

# Port uvicorn listens on (see --port below)
EXPOSE 7860
CMD ["uvicorn", "server:app","--proxy-headers", "--host", "0.0.0.0", "--port", "7860"]
|
__pycache__/main.cpython-310.pyc
ADDED
Binary file (5.1 kB). View file
|
|
__pycache__/server.cpython-310.pyc
ADDED
Binary file (803 Bytes). View file
|
|
main.py
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from typing import Dict, Any, Optional, List
|
3 |
+
import re
|
4 |
+
from abc import ABC, abstractmethod
|
5 |
+
|
6 |
+
from huggingface_hub import (ModelCard, comment_discussion,
|
7 |
+
create_discussion, get_discussion_details,
|
8 |
+
get_repo_discussions)
|
9 |
+
import markdown
|
10 |
+
from bs4 import BeautifulSoup
|
11 |
+
from tabulate import tabulate
|
12 |
+
from difflib import SequenceMatcher
|
13 |
+
|
14 |
+
# Value of the "KEY" environment variable (None when unset). Not referenced by
# the code visible in this module - NOTE(review): confirm it is still needed here.
KEY = os.environ.get("KEY")
|
15 |
+
|
16 |
+
|
17 |
+
def similar(a, b):
    """Return the similarity ratio of two sequences.

    Thin wrapper around ``difflib.SequenceMatcher``: the result is a float
    between 0.0 (nothing in common) and 1.0 (identical sequences).
    """
    matcher = SequenceMatcher(a=a, b=b)
    return matcher.ratio()
|
20 |
+
|
21 |
+
|
22 |
+
class ComplianceCheck(ABC):
    """Base class for one named compliance check run against a model card."""

    def __init__(self, name):
        # Human-readable label; check_compliance() uses it as the result key.
        self.name = name

    @abstractmethod
    def check(self, card: BeautifulSoup) -> bool:
        """Evaluate this check against the parsed model card.

        Concrete checks must override this method and return the outcome.
        """
        raise NotImplementedError()
|
29 |
+
|
30 |
+
|
31 |
+
class ModelProviderIdentityCheck(ComplianceCheck):
    """Check that the model card states who developed the model."""

    def __init__(self):
        super().__init__("Identity and Contact Details")

    def check(self, card: BeautifulSoup):
        """Return True when the "Developed by" entry has been filled in.

        Finds the first text node containing "Developed by", walks up two
        levels to the enclosing element and reads the second child of that
        element as the value (presumably a list item laid out as
        ``<li><strong>Developed by:</strong> value</li>`` - TODO confirm for
        cards that do not follow the standard template).

        Returns False when the card has no such entry, when the entry has no
        value next to its label, or when the value is still the
        "[More Information Needed]" placeholder.
        """
        matches = card.find_all(string=re.compile("Developed by"))
        if not matches:
            # The card does not mention "Developed by" at all; previously this
            # raised IndexError and took the whole webhook request down.
            return False

        label = matches[0].parent
        entry = label.parent if label is not None else None
        if entry is None:
            # The text sits at the top of the document, not inside an entry.
            return False

        parts = list(entry.children)
        if len(parts) < 2:
            # Label without anything following it.
            return False

        developed_by = parts[1].text.strip()
        return developed_by != "[More Information Needed]"
|
43 |
+
|
44 |
+
|
45 |
+
class IntendedPurposeCheck(ComplianceCheck):
    """Placeholder check for the model card's intended-purpose information."""

    def __init__(self):
        super().__init__("Intended Purpose")

    def check(self, card: BeautifulSoup):
        """Always return False; ``card`` is not inspected yet."""
        # TODO: locate the "Direct Use" heading, e.g.
        # card.find_all("h2", text="Direct Use"), and return False only while
        # its content is still "[More Information Needed]".
        return False
|
57 |
+
|
58 |
+
|
59 |
+
# Checks applied to every model card by run_compliance_check(); the report
# table lists them in this order.
compliance_checks = [
    ModelProviderIdentityCheck(),
    IntendedPurposeCheck()
    # Checks that are named here but have no implementation yet:
    # "General Limitations",
    # "Computational and Hardware Requirements",
    # "Carbon Emissions"
]
|
66 |
+
|
67 |
+
|
68 |
+
def parse_webhook_post(data: Dict[str, Any]) -> Optional[str]:
    """Extract the repo name from a webhook payload.

    Args:
        data: Decoded JSON body of the webhook request. It comes from the
            network, so nothing about its shape is taken for granted.

    Returns:
        The value of ``data["repo"]["name"]`` when the event scope is
        ``"repo"`` and the repo type is ``"model"``; ``None`` when the event
        has another scope or the payload lacks the expected fields.

    Raises:
        ValueError: If the payload describes a repo whose type is not
            ``"model"``.
    """
    if not isinstance(data, dict):
        return None

    event = data.get("event")
    if not isinstance(event, dict) or event.get("scope") != "repo":
        return None

    repo = data.get("repo")
    if not isinstance(repo, dict) or "name" not in repo or "type" not in repo:
        # Malformed payload: report "nothing to do" instead of raising KeyError.
        return None

    if repo["type"] != "model":
        raise ValueError("Incorrect repo type.")
    return repo["name"]
|
78 |
+
|
79 |
+
|
80 |
+
def check_compliance(comp_checks: List[ComplianceCheck], card: BeautifulSoup) -> Dict[str, bool]:
    """Run each check against ``card`` and map the check's name to its result."""
    results = {}
    for comp_check in comp_checks:
        results[comp_check.name] = comp_check.check(card)
    return results
|
82 |
+
|
83 |
+
|
84 |
+
def run_compliance_check(repo_name):
    """Load the model card of ``repo_name`` and run every registered check on it.

    The card's markdown content is rendered to HTML and parsed with
    BeautifulSoup so the checks can query its structure. Returns the
    name-to-result mapping built by ``check_compliance``.
    """
    model_card: ModelCard = ModelCard.load(repo_id_or_path=repo_name)
    rendered_html = markdown.markdown(model_card.content)
    soup = BeautifulSoup(rendered_html, features="html.parser")
    return check_compliance(compliance_checks, soup)
|
91 |
+
|
92 |
+
|
93 |
+
def create_metadata_breakdown_table(compliance_check_dictionary):
    """Render the check results as a GitHub-style markdown table.

    Each entry of the mapping becomes one row: the check name in the
    "Compliance Check" column and its result in the "Present" column.
    """
    rows = [(field, value) for field, value in compliance_check_dictionary.items()]
    return tabulate(
        rows, headers=("Compliance Check", "Present"), tablefmt="github"
    )
|
101 |
+
|
102 |
+
|
103 |
+
def create_markdown_report(
    desired_metadata_dictionary, repo_name, update: bool = False
):
    """Build the markdown text of the compliance report for ``repo_name``.

    Args:
        desired_metadata_dictionary: Mapping of check name to result; rendered
            as the breakdown table.
        repo_name: Repo identifier interpolated into the introduction.
        update: When true, the heading is suffixed with "(updated)".

    Returns:
        The report as a single markdown string.
    """
    heading_suffix = "(updated)" if update else ""
    breakdown = create_metadata_breakdown_table(desired_metadata_dictionary)
    # The template's line layout is part of the output (reports are compared
    # with previously posted comments), so it is reproduced verbatim.
    return f"""# Model Card Regulatory Compliance report card {heading_suffix}
\n
This is an automatically produced model card regulatory compliance report card for {repo_name}.
This report is meant as a POC!
\n
## Breakdown of metadata fields for your model
\n
{breakdown}
\n
"""
|
117 |
+
|
118 |
+
|
119 |
+
def create_or_update_report(compliance_check, repo_name):
    """Publish the compliance report in the repo's discussions.

    If the model repo already has an open report discussion, the report is
    added to it as a comment, but only when it differs from the most recent
    comment there. Otherwise a new discussion is opened with the report as
    its description.

    Args:
        compliance_check: Mapping of check name to result, as produced by
            ``run_compliance_check``.
        repo_name: Id of the model repo to report on.

    Returns:
        Always ``True``.
    """
    # One title for both looking up and creating the thread. The lookup used
    # to search for "Metadata Report Card", which never matched the title of
    # the discussions this function creates, so every event opened a new one.
    report_title = "Model Card Regulatory Compliance Report Card"

    report = create_markdown_report(compliance_check, repo_name, update=False)
    repo_discussions = get_repo_discussions(repo_name, repo_type="model")

    for discussion in repo_discussions:
        if discussion.title != report_title or discussion.status != "open":
            continue

        # An existing open report card thread
        discussion_details = get_discussion_details(
            repo_name, discussion.num, repo_type="model"
        )
        # Not every event is a comment (status and title changes carry no
        # content), so take the most recent event that has text.
        last_comment = next(
            (
                event.content
                for event in reversed(discussion_details.events)
                if getattr(event, "content", None) is not None
            ),
            "",
        )
        updated_report = create_markdown_report(
            compliance_check, repo_name, update=True
        )
        # The last comment is either the initial report or an "(updated)"
        # one; comparing against only the initial form would re-post an
        # unchanged report after the first update.
        closest = max(
            similar(report, last_comment),
            similar(updated_report, last_comment),
        )
        if closest <= 0.999:
            comment_discussion(
                repo_name,
                discussion.num,
                comment=updated_report,
                repo_type="model",
            )
        return True

    create_discussion(
        repo_name,
        report_title,
        description=report,
        repo_type="model",
    )
    return True
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fastapi
uvicorn
markdown
beautifulsoup4
tabulate
# Imported by main.py (ModelCard and the discussion helpers)
huggingface_hub
|
server.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from fastapi import FastAPI, Request, Response
|
3 |
+
from main import parse_webhook_post, run_compliance_check, create_or_update_report
|
4 |
+
|
5 |
+
# Value of the "KEY" environment variable (None when unset); presumably the
# shared secret expected in the X-Webhook-Secret header - TODO confirm.
KEY = os.environ.get("KEY")

# ASGI application; the Dockerfile starts it with uvicorn as "server:app".
app = FastAPI()
|
8 |
+
|
9 |
+
|
10 |
+
@app.post("/webhook")
async def webhook(request: Request):
    """Handle a webhook call: check the model card and publish the report.

    Responds with 401 when a secret is configured and the request does not
    present it, and with 400 when the body is not valid JSON or does not
    describe an event on a model repo. Otherwise returns the result of
    ``create_or_update_report``.
    """
    # NOTE(review): this check used to be commented out, which let anyone
    # trigger reports on arbitrary repos. It is enforced only when KEY is
    # set, so deployments without a configured secret behave as before.
    if KEY and request.headers.get("X-Webhook-Secret") != KEY:
        return Response("Invalid secret", status_code=401)

    try:
        data = await request.json()
        repo_name = parse_webhook_post(data)
    except (ValueError, KeyError, TypeError):
        # Invalid JSON, a payload missing expected fields, or a non-model
        # repo: a client error, not a server failure.
        return Response("Unable to parse webhook data", status_code=400)

    if not repo_name:
        return Response("Unable to parse webhook data", status_code=400)

    compliance_check = run_compliance_check(repo_name)
    return create_or_update_report(compliance_check, repo_name)
|