NimaBoscarino committed on
Commit
25bf2cc
1 Parent(s): b4f5e30

WIP: initial version of checks, creating a Gradio UI for the app

Browse files
Files changed (7) hide show
  1. app.py +38 -12
  2. bloom_card.py +147 -0
  3. compliance_checks.py +19 -6
  4. main.py +0 -78
  5. server.py +0 -25
  6. tests/conftest.py +7 -0
  7. tests/test_compliance_checks.py +144 -6
app.py CHANGED
@@ -1,28 +1,54 @@
1
  import gradio as gr
2
- from huggingface_hub import ModelCard
3
  from compliance_checks import (
4
  ComplianceSuite,
5
  ModelProviderIdentityCheck,
6
- IntendedPurposeCheck
 
 
7
  )
8
 
 
9
 
10
- def run_compliance_check(repo_name):
11
- model_card = ModelCard.load(repo_id_or_path=repo_name).content
12
 
 
13
  suite = ComplianceSuite(checks=[
14
  ModelProviderIdentityCheck(),
15
- IntendedPurposeCheck()
 
 
16
  ])
17
 
18
  results = suite.run(model_card)
19
 
20
- return str(results)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
 
 
 
 
 
22
 
23
- gr.Interface(
24
- fn=run_compliance_check,
25
- inputs="text",
26
- outputs="text",
27
- examples=[["society-ethics/model-card-webhook-test"]]
28
- ).launch()
 
1
  import gradio as gr
2
+
3
  from compliance_checks import (
4
  ComplianceSuite,
5
  ModelProviderIdentityCheck,
6
+ IntendedPurposeCheck,
7
+ GeneralLimitationsCheck,
8
+ ComputationalRequirementsCheck,
9
  )
10
 
11
+ from bloom_card import bloom_card
12
 
 
 
13
 
14
+ def run_compliance_check(model_card: str):
15
  suite = ComplianceSuite(checks=[
16
  ModelProviderIdentityCheck(),
17
+ IntendedPurposeCheck(),
18
+ GeneralLimitationsCheck(),
19
+ ComputationalRequirementsCheck(),
20
  ])
21
 
22
  results = suite.run(model_card)
23
 
24
+ return str([r[0] for r in results])
25
+
26
+
27
+ with gr.Blocks() as demo:
28
+ gr.Markdown("""\
29
+ # Model Card Validator
30
+ Following Article 13 of the EU AI Act
31
+ """)
32
+
33
+ with gr.Row():
34
+ with gr.Column():
35
+ model_card_box = gr.TextArea()
36
+ populate_sample = gr.Button(value="Populate Sample")
37
+ submit = gr.Button()
38
+
39
+ with gr.Column():
40
+ results_list = gr.Text()
41
+
42
+ submit.click(
43
+ fn=run_compliance_check,
44
+ inputs=[model_card_box],
45
+ outputs=[results_list]
46
+ )
47
 
48
+ populate_sample.click(
49
+ fn=lambda: bloom_card,
50
+ inputs=[],
51
+ outputs=[model_card_box]
52
+ )
53
 
54
+ demo.launch()
 
 
 
 
 
bloom_card.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bloom_card = """\
2
+ # Model Details
3
+
4
+ BLOOM is an autoregressive Large Language Model (LLM), trained to continue text from a prompt on vast amounts of text data using industrial-scale computational resources. As such, it is able to output coherent text in 46 languages and 13 programming languages that is hardly distinguishable from text written by humans. BLOOM can also be instructed to perform text tasks it hasn't been explicitly trained for, by casting them as text generation tasks.
5
+
6
+ ## Basics
7
+ *This section provides information about the model type, version, license, funders, release date, developers, and contact information.*
8
+ *It is useful for anyone who wants to reference the model.*
9
+
10
+ **Developed by:** BigScience ([website](https://bigscience.huggingface.co))
11
+
12
+ *All collaborators are either volunteers or have an agreement with their employer. (Further breakdown of participants forthcoming.)*
13
+
14
+ **Model Type:** Transformer-based Language Model
15
+
16
+ **Checkpoints format:** `transformers` (Megatron-DeepSpeed format available [here](https://huggingface.co/bigscience/bloom-optimizer-states))
17
+
18
+ **Version:** 1.0.0
19
+
20
+ **Languages:** Multiple; see [training data](#training-data)
21
+
22
+ **License:** RAIL License v1.0 ([link](https://huggingface.co/spaces/bigscience/license) / [article and FAQ](https://bigscience.huggingface.co/blog/the-bigscience-rail-license))
23
+
24
+ **Release Date Estimate:** Monday, 11.July.2022
25
+
26
+ **Send Questions to:** bigscience-contact@googlegroups.com
27
+
28
+ **Cite as:** BigScience, _BigScience Language Open-science Open-access Multilingual (BLOOM) Language Model_. International, May 2021-May 2022
29
+
30
+ **Funded by:**
31
+
32
+ * The French government.
33
+
34
+ * Hugging Face ([website](https://huggingface.co)).
35
+
36
+ * Organizations of contributors. *(Further breakdown of organizations forthcoming.)*
37
+
38
+ ## Intended Use
39
+
40
+ This model is being created in order to enable public research on large language models (LLMs). LLMs are intended to be used for language generation or as a pretrained base model that can be further fine-tuned for specific tasks. Use cases below are not exhaustive.
41
+
42
+ ### Direct Use
43
+
44
+ - Text generation
45
+
46
+ - Exploring characteristics of language generated by a language model
47
+
48
+ - Examples: Cloze tests, counterfactuals, generations with reframings
49
+
50
+ ### Downstream Use
51
+
52
+ - Tasks that leverage language models include: Information Extraction, Question Answering, Summarization
53
+
54
+ ### Out-of-Scope Use
55
+
56
+ Using the model in [high-stakes](#high-stakes) settings is out of scope for this model. The model is not designed for [critical decisions](#critical-decisions) nor uses with any material consequences on an individual's livelihood or wellbeing. The model outputs content that appears factual but may not be correct.
57
+
58
+ Out-of-scope Uses Include:
59
+
60
+ - Usage in biomedical domains, political and legal domains, or finance domains
61
+
62
+ - Usage for evaluating or scoring individuals, such as for employment, education, or credit
63
+
64
+ - Applying the model for critical automatic decisions, generating factual content, creating reliable summaries, or generating predictions that must be correct
65
+
66
+ #### Misuse
67
+
68
+ Intentionally using the model for harm, violating [human rights](#human-rights), or other kinds of malicious activities, is a misuse of this model. This includes:
69
+
70
+ - Spam generation
71
+
72
+ - Disinformation and influence operations
73
+
74
+ - Disparagement and defamation
75
+
76
+ - Harassment and abuse
77
+
78
+ - [Deception](#deception)
79
+
80
+ - Unconsented impersonation and imitation
81
+
82
+ - Unconsented surveillance
83
+
84
+ - Generating content without attribution to the model, as specified in the [RAIL License, Use Restrictions](https://huggingface.co/spaces/bigscience/license)
85
+
86
+ ## Bias, Risks, and Limitations
87
+ *This section identifies foreseeable harms and misunderstandings.*
88
+
89
+ Model may:
90
+
91
+ - Overrepresent some viewpoints and underrepresent others
92
+
93
+ - Contain stereotypes
94
+
95
+ - Contain [personal information](#personal-data-and-information)
96
+
97
+ - Generate:
98
+
99
+ - Hateful, abusive, or violent language
100
+
101
+ - Discriminatory or prejudicial language
102
+
103
+ - Content that may not be appropriate for all settings, including sexual content
104
+
105
+ - Make errors, including producing incorrect information as if it were factual
106
+
107
+ - Generate irrelevant or repetitive outputs
108
+
109
+ - Induce users into attributing human traits to it, such as sentience or consciousness
110
+
111
+ ## Technical Specifications
112
+ *This section includes details about the model objective and architecture, and the compute infrastructure.*
113
+ *It is useful for people interested in model development.*
114
+
115
+ ### Compute infrastructure
116
+ Jean Zay Public Supercomputer, provided by the French government (see [announcement](https://www.enseignementsup-recherche.gouv.fr/fr/signature-du-marche-d-acquisition-de-l-un-des-supercalculateurs-les-plus-puissants-d-europe-46733)).
117
+
118
+ #### Hardware
119
+
120
+ * 384 A100 80GB GPUs (48 nodes)
121
+
122
+ * Additional 32 A100 80GB GPUs (4 nodes) in reserve
123
+
124
+ * 8 GPUs per node Using NVLink 4 inter-gpu connects, 4 OmniPath links
125
+
126
+ * CPU: AMD
127
+
128
+ * CPU memory: 512GB per node
129
+
130
+ * GPU memory: 640GB per node
131
+
132
+ * Inter-node connect: Omni-Path Architecture (OPA)
133
+
134
+ * NCCL-communications network: a fully dedicated subnet
135
+
136
+ * Disc IO network: shared network with other types of nodes
137
+
138
+ #### Software
139
+
140
+ * Megatron-DeepSpeed ([Github link](https://github.com/bigscience-workshop/Megatron-DeepSpeed))
141
+
142
+ * DeepSpeed ([Github link](https://github.com/microsoft/DeepSpeed))
143
+
144
+ * PyTorch (pytorch-1.11 w/ CUDA-11.5; see [Github link](https://github.com/pytorch/pytorch))
145
+
146
+ * apex ([Github link](https://github.com/NVIDIA/apex))
147
+ """
compliance_checks.py CHANGED
@@ -6,16 +6,16 @@ from bs4 import BeautifulSoup, Comment
6
 
7
  class ComplianceCheck(ABC):
8
  @abstractmethod
9
- def run_check(self, card: BeautifulSoup) -> bool:
10
  raise NotImplementedError
11
 
12
 
13
  class ModelProviderIdentityCheck(ComplianceCheck):
14
  def run_check(self, card: BeautifulSoup):
15
  try:
16
- model_description = card.find("h3", string="Model Description")
17
- description_list = model_description.find_next_siblings()[0]
18
- developer = description_list.find(string="Developed by:").parent.next_sibling.strip()
19
 
20
  if developer == "[More Information Needed]":
21
  return False, None
@@ -26,6 +26,8 @@ class ModelProviderIdentityCheck(ComplianceCheck):
26
 
27
 
28
  def walk_to_next_heading(card, heading, heading_text):
 
 
29
  try:
30
  heading_node = card.find(heading, string=heading_text)
31
 
@@ -34,10 +36,10 @@ def walk_to_next_heading(card, heading, heading_text):
34
  sibling_gen = heading_node.nextSiblingGenerator()
35
  sibling = next(sibling_gen)
36
 
37
- while not (sibling.name is not None and sibling.name.startswith("h")) or sibling.name is None:
38
  if not isinstance(sibling, Comment):
39
  content = content + sibling.text.strip()
40
- sibling = next(sibling_gen)
41
 
42
  if content.strip() == "[More Information Needed]":
43
  return False, None
@@ -50,6 +52,7 @@ def walk_to_next_heading(card, heading, heading_text):
50
  class IntendedPurposeCheck(ComplianceCheck):
51
  def run_check(self, card: BeautifulSoup):
52
  direct_use_check, direct_use_content = walk_to_next_heading(card, "h3", "Direct Use")
 
53
  downstream_use_check, downstream_use_content = walk_to_next_heading(card, "h3", "Downstream Use [optional]")
54
  out_of_scope_use_check, out_of_scope_use_content = walk_to_next_heading(card, "h3", "Out-of-Scope Use")
55
  return (
@@ -58,6 +61,16 @@ class IntendedPurposeCheck(ComplianceCheck):
58
  )
59
 
60
 
 
 
 
 
 
 
 
 
 
 
61
  class ComplianceSuite:
62
  def __init__(self, checks):
63
  self.checks = checks
 
6
 
7
  class ComplianceCheck(ABC):
8
  @abstractmethod
9
+ def run_check(self, card: BeautifulSoup):
10
  raise NotImplementedError
11
 
12
 
13
  class ModelProviderIdentityCheck(ComplianceCheck):
14
  def run_check(self, card: BeautifulSoup):
15
  try:
16
+ developed_by = card.find("strong", string="Developed by:")
17
+
18
+ developer = "".join([str(s) for s in developed_by.next_siblings]).strip()
19
 
20
  if developer == "[More Information Needed]":
21
  return False, None
 
26
 
27
 
28
  def walk_to_next_heading(card, heading, heading_text):
29
+ stop_at = [heading, f"h{int(heading[1]) - 1}"]
30
+
31
  try:
32
  heading_node = card.find(heading, string=heading_text)
33
 
 
36
  sibling_gen = heading_node.nextSiblingGenerator()
37
  sibling = next(sibling_gen)
38
 
39
+ while sibling and (not (sibling.name is not None and sibling.name in stop_at) or sibling.name is None):
40
  if not isinstance(sibling, Comment):
41
  content = content + sibling.text.strip()
42
+ sibling = next(sibling_gen, None)
43
 
44
  if content.strip() == "[More Information Needed]":
45
  return False, None
 
52
  class IntendedPurposeCheck(ComplianceCheck):
53
  def run_check(self, card: BeautifulSoup):
54
  direct_use_check, direct_use_content = walk_to_next_heading(card, "h3", "Direct Use")
55
+ # TODO: Handle the "[optional]" heading suffix, which is absent from some cards (e.g. BLOOM).
56
  downstream_use_check, downstream_use_content = walk_to_next_heading(card, "h3", "Downstream Use [optional]")
57
  out_of_scope_use_check, out_of_scope_use_content = walk_to_next_heading(card, "h3", "Out-of-Scope Use")
58
  return (
 
61
  )
62
 
63
 
64
+ class GeneralLimitationsCheck(ComplianceCheck):
65
+ def run_check(self, card: BeautifulSoup):
66
+ return walk_to_next_heading(card, "h2", "Bias, Risks, and Limitations")
67
+
68
+
69
+ class ComputationalRequirementsCheck(ComplianceCheck):
70
+ def run_check(self, card: BeautifulSoup):
71
+ return walk_to_next_heading(card, "h3", "Compute infrastructure")
72
+
73
+
74
  class ComplianceSuite:
75
  def __init__(self, checks):
76
  self.checks = checks
main.py DELETED
@@ -1,78 +0,0 @@
1
- import os
2
-
3
- from huggingface_hub import (comment_discussion,
4
- create_discussion, get_discussion_details,
5
- get_repo_discussions)
6
- from tabulate import tabulate
7
- from difflib import SequenceMatcher
8
-
9
- KEY = os.environ.get("KEY")
10
-
11
-
12
- def similar(a, b):
13
- """Check similarity of two sequences"""
14
- return SequenceMatcher(None, a, b).ratio()
15
-
16
-
17
- def create_metadata_breakdown_table(compliance_check_dictionary):
18
- data = {k: v for k, v in compliance_check_dictionary.items()}
19
- metadata_fields_column = list(data.keys())
20
- metadata_values_column = list(data.values())
21
- table_data = list(zip(metadata_fields_column, metadata_values_column))
22
- return tabulate(
23
- table_data, tablefmt="github", headers=("Compliance Check", "Present")
24
- )
25
-
26
-
27
- def create_markdown_report(
28
- desired_metadata_dictionary, repo_name, update: bool = False
29
- ):
30
- report = f"""# Model Card Regulatory Compliance report card {"(updated)" if update else ""}
31
- \n
32
- This is an automatically produced model card regulatory compliance report card for {repo_name}.
33
- This report is meant as a POC!
34
- \n
35
- ## Breakdown of metadata fields for your model
36
- \n
37
- {create_metadata_breakdown_table(desired_metadata_dictionary)}
38
- \n
39
- """
40
- return report
41
-
42
-
43
- def create_or_update_report(compliance_check, repo_name):
44
- report = create_markdown_report(
45
- compliance_check, repo_name, update=False
46
- )
47
- repo_discussions = get_repo_discussions(
48
- repo_name,
49
- repo_type="model",
50
- )
51
- for discussion in repo_discussions:
52
- if (
53
- discussion.title == "Metadata Report Card" and discussion.status == "open"
54
- ): # An existing open report card thread
55
- discussion_details = get_discussion_details(
56
- repo_name, discussion.num, repo_type="model"
57
- )
58
- last_comment = discussion_details.events[-1].content
59
- if similar(report, last_comment) <= 0.999:
60
- report = create_markdown_report(
61
- compliance_check,
62
- repo_name,
63
- update=True,
64
- )
65
- comment_discussion(
66
- repo_name,
67
- discussion.num,
68
- comment=report,
69
- repo_type="model",
70
- )
71
- return True
72
- create_discussion(
73
- repo_name,
74
- "Model Card Regulatory Compliance Report Card",
75
- description=report,
76
- repo_type="model",
77
- )
78
- return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
server.py DELETED
@@ -1,25 +0,0 @@
1
- import os
2
- from fastapi import FastAPI, Request, Response
3
- from main import parse_webhook_post, run_compliance_check, create_or_update_report
4
-
5
- KEY = os.environ.get("KEY")
6
-
7
- app = FastAPI()
8
-
9
-
10
- @app.post("/webhook")
11
- async def webhook(request: Request):
12
- if request.method == "POST":
13
- # if request.headers.get("X-Webhook-Secret") != KEY:
14
- # return Response("Invalid secret", status_code=401)
15
-
16
- data = await request.json()
17
-
18
- if parsed_post := parse_webhook_post(data):
19
- repo_name = parsed_post
20
- else:
21
- return Response("Unable to parse webhook data", status_code=400)
22
-
23
- compliance_check = run_compliance_check(repo_name)
24
- result = create_or_update_report(compliance_check, repo_name)
25
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tests/conftest.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ from bloom_card import bloom_card as bc
3
+
4
+ @pytest.fixture()
5
+ def bloom_card():
6
+ # TODO: Note, this is a heavily doctored version of the card.
7
+ return bc
tests/test_compliance_checks.py CHANGED
@@ -2,8 +2,23 @@ import pytest
2
  from unittest.mock import MagicMock
3
 
4
  import markdown
5
- from bs4 import BeautifulSoup, Comment
6
- from compliance_checks import ComplianceSuite, ModelProviderIdentityCheck, IntendedPurposeCheck
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
 
9
  class TestComplianceCheck:
@@ -109,11 +124,92 @@ Some random info...
109
  [More Information Needed]
110
  """
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  @pytest.mark.parametrize("check, card,check_passed,values", [
113
  (ModelProviderIdentityCheck(), "provider_identity_model_card", True, "Nima Boscarino"),
114
  (ModelProviderIdentityCheck(), "bad_provider_identity_model_card", False, None),
115
  (IntendedPurposeCheck(), "intended_purpose_model_card", True, ["Here is some info about direct uses...", None, "Here is some info about out-of-scope uses..."]),
116
  (IntendedPurposeCheck(), "bad_intended_purpose_model_card", False, [None, None, None]),
 
 
 
 
117
  ])
118
  def test_run_model_provider_identity_check(self, check, card, check_passed, values, request):
119
  card = request.getfixturevalue(card)
@@ -175,7 +271,7 @@ class TestComplianceSuite:
175
 
176
 
177
  class TestEndToEnd:
178
- @pytest.mark.parametrize("card", [
179
  ("""
180
  # Model Card for Sample Model
181
 
@@ -187,12 +283,54 @@ Some random info...
187
 
188
  - **Developed by:** Nima Boscarino
189
  - **Model type:** Yada yada yada
190
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  ])
192
- def test_end_to_end_compliance_suite(self, card):
 
 
 
193
  suite = ComplianceSuite(checks=[
194
  ModelProviderIdentityCheck(),
195
  IntendedPurposeCheck(),
 
 
196
  ])
197
 
198
- suite.run(card)
 
 
 
2
  from unittest.mock import MagicMock
3
 
4
  import markdown
5
+ from bs4 import BeautifulSoup
6
+ from compliance_checks import (
7
+ ComplianceSuite,
8
+ ModelProviderIdentityCheck,
9
+ IntendedPurposeCheck,
10
+ GeneralLimitationsCheck,
11
+ ComputationalRequirementsCheck,
12
+ )
13
+
14
+
15
+ expected_infrastructure = """\
16
+ Jean Zay Public Supercomputer, provided by the French government.\
17
+ Hardware\
18
+ 384 A100 80GB GPUs (48 nodes)\
19
+ Software\
20
+ Megatron-DeepSpeed (Github link)\
21
+ """
22
 
23
 
24
  class TestComplianceCheck:
 
124
  [More Information Needed]
125
  """
126
 
127
+ @pytest.fixture
128
+ def general_limitations_model_card(self):
129
+ return """
130
+ # Model Card for Sample Model
131
+
132
+ ## Some Random Header
133
+
134
+ ## Bias, Risks, and Limitations
135
+
136
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
137
+
138
+ Hello world! These are some risks...
139
+
140
+ ## More Things
141
+ """
142
+
143
+ @pytest.fixture
144
+ def bad_general_limitations_model_card(self):
145
+ return """
146
+ # Model Card for Sample Model
147
+
148
+ ## Some Random Header
149
+
150
+ ## Bias, Risks, and Limitations
151
+
152
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
153
+
154
+ [More Information Needed]
155
+
156
+ ## More Things
157
+ """
158
+
159
+ @pytest.fixture
160
+ def computational_requirements_model_card(self):
161
+ # Adapted from: https://huggingface.co/bigscience/bloom/blob/main/README.md
162
+ return """
163
+ # Model Card for Sample Model
164
+
165
+ ## Some Random Header
166
+
167
+ ## Technical Specifications
168
+
169
+ ### Compute infrastructure
170
+ Jean Zay Public Supercomputer, provided by the French government.
171
+
172
+ #### Hardware
173
+
174
+ * 384 A100 80GB GPUs (48 nodes)
175
+
176
+ #### Software
177
+
178
+ * Megatron-DeepSpeed ([Github link](https://github.com/bigscience-workshop/Megatron-DeepSpeed))
179
+ </details>
180
+
181
+ ## Intended Use
182
+
183
+ Etc..
184
+ """
185
+
186
+ @pytest.fixture
187
+ def bad_computational_requirements_model_card(self):
188
+ # Adapted from: https://huggingface.co/bigscience/bloom/blob/main/README.md
189
+ return """
190
+ # Model Card for Sample Model
191
+
192
+ ## Some Random Header
193
+
194
+ ## Technical Specifications
195
+
196
+ ### Compute infrastructure
197
+ [More Information Needed]
198
+
199
+ ## Intended Use
200
+
201
+ Etc..
202
+ """
203
+
204
  @pytest.mark.parametrize("check, card,check_passed,values", [
205
  (ModelProviderIdentityCheck(), "provider_identity_model_card", True, "Nima Boscarino"),
206
  (ModelProviderIdentityCheck(), "bad_provider_identity_model_card", False, None),
207
  (IntendedPurposeCheck(), "intended_purpose_model_card", True, ["Here is some info about direct uses...", None, "Here is some info about out-of-scope uses..."]),
208
  (IntendedPurposeCheck(), "bad_intended_purpose_model_card", False, [None, None, None]),
209
+ (GeneralLimitationsCheck(), "general_limitations_model_card", True, "Hello world! These are some risks..."),
210
+ (GeneralLimitationsCheck(), "bad_general_limitations_model_card", False, None),
211
+ (ComputationalRequirementsCheck(), "computational_requirements_model_card", True, expected_infrastructure),
212
+ (ComputationalRequirementsCheck(), "bad_computational_requirements_model_card", False, None),
213
  ])
214
  def test_run_model_provider_identity_check(self, check, card, check_passed, values, request):
215
  card = request.getfixturevalue(card)
 
271
 
272
 
273
  class TestEndToEnd:
274
+ @pytest.mark.parametrize("card,fixture", [
275
  ("""
276
  # Model Card for Sample Model
277
 
 
283
 
284
  - **Developed by:** Nima Boscarino
285
  - **Model type:** Yada yada yada
286
+
287
+ ## Uses
288
+
289
+ ### Direct Use
290
+
291
+ Here is some info about direct uses...
292
+
293
+ ### Downstream Use [optional]
294
+
295
+ [More Information Needed]
296
+
297
+ ### Out-of-Scope Use
298
+
299
+ Here is some info about out-of-scope uses...
300
+
301
+ ## Bias, Risks, and Limitations
302
+
303
+ Hello world! These are some risks...
304
+
305
+ ## Technical Specifications
306
+
307
+ ### Compute infrastructure
308
+ Jean Zay Public Supercomputer, provided by the French government.
309
+
310
+ #### Hardware
311
+
312
+ * 384 A100 80GB GPUs (48 nodes)
313
+
314
+ #### Software
315
+
316
+ * Megatron-DeepSpeed ([Github link](https://github.com/bigscience-workshop/Megatron-DeepSpeed))
317
+ </details>
318
+
319
+ ## More Things
320
+ """, False),
321
+ ("bloom_card", True)
322
  ])
323
+ def test_end_to_end_compliance_suite(self, card, fixture, request):
324
+ if fixture:
325
+ card = request.getfixturevalue(card)
326
+
327
  suite = ComplianceSuite(checks=[
328
  ModelProviderIdentityCheck(),
329
  IntendedPurposeCheck(),
330
+ GeneralLimitationsCheck(),
331
+ ComputationalRequirementsCheck()
332
  ])
333
 
334
+ results = suite.run(card)
335
+
336
+ assert all([r[0] for r in results])