Spaces:
Running
Running
Adding dataset type validation
Browse files
- app.py +10 -12
- notebooks/eda.json +1 -1
- notebooks/embeddings.json +1 -1
- notebooks/rag.json +1 -1
app.py
CHANGED
@@ -125,6 +125,8 @@ def generate_cells(dataset_id, notebook_title):
|
|
125 |
logging.info(f"Generating {notebook_title} notebook for dataset {dataset_id}")
|
126 |
cells = notebook_templates[notebook_title]["notebook_template"]
|
127 |
notebook_type = notebook_templates[notebook_title]["notebook_type"]
|
|
|
|
|
128 |
try:
|
129 |
libraries = get_compatible_libraries(dataset_id)
|
130 |
except Exception as err:
|
@@ -155,22 +157,18 @@ def generate_cells(dataset_id, notebook_title):
|
|
155 |
has_numeric_columns = len(df.select_dtypes(include=["number"]).columns) > 0
|
156 |
has_categoric_columns = len(df.select_dtypes(include=["object"]).columns) > 0
|
157 |
|
158 |
-
|
159 |
-
if
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
"",
|
165 |
-
"## ❌ This dataset does not have categorical columns, which are required for Embeddings/RAG generation ❌",
|
166 |
-
)
|
167 |
-
if notebook_type == "eda" and not (has_categoric_columns or has_numeric_columns):
|
168 |
logging.error(
|
169 |
-
"Dataset does not have
|
170 |
)
|
171 |
return (
|
172 |
"",
|
173 |
-
"## ❌ This dataset does not have
|
174 |
)
|
175 |
|
176 |
cells = replace_wildcards(
|
|
|
125 |
logging.info(f"Generating {notebook_title} notebook for dataset {dataset_id}")
|
126 |
cells = notebook_templates[notebook_title]["notebook_template"]
|
127 |
notebook_type = notebook_templates[notebook_title]["notebook_type"]
|
128 |
+
dataset_types = notebook_templates[notebook_title]["dataset_types"]
|
129 |
+
|
130 |
try:
|
131 |
libraries = get_compatible_libraries(dataset_id)
|
132 |
except Exception as err:
|
|
|
157 |
has_numeric_columns = len(df.select_dtypes(include=["number"]).columns) > 0
|
158 |
has_categoric_columns = len(df.select_dtypes(include=["object"]).columns) > 0
|
159 |
|
160 |
+
valid_dataset = False
|
161 |
+
if "text" in dataset_types and has_categoric_columns:
|
162 |
+
valid_dataset = True
|
163 |
+
if "numeric" in dataset_types and has_numeric_columns:
|
164 |
+
valid_dataset = True
|
165 |
+
if not valid_dataset:
|
|
|
|
|
|
|
|
|
166 |
logging.error(
|
167 |
+
f"Dataset does not have the column types needed for this notebook which expects to have {dataset_types} data types."
|
168 |
)
|
169 |
return (
|
170 |
"",
|
171 |
+
f"## ❌ This dataset does not have {dataset_types} columns, which are required for this notebook type ❌",
|
172 |
)
|
173 |
|
174 |
cells = replace_wildcards(
|
notebooks/eda.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"notebook_title": "Exploratory data analysis (EDA)",
|
3 |
"notebook_type": "eda",
|
4 |
-
"
|
5 |
"notebook_template": [
|
6 |
{
|
7 |
"cell_type": "markdown",
|
|
|
1 |
{
|
2 |
"notebook_title": "Exploratory data analysis (EDA)",
|
3 |
"notebook_type": "eda",
|
4 |
+
"dataset_types": ["numeric", "text"],
|
5 |
"notebook_template": [
|
6 |
{
|
7 |
"cell_type": "markdown",
|
notebooks/embeddings.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"notebook_title": "Text Embeddings",
|
3 |
"notebook_type": "embeddings",
|
4 |
-
"
|
5 |
"notebook_template": [
|
6 |
{
|
7 |
"cell_type": "markdown",
|
|
|
1 |
{
|
2 |
"notebook_title": "Text Embeddings",
|
3 |
"notebook_type": "embeddings",
|
4 |
+
"dataset_types": ["text"],
|
5 |
"notebook_template": [
|
6 |
{
|
7 |
"cell_type": "markdown",
|
notebooks/rag.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"notebook_title": "Retrieval-augmented generation (RAG)",
|
3 |
"notebook_type": "rag",
|
4 |
-
"
|
5 |
"notebook_template": [
|
6 |
{
|
7 |
"cell_type": "markdown",
|
|
|
1 |
{
|
2 |
"notebook_title": "Retrieval-augmented generation (RAG)",
|
3 |
"notebook_type": "rag",
|
4 |
+
"dataset_types": ["text"],
|
5 |
"notebook_template": [
|
6 |
{
|
7 |
"cell_type": "markdown",
|