Spaces:

asoria
/

auto-notebook-creator

Running

asoria HF staff committed on Sep 18

Commit

713d673

•

1 Parent(s): 0b212ec

Adding dataset type validation

Files changed (4) hide show

app.py CHANGED Viewed

@@ -125,6 +125,8 @@ def generate_cells(dataset_id, notebook_title):
     logging.info(f"Generating {notebook_title} notebook for dataset {dataset_id}")
     cells = notebook_templates[notebook_title]["notebook_template"]
     notebook_type = notebook_templates[notebook_title]["notebook_type"]
     try:
         libraries = get_compatible_libraries(dataset_id)
     except Exception as err:
@@ -155,22 +157,18 @@ def generate_cells(dataset_id, notebook_title):
     has_numeric_columns = len(df.select_dtypes(include=["number"]).columns) > 0
     has_categoric_columns = len(df.select_dtypes(include=["object"]).columns) > 0
-    # TODO: Validate by notebook type
-    if notebook_type in ("rag", "embeddings") and not has_categoric_columns:
-        logging.error(
-            "Dataset does not have categorical columns, which are required for RAG generation."
-        )
-        return (
-            "",
-            "## ❌ This dataset does not have categorical columns, which are required for Embeddings/RAG generation ❌",
-        )
-    if notebook_type == "eda" and not (has_categoric_columns or has_numeric_columns):
         logging.error(
-            "Dataset does not have categorical or numeric columns, which are required for EDA generation."
         )
         return (
             "",
-            "## ❌ This dataset does not have categorical or numeric columns, which are required for EDA generation ❌",
         )
     cells = replace_wildcards(

     logging.info(f"Generating {notebook_title} notebook for dataset {dataset_id}")
     cells = notebook_templates[notebook_title]["notebook_template"]
     notebook_type = notebook_templates[notebook_title]["notebook_type"]
+    dataset_types = notebook_templates[notebook_title]["dataset_types"]
     try:
         libraries = get_compatible_libraries(dataset_id)
     except Exception as err:
     has_numeric_columns = len(df.select_dtypes(include=["number"]).columns) > 0
     has_categoric_columns = len(df.select_dtypes(include=["object"]).columns) > 0
+    valid_dataset = False
+    if "text" in dataset_types and has_categoric_columns:
+        valid_dataset = True
+    if "numeric" in dataset_types and has_numeric_columns:
+        valid_dataset = True
+    if not valid_dataset:
         logging.error(
+            f"Dataset does not have the column types needed for this notebook which expects to have {dataset_types} data types."
         )
         return (
             "",
+            f"## ❌ This dataset does not have {dataset_types} columns, which are required for this notebook type ❌",
         )
     cells = replace_wildcards(

notebooks/eda.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
     "notebook_title": "Exploratory data analysis (EDA)",
     "notebook_type": "eda",
-    "dataset_type": "numeric",
     "notebook_template": [
         {
             "cell_type": "markdown",

 {
     "notebook_title": "Exploratory data analysis (EDA)",
     "notebook_type": "eda",
+    "dataset_types": ["numeric", "text"],
     "notebook_template": [
         {
             "cell_type": "markdown",

notebooks/embeddings.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
     "notebook_title": "Text Embeddings",
     "notebook_type": "embeddings",
-    "dataset_type": "text",
     "notebook_template": [
         {
             "cell_type": "markdown",

 {
     "notebook_title": "Text Embeddings",
     "notebook_type": "embeddings",
+    "dataset_types": ["text"],
     "notebook_template": [
         {
             "cell_type": "markdown",

notebooks/rag.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
     "notebook_title": "Retrieval-augmented generation (RAG)",
     "notebook_type": "rag",
-    "dataset_type": "text",
     "notebook_template": [
         {
             "cell_type": "markdown",

 {
     "notebook_title": "Retrieval-augmented generation (RAG)",
     "notebook_type": "rag",
+    "dataset_types": ["text"],
     "notebook_template": [
         {
             "cell_type": "markdown",