Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files
app.py
ADDED
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
### ----------------------------- ###
|
2 |
+
### libraries ###
|
3 |
+
### ----------------------------- ###
|
4 |
+
|
5 |
+
import gradio as gr
|
6 |
+
import pandas as pd
|
7 |
+
import numpy as np
|
8 |
+
from sklearn.model_selection import train_test_split
|
9 |
+
from sklearn.linear_model import LogisticRegression
|
10 |
+
from sklearn import metrics
|
11 |
+
|
12 |
+
|
13 |
+
### ------------------------------ ###
|
14 |
+
### data transformation ###
|
15 |
+
### ------------------------------ ###
|
16 |
+
|
17 |
+
# load dataset
uncleaned_data = pd.read_csv('data.csv')

# remove timestamp from dataset (always first column)
uncleaned_data = uncleaned_data.iloc[:, 1:]
data = pd.DataFrame()

# keep track of which columns are categorical and what
# those columns' value mappings are
# structure: {colname1: {...}, colname2: {...} }
cat_value_dicts = {}
final_colname = uncleaned_data.columns[-1]

# for each column...
# (`DataFrame.items` is used because `iteritems` was removed in pandas 2.0;
# `items` yields the same (column name, Series) pairs on older versions too)
for (colname, colval) in uncleaned_data.items():

    # check if col is already a number; if so, add col directly
    # to new dataframe and skip to next column
    # (only the first value of the column is inspected)
    if isinstance(colval.values[0], (np.integer, float)):
        data[colname] = uncleaned_data[colname].copy()
        continue

    # structure: {"lilac": 0, "blue": 1, ...}
    # codes are handed out in order of first appearance, so the next free
    # code is always the current size of the dict
    new_dict = {}
    transformed_col_vals = []  # new numeric datapoints

    # if not, for each item in that column...
    for item in colval.values:

        # if item is not in this col's dict, give it the next code
        if item not in new_dict:
            new_dict[item] = len(new_dict)

        # then add numerical value to transformed dataframe
        transformed_col_vals.append(new_dict[item])

    # reverse dictionary only for final col (0, 1) => (vals)
    # so a predicted code can be turned back into its label
    if colname == final_colname:
        new_dict = {value: key for (key, value) in new_dict.items()}

    cat_value_dicts[colname] = new_dict
    data[colname] = transformed_col_vals
|
61 |
+
|
62 |
+
|
63 |
+
### -------------------------------- ###
|
64 |
+
### model training ###
|
65 |
+
### -------------------------------- ###
|
66 |
+
|
67 |
+
# select features and prediction target; the last column of `data` is
# always treated as the prediction, every column before it as a feature
cols = data.shape[1]
num_features = cols - 1
x = data.iloc[:, :num_features]
y = data.iloc[:, num_features:]

# hold out a quarter of the rows for testing
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

# fit a logistic regression with default parameters; `y_train` is a
# one-column frame, so it is flattened to 1-D before fitting
model = LogisticRegression()
model.fit(x_train, y_train.to_numpy().ravel())

# predictions for the held-out rows
y_pred = model.predict(x_test)
|
80 |
+
|
81 |
+
|
82 |
+
### -------------------------------- ###
|
83 |
+
### article generation ###
|
84 |
+
### -------------------------------- ###
|
85 |
+
# borrow file reading function from reader.py
|
86 |
+
|
87 |
+
def get_feat(coefs=None, columns=None):
    """Return the name of the feature with the largest overall weight.

    Args:
        coefs: coefficient array with one row per class and one column per
            feature. Defaults to the fitted module-level ``model.coef_``.
        columns: indexable of column names aligned with the coefficient
            columns. Defaults to ``data.columns``.

    Returns:
        The entry of ``columns`` whose absolute coefficients, summed over
        all rows of ``coefs``, are largest. Ties go to the earliest column.
    """
    if coefs is None:
        coefs = model.coef_
    if columns is None:
        columns = data.columns

    # A multiclass model has one coefficient row per class; looking only at
    # row 0 would rank features by their effect on the first class alone.
    # Summing magnitudes over every row gives the same answer as before for
    # a single-row (binary) model and a whole-model ranking otherwise.
    weights = np.abs(np.atleast_2d(coefs)).sum(axis=0)
    return columns[int(np.argmax(weights))]
|
92 |
+
|
93 |
+
# accuracy on the held-out rows, as a percentage string with one decimal
acc = f"{round(metrics.accuracy_score(y_test, y_pred) * 100, 1)}%"

# column name reported as the model's most important feature
most_imp_feat = get_feat()
|
95 |
+
# info = get_article(acc, most_imp_feat)
|
96 |
+
|
97 |
+
|
98 |
+
|
99 |
+
### ------------------------------- ###
|
100 |
+
### interface creation ###
|
101 |
+
### ------------------------------- ###
|
102 |
+
|
103 |
+
|
104 |
+
# predictor for generic number of features
|
105 |
+
def general_predictor(*args):
    """Predict the label for one set of answers.

    Args:
        *args: one value per feature column, in the order of
            ``data.columns``. Values for columns listed in
            ``cat_value_dicts`` are the original category values; values
            for the remaining columns are passed to the model unchanged.

    Returns:
        The label stored in ``cat_value_dicts[final_colname]`` for the
        predicted code, or, when the target column has no mapping (it was
        numeric in the dataset), the predicted value as a string.

    Raises:
        KeyError: if a categorical answer is not a known category.
    """
    features = []

    # transform categorical input; `zip` stops at the shorter sequence, so
    # columns beyond the supplied answers are not visited
    for colname, arg in zip(data.columns, args):
        mapping = cat_value_dicts.get(colname)
        features.append(arg if mapping is None else mapping[arg])

    # predict single datapoint
    result = model.predict([features])

    # Numeric columns never get an entry in `cat_value_dicts`, so a numeric
    # target has no code -> label mapping to look up.
    labels = cat_value_dicts.get(final_colname)
    if labels is None:
        return str(result[0])
    return labels[result[0]]
|
119 |
+
|
120 |
+
# add data labels to replace those lost via star-args
|
121 |
+
|
122 |
+
|
123 |
+
block = gr.Blocks()

# Read info.md once: its first line is shown as the page title and the rest
# as the write-up. UTF-8 is requested explicitly so the text is decoded the
# same way whatever the platform's default encoding is.
with open('info.md', encoding='utf-8') as f:
    info_title = f.readline()
    info_body = f.read()

# NOTE(review): the nesting of the layout containers below was reconstructed
# from a listing that had lost its indentation -- confirm against the app.
with block:
    gr.Markdown(info_title)
    gr.Markdown('Take the quiz to get a personalized recommendation using AI.')

    with gr.Row():
        with gr.Box():
            # one input component per feature column, in column order, so
            # the values reach `general_predictor` in the order it expects
            inputls = []
            for colname in data.columns:
                # skip last column
                if colname == final_colname:
                    continue

                # access categories dict if data is categorical
                # otherwise, just use a number input
                if colname in cat_value_dicts:
                    radio_options = list(cat_value_dicts[colname].keys())
                    inputls.append(gr.Dropdown(choices=radio_options, type="value", label=colname))
                else:
                    # add numerical input
                    inputls.append(gr.Number(label=colname))
                gr.Markdown("<br />")

            submit = gr.Button("Click to see your personalized result!", variant="primary")
            gr.Markdown("<br />")
            output = gr.Textbox(label="Your recommendation:", placeholder="your recommendation will appear here")

            submit.click(fn=general_predictor, inputs=inputls, outputs=output)
            gr.Markdown("<br />")

            with gr.Row():
                with gr.Box():
                    gr.Markdown(f"<h3>Accuracy: </h3>{acc}")
                with gr.Box():
                    gr.Markdown(f"<h3>Most important feature: </h3>{most_imp_feat}")

            gr.Markdown("<br />")

            with gr.Box():
                gr.Markdown('''β Note that model accuracy is based on the uploaded data.csv and reflects how well the AI model can give correct recommendations for <em>that dataset</em>. Model accuracy and most important feature can be helpful for understanding how the model works, but <em>should not be considered absolute facts about the real world</em>.''')

        with gr.Box():
            gr.Markdown(info_body)

# show the interface
block.launch()
|
data.csv
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Timestamp,What genre are you interested in ,What is your ideal movie length,What Type of Movie do you Want to Watch,What type of rating do you want to watch,Do you prefer fiction or non fiction,What movie would you want to watch
|
2 |
+
2/13/2023 12:02:04,Action,2 hrs,Action movie,Pg 13,Fiction,Avengers
|
3 |
+
2/14/2023 8:24:08,Horror,1 1/2 hrs,Scary movie,R,Fiction,A Dog's Purpose
|
4 |
+
2/14/2023 8:25:24,Comedy,1 1/2 hrs,Happy movie,Pg 13,Non-fiction,Puss in boots
|
5 |
+
2/14/2023 8:27:00,Action,2 hrs,Action movie,R,Fiction,Avengers
|
6 |
+
2/14/2023 8:27:21,Kids Movie,1 1/2 hrs,Thought provoking movie,Pg 13,Fiction,Puss in boots
|
7 |
+
2/14/2023 8:27:24,Romance,3 hrs,Sad movie,R,Fiction,Titanic
|
8 |
+
2/14/2023 8:28:39,Horror,2 hrs,Scary movie,R,Fiction,Puss in boots
|
9 |
+
2/14/2023 8:29:33,Action,2 hrs,Action movie,R,Fiction,Avengers
|
10 |
+
2/14/2023 8:29:40,Action,2 hrs,Action movie,Pg 13,Fiction,Avengers
|
11 |
+
2/14/2023 8:30:21,Kids Movie,3 hrs,Scary movie,R,Fiction,Alien
|
12 |
+
2/14/2023 8:30:21,Comedy,1 1/2 hrs,Happy movie,Pg 13,Fiction,Jumanji
|
13 |
+
2/14/2023 8:30:56,Comedy,2 hrs,Thought provoking movie,R,Non-fiction,Puss in boots
|
14 |
+
2/14/2023 8:32:35,Horror,1 1/2 hrs,Scary movie,R,Non-fiction,Alien
|
15 |
+
2/14/2023 8:34:27,Horror,2 hrs,Scary movie,Pg 13,Fiction,Jumanji
|
16 |
+
2/14/2023 8:34:40,Action,2 hrs,Action movie,Pg 13,Fiction,Puss in boots
|
17 |
+
2/14/2023 8:34:50,Action,2 hrs,Action movie,R,Fiction,Jumanji
|
18 |
+
2/14/2023 8:42:25,Comedy,1 1/2 hrs,Happy movie,Pg,Fiction,Puss in boots
|
19 |
+
2/14/2023 8:44:22,Comedy,2 hrs,Action movie,Pg 13,Fiction,Puss in boots
|
20 |
+
2/14/2023 11:11:49,Romance,2 hrs,Happy movie,R,Fiction,Puss in boots
|
21 |
+
2/14/2023 11:18:01,Comedy,1 1/2 hrs,Thought provoking movie,R,Fiction,Jumanji
|
22 |
+
2/14/2023 12:07:32,Horror,1 1/2 hrs,Scary movie,R,Fiction,Puss in boots
|
23 |
+
2/14/2023 12:10:06,Horror,2 hrs,Scary movie,R,Fiction,Alien
|
24 |
+
2/14/2023 12:14:23,Romance,1 1/2 hrs,Action movie,Pg 13,Fiction,Jumanji
|
25 |
+
2/14/2023 12:16:57,Romance,2 hrs,Thought provoking movie,R,Fiction,A Dog's Purpose
|
26 |
+
2/14/2023 12:19:00,Horror,2 hrs,Scary movie,R,Fiction,Alien
|
27 |
+
2/14/2023 12:19:54,Comedy,1 1/2 hrs,Action movie,Pg 13,Fiction,Avengers
|
28 |
+
2/14/2023 12:29:02,Comedy,1 1/2 hrs,Happy movie,Pg 13,Fiction,Avengers
|
29 |
+
2/14/2023 12:38:54,Action,2 hrs,Thought provoking movie,R,Fiction,Puss in boots
|
30 |
+
2/14/2023 12:39:09,Action,2 1/2 hrs,Action movie,R,Non-fiction,Avengers
|
info.md
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# π [Edit info.md - Your app's title here]
|
2 |
+
|
3 |
+
### π§ Problem Statement and Research Summary
|
4 |
+
[add info about your problem statement and your research here!]
|
5 |
+
|
6 |
+
### π£ Data Collection Plan
|
7 |
+
[Edit info.md - add info about what data you collected and why here!]
|
8 |
+
|
9 |
+
### π₯ Ethical Considerations (Data Privacy and Bias)
|
10 |
+
* Data privacy: [Edit info.md - add info about how you considered users' privacy here!]
|
11 |
+
* Bias: [Edit info.md - add info about how you considered bias here!]
|
12 |
+
|
13 |
+
### π» Our Team
|
14 |
+
[Edit info.md - add info about your team members here!]
|
15 |
+
|
16 |
+
![aiEDU logo](https://images.squarespace-cdn.com/content/v1/5e4efdef6d10420691f02bc1/5db5a8a3-1761-4fce-a096-bd5f2515162f/aiEDU+_black+logo+stacked.png?format=100w)
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
pandas==1.3.4
|
2 |
+
scikit-learn==1.0.1
|
3 |
+
numpy==1.21.4
|