savi-cyber committed on
Commit
ae1cc18
1 Parent(s): d5daeb5

renaming the application file

Browse files
Files changed (1) hide show
  1. app.py +76 -0
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Train, evaluate and serialize a logistic-regression failure classifier.

The script downloads OpenML dataset 42890, builds a preprocessing +
logistic-regression pipeline, tunes the regularization strength ``C`` with a
small randomized search, reports both the cross-validated and the held-out
test performance, and finally writes the refit best pipeline to
``model.joblib`` in the current working directory.
"""

import joblib

from sklearn.datasets import fetch_openml

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import make_column_transformer

from sklearn.pipeline import make_pipeline

from sklearn.model_selection import train_test_split, RandomizedSearchCV

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

dataset = fetch_openml(data_id=42890, as_frame=True, parser="auto")

# NOTE(review): the target column is read from ``dataset.data`` below, which
# assumes OpenML does not split 'Machine failure' off as a default target for
# this dataset -- confirm against the dataset metadata.
data_df = dataset.data

target = 'Machine failure'
numeric_features = [
    'Air temperature [K]',
    'Process temperature [K]',
    'Rotational speed [rpm]',
    'Torque [Nm]',
    'Tool wear [min]'
]
categorical_features = ['Type']

print("Creating data subsets")

X = data_df[numeric_features + categorical_features]
y = data_df[target]

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42
)

# Scale the numeric columns and one-hot encode the categorical one.
# Categories unseen during fit are ignored at predict time instead of raising.
preprocessor = make_column_transformer(
    (StandardScaler(), numeric_features),
    (OneHotEncoder(handle_unknown='ignore'), categorical_features)
)

model_logistic_regression = LogisticRegression(n_jobs=-1)

print("Estimating Best Model Pipeline")

model_pipeline = make_pipeline(
    preprocessor,
    model_logistic_regression
)

# ``make_pipeline`` names each step after its lowercased class name, hence the
# ``logisticregression__`` prefix.
param_distribution = {
    "logisticregression__C": [0.001, 0.01, 0.1, 0.5, 1, 5, 10]
}

# Sample 3 of the 7 candidate values of C, each scored with 3-fold CV.
rand_search_cv = RandomizedSearchCV(
    model_pipeline,
    param_distribution,
    n_iter=3,
    cv=3,
    random_state=42
)

rand_search_cv.fit(Xtrain, ytrain)

print("Logging Metrics")
# ``best_score_`` is the mean cross-validated score of the best candidate on
# the training split, not a held-out test score.
print(f"Accuracy: {rand_search_cv.best_score_}")

# Score the refit best pipeline on the held-out split, which was previously
# created but never used, so the logged metrics include unseen data.
best_model = rand_search_cv.best_estimator_
test_predictions = best_model.predict(Xtest)
print(f"Test accuracy: {accuracy_score(ytest, test_predictions)}")
print(classification_report(ytest, test_predictions))

print("Serializing Model")

saved_model_path = "model.joblib"

joblib.dump(best_model, saved_model_path)