savi-cyber committed on
Commit
587f769
1 Parent(s): ae1cc18

Delete train.py

Browse files
Files changed (1) hide show
  1. train.py +0 -76
train.py DELETED
@@ -1,76 +0,0 @@
1
-
2
- import joblib
3
-
4
- from sklearn.datasets import fetch_openml
5
-
6
- from sklearn.preprocessing import StandardScaler, OneHotEncoder
7
- from sklearn.compose import make_column_transformer
8
-
9
- from sklearn.pipeline import make_pipeline
10
-
11
- from sklearn.model_selection import train_test_split, RandomizedSearchCV
12
-
13
- from sklearn.linear_model import LogisticRegression
14
- from sklearn.metrics import accuracy_score, classification_report
15
-
16
- dataset = fetch_openml(data_id=42890, as_frame=True, parser="auto")
17
-
18
- data_df = dataset.data
19
-
20
- target = 'Machine failure'
21
- numeric_features = [
22
- 'Air temperature [K]',
23
- 'Process temperature [K]',
24
- 'Rotational speed [rpm]',
25
- 'Torque [Nm]',
26
- 'Tool wear [min]'
27
- ]
28
- categorical_features = ['Type']
29
-
30
- print("Creating data subsets")
31
-
32
- X = data_df[numeric_features + categorical_features]
33
- y = data_df[target]
34
-
35
- Xtrain, Xtest, ytrain, ytest = train_test_split(
36
- X, y,
37
- test_size=0.2,
38
- random_state=42
39
- )
40
-
41
- preprocessor = make_column_transformer(
42
- (StandardScaler(), numeric_features),
43
- (OneHotEncoder(handle_unknown='ignore'), categorical_features)
44
- )
45
-
46
- model_logistic_regression = LogisticRegression(n_jobs=-1)
47
-
48
- print("Estimating Best Model Pipeline")
49
-
50
- model_pipeline = make_pipeline(
51
- preprocessor,
52
- model_logistic_regression
53
- )
54
-
55
- param_distribution = {
56
- "logisticregression__C": [0.001, 0.01, 0.1, 0.5, 1, 5, 10]
57
- }
58
-
59
- rand_search_cv = RandomizedSearchCV(
60
- model_pipeline,
61
- param_distribution,
62
- n_iter=3,
63
- cv=3,
64
- random_state=42
65
- )
66
-
67
- rand_search_cv.fit(Xtrain, ytrain)
68
-
69
- print("Logging Metrics")
70
- print(f"Accuracy: {rand_search_cv.best_score_}")
71
-
72
- print("Serializing Model")
73
-
74
- saved_model_path = "model.joblib"
75
-
76
- joblib.dump(rand_search_cv.best_estimator_, saved_model_path)