kryox64 committed on
Commit
eb75667
1 Parent(s): 7a4444d

Make changes in file


Signed-off-by: Aadhitya A <[email protected]>

Files changed (4)
  1. .gitignore +1 -0
  2. app-plain.py +957 -0
  3. app.py +15 -15
  4. demo/sample.csv +0 -0
.gitignore ADDED
@@ -0,0 +1 @@
1
+ env
app-plain.py ADDED
@@ -0,0 +1,957 @@
1
+ # %%
2
+ # Import section
3
+ # (Please don't edit this section unless necessary)
4
+ import copy
5
+ from pathlib import Path
6
+ import warnings
7
+ import holidays
8
+ import seaborn as sns
9
+ import matplotlib
10
+ import matplotlib.dates as mdates
11
+ import matplotlib.pyplot as plt
12
+ plt.style.use('fivethirtyeight')
13
+ import numpy as np
14
+ import pandas as pd
15
+ import glob
16
+ import csv
17
+ import lightning.pytorch as pl
18
+ from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
19
+ from lightning.pytorch.loggers import TensorBoardLogger
20
+ import torch
21
+ from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
22
+ from pytorch_forecasting.data import GroupNormalizer, NaNLabelEncoder
23
+ from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
24
+ from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
25
+ import random
26
+ import gc
27
+ import tensorflow as tf
28
+ import tensorboard as tb
29
+ tf.io.gfile = tb.compat.tensorflow_stub.io.gfile
30
+ import os
31
+ import math
32
+ import sys
33
+ from sklearn.model_selection import train_test_split
34
+ from sklearn.preprocessing import MinMaxScaler
35
+ import tensorflow as tf
36
+ from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, Bidirectional, TimeDistributed
37
+ from tensorflow.keras.layers import MaxPooling1D, Flatten
38
+ from tensorflow.keras.regularizers import L1, L2
39
+ from tensorflow.keras.metrics import Accuracy
40
+ from tensorflow.keras.metrics import RootMeanSquaredError
41
+ from sklearn.metrics import mean_squared_error as MSE
42
+ from sklearn.model_selection import KFold
43
+ from sklearn.inspection import permutation_importance
44
+ from tensorflow.keras.utils import plot_model
45
+ from sklearn.metrics import explained_variance_score, mean_poisson_deviance, mean_gamma_deviance, mean_squared_error, mean_squared_log_error, d2_absolute_error_score, d2_pinball_score, d2_tweedie_score
46
+ from sklearn.metrics import r2_score
47
+ from sklearn.metrics import max_error
48
+ import datetime
49
+ from datetime import date
50
+ import optuna
51
+ from tensorflow.keras.callbacks import Callback
52
+ from optuna.integration import TFKerasPruningCallback
53
+ import shutil
54
+ import gradio as gr
55
+
56
+ # Some variables (don't edit these variables unless necessary)
57
+ DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
58
+ random.seed(30)
59
+ np.random.seed(30)
60
+ tf.random.set_seed(30)
61
+ torch.manual_seed(30)
62
+ torch.cuda.manual_seed(30)
63
+
64
+ # Global variables
65
+ PATIENCE = 30
66
+ MAX_EPOCHS = 3
67
+ LEARNING_RATE = 0.01
68
+ OPTUNA = True
69
+ ACCELERATOR = "cpu"
70
+ # The line below is for GPU only. Don't use it on CPU
71
+ #os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:1024"
72
+
73
+ # Variables to count the number of files
74
+ w = 7
75
+ prax = [0 for x in range(w)]
76
+
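+ # Layout of the prax result row (it mirrors the CSV header written by generate_csv below):
+ # prax[0] = Ticker, prax[1] = last real Close, prax[2] = model name,
+ # prax[3] = previous predicted Close, prax[4] = predicted Close,
+ # prax[5] = error metric (max error / SMAPE), prax[6] = up/down flag (+1 / 0 / -1)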
77
+ # %%
78
+ # Objective function for Optuna (CNN-LSTM)
79
+ def objective(trial, X_train, y_train, X_test, y_test):
80
+ model = tf.keras.Sequential()
81
+
82
+ # Creating the Neural Network model here...
83
+ # CNN layers
84
+ model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))
85
+ # model.add(Dense(5, kernel_regularizer=L2(0.01)))
86
+
87
+ # LSTM layers
88
+ model.add(Bidirectional(LSTM(trial.suggest_int("lstm_units_1", 32, 256), return_sequences=True)))
89
+ model.add(Dropout(trial.suggest_float("dropout_1", 0.1, 0.5)))
90
+ model.add(Bidirectional(LSTM(trial.suggest_int("lstm_units_2", 32, 256), return_sequences=False)))
91
+ model.add(Dropout(trial.suggest_float("dropout_2", 0.1, 0.5)))
92
+
93
+ #Final layers
94
+ model.add(Dense(1, activation='relu'))
95
+ model.compile(optimizer='adam', loss='mse', metrics=['mse'])
96
+
97
+ # Train the model
98
+ pruning_callback = TFKerasPruningCallback(trial, "val_loss")
99
+ history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=15, batch_size=32, verbose=0, callbacks=[pruning_callback])
100
+
101
+ # Evaluate the model
102
+ loss = model.evaluate(X_test, y_test, verbose=0)[0]
103
+
104
+ return loss
105
+
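+ # Note: Optuna searches over the two Bidirectional-LSTM widths (lstm_units_1/2) and the two
+ # dropout rates; TFKerasPruningCallback prunes unpromising trials early based on val_loss.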
106
+ # %%
107
+ # Function to train the model (CNN-LSTM)
108
+ def modelCNNLSTM(csv_file, prax):
109
+ # Read the data
110
+ df = csv_file
111
+ #df = df['Date/Time'].values.astype("float64")
112
+ temp_data = df.iloc[0:len(df)-100, 1:21]
113
+ trek = df.iloc[len(df)-100:,1:21]
114
+ #print(temp_data)
115
+ data = temp_data
116
+ sc = MinMaxScaler()
117
+ # Split the data into training and testing sets
118
+ train_size = int(len(data) * 0.8)
119
+ train_data, test_data = data[:train_size], data[train_size:]
120
+ # Separate the input features and target variable
121
+ X_train, y_train = train_data, train_data['Close']
122
+ X_test, y_test = test_data, test_data['Close']
123
+
124
+ X_train = X_train[0:len(X_train)-1]
125
+ y_train = y_train[1:len(y_train)]
126
+ X_test = X_test[0:len(X_test)-1]
127
+ y_test = y_test[1:len(y_test)]
128
+
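+ # Note: inputs and targets are offset by one row, so the features of day t are used to
+ # predict the Close of day t+1 (one-step-ahead forecasting).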
129
+ Xt = X_train
130
+ Xts = X_test
131
+ Yt = y_train
132
+ Yts = y_test
133
+
134
+ y_train = y_train.values.reshape(-1,1)
135
+ y_test = y_test.values.reshape(-1,1)
136
+
137
+ X_train = sc.fit_transform(X_train)
138
+ y_train = sc.fit_transform(y_train)
139
+ X_test = sc.fit_transform(X_test)
140
+ y_test = sc.fit_transform(y_test)
141
+
142
+ x_tr=pd.DataFrame(X_train, index = Xt.index, columns = Xt.columns)
143
+ y_tr=pd.DataFrame(y_train, index = Yt.index)
144
+ x_te=pd.DataFrame(X_test, index = Xts.index, columns = Xts.columns)
145
+ y_te=pd.DataFrame(y_test, index = Yts.index)
146
+
147
+ # Reshape the data for the CNN-LSTM model
148
+ X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
149
+ X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
150
+
151
+ study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner(n_min_trials=4, n_startup_trials=4))
152
+ fn = lambda trial: objective(trial, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
153
+ study.optimize(fn, n_trials=5)
154
+
155
+ best_params = study.best_params
156
+ #print(f"Best params: {best_params}")
157
+
158
+ model = tf.keras.Sequential()
159
+
160
+ # Creating the Neural Network model here...
161
+ # CNN layers
162
+ model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))
163
+ # model.add(Dense(5, kernel_regularizer=L2(0.01)))
164
+
165
+ # LSTM layers
166
+ model.add(Bidirectional(LSTM(best_params["lstm_units_1"], return_sequences=True)))
167
+ model.add(Dropout(best_params["dropout_1"]))
168
+ model.add(Bidirectional(LSTM(best_params["lstm_units_2"], return_sequences=False)))
169
+ model.add(Dropout(best_params["dropout_2"]))
170
+
171
+ #Final layers
172
+ model.add(Dense(1, activation='relu'))
173
+ model.compile(optimizer='adam', loss='mse', metrics=['mse'])
174
+
175
+ # Train the model
176
+ history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32, verbose=0)
177
+
178
+ # Evaluate the model
179
+ loss = model.evaluate(X_test, y_test, verbose=0)[0]
180
+
181
+ print(f"Final loss (without KFold): {loss}")
182
+
183
+ kfold = KFold(n_splits=10, shuffle=True)
184
+
185
+ inputs = np.concatenate((X_train, X_test), axis=0)
186
+ targets = np.concatenate((y_train, y_test), axis=0)
187
+ acc_per_fold = []
188
+ loss_per_fold = []
189
+ xgb_res = []
190
+ num_epochs = 10
191
+ batch_size = 32
192
+
193
+ fold_no = 1
194
+ print('------------------------------------------------------------------------')
195
+ print("Training for 10 folds... Standby")
196
+ for train, test in kfold.split(inputs, targets):
197
+ #print('------------------------------------------------------------------------')
198
+ #print(f'Training for fold {fold_no} ...')
199
+ history = model.fit(inputs[train], targets[train],
200
+ batch_size=32,
201
+ epochs=15,
202
+ verbose=0)
203
+
204
+ scores = model.evaluate(inputs[test], targets[test], verbose=0)
205
+ #print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
206
+ acc_per_fold.append(scores[1] * 100)
207
+ loss_per_fold.append(scores[0])
208
+ fold_no = fold_no + 1
209
+
210
+
211
+ print('------------------------------------------------------------------------')
212
+ #print('Score per fold')
213
+ #for i in range(0, len(acc_per_fold)):
214
+ # print('------------------------------------------------------------------------')
215
+ # print(f'> Fold {i+1} - Loss: {loss_per_fold[i]} - Loss%: {acc_per_fold[i]}%')
216
+ #print('------------------------------------------------------------------------')
217
+ #print('Average scores for all folds:')
218
+ #print(f'> Possible Loss %: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
219
+ #print(f'> Loss: {np.mean(loss_per_fold)}')
220
+ #print('------------------------------------------------------------------------')
221
+
222
+ trek = df.iloc[0:len(df), 1:21]
223
+ Y = trek[0:len(trek)]
224
+ YP = trek[1:len(trek)]
225
+ Y1 = Y['Close']
226
+ Y2 = YP['Close']
227
+ Yx = pd.DataFrame(YP, index=YP.index, columns=YP.columns)
228
+ #X = sc.fit_transform(X.reshape(-1,22))
229
+ Y = np.array(Y)
230
+ Y1 = np.array(Y1)
231
+ Y = sc.fit_transform(Y)
232
+ Y1 = Y1.reshape(-1,1)
233
+ Y1 = sc.fit_transform(Y1)
234
+
235
+ train_X = Y.reshape(Y.shape[0],Y.shape[1],1)
236
+ #Y = Y.reshape(-1,1)
237
+ pred = model.predict(train_X, verbose=0)
238
+ pred = np.array(pred).reshape(-1,1)
239
+ var2 = max_error(pred.reshape(-1,1), Y1)
240
+ print('Max Error: %f' % var2)
241
+ prax[5] = float(var2)
242
+ pred = sc.inverse_transform(pred)
243
+
244
+ print(pred[-2], pred[-1])
245
+ prax[3] = pred[-2]
246
+ prax[4] = pred[-1]
247
+ if(pred[-1]-pred[-2]>0):
248
+ prax[6] = 1
249
+ elif(pred[-1]-pred[-2]==0):
250
+ prax[6] = 0
251
+ else:
252
+ prax[6] = -1
253
+
254
+ # %%
255
+ # Function to train the model (CNN-LSTM)
256
+ def modelCNNLSTM_OpenGap(csv_file, prax):
257
+ # Read the data
258
+ df = csv_file
259
+ datLength = len(df)
260
+ df['O-C'] = 0
261
+ for i in range(datLength):
262
+ if i == 0:
263
+ df['O-C'][i] = 0
264
+ continue
265
+ else:
266
+ df['O-C'][i] = df['Open'][i] - df['Close'][i-1]
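+ # The loop above fills the open-gap column: today's Open minus the previous day's Close.
+ # Equivalently (vectorized form, not in the original): df['O-C'] = (df['Open'] - df['Close'].shift(1)).fillna(0)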
267
+ temp_data = df.iloc[0:datLength-100, 1:22]
268
+ trek = df.iloc[datLength-100:,1:22]
269
+ #print(temp_data)
270
+ data = temp_data
271
+ #data = data.values.astype("float64")
272
+ sc = MinMaxScaler()
273
+ # Split the data into training and testing sets
274
+ train_size = int(len(data) * 0.8)
275
+ train_data, test_data = data[:train_size], data[train_size:]
276
+
277
+ # Separate the input features and target variable
278
+ X_train, y_train = train_data, train_data['Close']
279
+ X_test, y_test = test_data, test_data['Close']
280
+
281
+ X_train = X_train[0:len(X_train)-1]
282
+ y_train = y_train[1:len(y_train)]
283
+ X_test = X_test[0:len(X_test)-1]
284
+ y_test = y_test[1:len(y_test)]
285
+
286
+ Xt = X_train
287
+ Xts = X_test
288
+ Yt = y_train
289
+ Yts = y_test
290
+
291
+ y_train = y_train.values.reshape(-1,1)
292
+ y_test = y_test.values.reshape(-1,1)
293
+
294
+ X_train = sc.fit_transform(X_train)
295
+ y_train = sc.fit_transform(y_train)
296
+ X_test = sc.fit_transform(X_test)
297
+ y_test = sc.fit_transform(y_test)
298
+
299
+ x_tr=pd.DataFrame(X_train, index = Xt.index, columns = Xt.columns)
300
+ y_tr=pd.DataFrame(y_train, index = Yt.index)
301
+ x_te=pd.DataFrame(X_test, index = Xts.index, columns = Xts.columns)
302
+ y_te=pd.DataFrame(y_test, index = Yts.index)
303
+
304
+ # Reshape the data for the CNN-LSTM model
305
+ X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
306
+ X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
307
+
308
+ study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner(n_min_trials=2, n_startup_trials=2))
309
+ fn = lambda trial: objective(trial, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
310
+ study.optimize(fn, n_trials=5)
311
+
312
+ best_params = study.best_params
313
+ #print(f"Best params: {best_params}")
314
+
315
+ model = tf.keras.Sequential()
316
+
317
+ # Creating the Neural Network model here...
318
+ # CNN layers
319
+ model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))
320
+ # model.add(Dense(5, kernel_regularizer=L2(0.01)))
321
+
322
+ # LSTM layers
323
+ model.add(Bidirectional(LSTM(best_params["lstm_units_1"], return_sequences=True)))
324
+ model.add(Dropout(best_params["dropout_1"]))
325
+ model.add(Bidirectional(LSTM(best_params["lstm_units_2"], return_sequences=False)))
326
+ model.add(Dropout(best_params["dropout_2"]))
327
+
328
+ #Final layers
329
+ model.add(Dense(1, activation='relu'))
330
+ model.compile(optimizer='adam', loss='mse', metrics=['mse'])
331
+
332
+ # Train the model
333
+ history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32, verbose=0)
334
+
335
+ # Evaluate the model
336
+ loss = model.evaluate(X_test, y_test, verbose=0)[0]
337
+
338
+ print(f"Final loss (without KFold): {loss}")
339
+
340
+ kfold = KFold(n_splits=10, shuffle=True)
341
+
342
+ inputs = np.concatenate((X_train, X_test), axis=0)
343
+ targets = np.concatenate((y_train, y_test), axis=0)
344
+ acc_per_fold = []
345
+ loss_per_fold = []
346
+ xgb_res = []
347
+ num_epochs = 10
348
+ batch_size = 32
349
+
350
+ fold_no = 1
351
+ print('------------------------------------------------------------------------')
352
+ print("Training for 10 folds... Standby")
353
+ for train, test in kfold.split(inputs, targets):
354
+ #print('------------------------------------------------------------------------')
355
+ #print(f'Training for fold {fold_no} ...')
356
+ history = model.fit(inputs[train], targets[train],
357
+ batch_size=32,
358
+ epochs=15,
359
+ verbose=0)
360
+
361
+ scores = model.evaluate(inputs[test], targets[test], verbose=0)
362
+ #print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
363
+ acc_per_fold.append(scores[1] * 100)
364
+ loss_per_fold.append(scores[0])
365
+ fold_no = fold_no + 1
366
+
367
+
368
+ print('------------------------------------------------------------------------')
369
+ #print('Score per fold')
370
+ #for i in range(0, len(acc_per_fold)):
371
+ # print('------------------------------------------------------------------------')
372
+ # print(f'> Fold {i+1} - Loss: {loss_per_fold[i]} - Loss%: {acc_per_fold[i]}%')
373
+ #print('------------------------------------------------------------------------')
374
+ #print('Average scores for all folds:')
375
+ #print(f'> Possible Loss %: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
376
+ #print(f'> Loss: {np.mean(loss_per_fold)}')
377
+ #print('------------------------------------------------------------------------')
378
+
379
+ trek = df.iloc[0:len(df), 1:22]
380
+ Y = trek[0:len(trek)]
381
+ YP = trek[1:len(trek)]
382
+ Y1 = Y['Close']
383
+ Y2 = YP['Close']
384
+ Yx = pd.DataFrame(YP, index=YP.index, columns=YP.columns)
385
+ #X = sc.fit_transform(X.reshape(-1,22))
386
+ Y = np.array(Y)
387
+ Y1 = np.array(Y1)
388
+ Y = sc.fit_transform(Y)
389
+ Y1 = Y1.reshape(-1,1)
390
+ Y1 = sc.fit_transform(Y1)
391
+
392
+ train_X = Y.reshape(Y.shape[0],Y.shape[1],1)
393
+ #Y = Y.reshape(-1,1)
394
+ pred = model.predict(train_X, verbose=0)
395
+ pred = np.array(pred).reshape(-1,1)
396
+ var2 = max_error(pred.reshape(-1,1), Y1)
397
+ print('Max Error: %f' % var2)
398
+ prax[5] = float(var2)
399
+ pred = sc.inverse_transform(pred)
400
+
401
+ print(pred[-2], pred[-1])
402
+ prax[3] = pred[-2]
403
+ prax[4] = pred[-1]
404
+ if(pred[-1]-pred[-2]>0):
405
+ prax[6] = 1
406
+ elif(pred[-1]-pred[-2]==0):
407
+ prax[6] = 0
408
+ else:
409
+ prax[6] = -1
410
+
411
+ # %%
412
+ # Function to train the model (TFT)
413
+ def modelTFT(csv_file, prax):
414
+ train = csv_file
415
+ #test = pd.read_csv("/kaggle/input/artemis-test/nifty_daily.csv")
416
+ train['date'] = pd.to_datetime(train['Date/Time'])
417
+ #test['date'] = pd.to_datetime(test['Date'])
418
+
419
+ data = pd.concat([train], axis = 0, ignore_index=True)
420
+ # Check that key is country-store-product-date combination
421
+ #assert len(data.drop_duplicates(['country', 'store', 'product', 'date'])) == len(data)
422
+ # Check that there is one date per country-store-product combination
423
+ #assert len(data.drop_duplicates(['country', 'store', 'product'])) == len(data)//data['date'].nunique()
424
+
425
+ #display(train.sample(4))
426
+
427
+ """<a id ="3"></a><h3 style="background:#0554f2; border:0; border-radius: 4px; color:#f5f6f7">Model Implementation in Pytorch-Forecasting </h3>"""
428
+
429
+ # Add a time_idx (a sequence of consecutive integers that runs from the earliest to the latest date)
430
+
431
+ data = (data.merge((data[['Date/Time']].drop_duplicates(ignore_index=True)
432
+ .rename_axis('time_idx')).reset_index(), on = ['Date/Time']))
433
+ # add additional features
434
+ data["day_of_week"] = data['date'].dt.dayofweek.astype(str).astype("category") # categories have be strings
435
+ data["week_of_year"] = data['date'].dt.isocalendar().week.astype(str).astype("category") # categories have be strings
436
+ data["month"] = data['date'].dt.month.astype(str).astype("category") # categories have be strings
437
+ #data["log_num_sold"] = np.log(data.num_sold + 1e-8)
438
+ #data["avg_volume_by_country"] = data.groupby(["time_idx", "country"], observed=True).num_sold.transform("mean")
439
+ #data["avg_volume_by_store"] = data.groupby(["time_idx", "store"], observed=True).num_sold.transform("mean")
440
+ #data["avg_volume_by_product"] = data.groupby(["time_idx", "product"], observed=True).num_sold.transform("mean")
441
+
442
+ #unique_dates_country = data[['date', 'Ticker']].drop_duplicates(ignore_index = True)
443
+ #unique_dates_country['is_holiday'] = (unique_dates_country
444
+ # .apply(lambda x: x.date in holidays.country_holidays(x.country), axis = 1).astype('category'))
445
+ #unique_dates_country['is_holiday_lead_1'] = (unique_dates_country
446
+ # .apply(lambda x: x.date+pd.Timedelta(days=1) in holidays.country_holidays(x.country), axis = 1).astype('category'))
447
+ #unique_dates_country['is_holiday_lead_2'] = (unique_dates_country
448
+ # .apply(lambda x: x.date+pd.Timedelta(days=2) in holidays.country_holidays(x.country), axis = 1).astype('category'))
449
+ #unique_dates_country['is_holiday_lag_1'] = (unique_dates_country
450
+ # .apply(lambda x: x.date-pd.Timedelta(days=1) in holidays.country_holidays(x.country), axis = 1).astype('category'))
451
+ #unique_dates_country['is_holiday_lag_2'] = (unique_dates_country
452
+ # .apply(lambda x: x.date-pd.Timedelta(days=2) in holidays.country_holidays(x.country), axis = 1).astype('category'))
453
+ #data = data.merge(unique_dates_country, on = ['date', 'Ticker'], validate = "m:1")
454
+ #del unique_dates_country
455
+ gc.collect()
456
+ data.sample(5, random_state=30)
457
+
458
+ train = data.iloc[:len(train)]
459
+ test = data.iloc[len(train):]
460
+
461
+ max_prediction_length = 2
462
+ max_encoder_length = train.date.nunique()
463
+ training_cutoff = train["time_idx"].max() - max_prediction_length # hold out the last max_prediction_length steps for validation
464
+
465
+ # Let's create a Dataset
466
+ training = TimeSeriesDataSet(
467
+ train[lambda x: x.time_idx <= training_cutoff],
468
+ time_idx="time_idx",
469
+ target="Close",
470
+ group_ids=["Ticker"],
471
+ min_encoder_length=max_prediction_length, # keep encoder length long (as it is in the validation set)
472
+ max_encoder_length=max_encoder_length,
473
+ max_prediction_length=max_prediction_length,
474
+ static_categoricals=["Ticker"],
475
+ time_varying_known_categoricals=["month", "week_of_year", "day_of_week"],
476
+ #variable_groups={"is_holiday": ["is_holiday"]}, # group of categorical variables can be treated as one variable
477
+ time_varying_known_reals=["time_idx"],
478
+ time_varying_unknown_categoricals=[],
479
+ time_varying_unknown_reals=[
480
+ 'Open','High','Low','Close','OI','RSI14','RSI44','HHRSI','Rsi Weekly','LLCHHV','white','Vap44','Vap14','Ema5','Ema20','Ema50','Ema200'
481
+ ],
482
+ target_normalizer=GroupNormalizer(
483
+ groups=['Ticker'], transformation="softplus"
484
+ ), # use softplus and normalize by group
485
+ categorical_encoders={
486
+ 'week_of_year':NaNLabelEncoder(add_nan=True)
487
+ },
488
+ #lags={'num_sold': [7, 30, 365]},
489
+ add_relative_time_idx=True,
490
+ add_target_scales=True,
491
+ add_encoder_length=True,
492
+ )
493
+
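+ # With max_encoder_length = train.date.nunique() and max_prediction_length = 2, each sample can
+ # encode up to the full available history and forecasts the next two steps.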
494
+ # create validation set (predict=True) which means to predict the last max_prediction_length points in time
495
+ # for each series
496
+ validation = TimeSeriesDataSet.from_dataset(training, train, predict=True, stop_randomization=True)
497
+
498
+ # create dataloaders for model
499
+ batch_size = 128 # set this between 32 and 128
500
+ train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
501
+ val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)
502
+
503
+ #let's see how a naive model does
504
+
505
+ actuals = torch.cat([y for x, (y, weight) in iter(val_dataloader)])#.cuda()
506
+ baseline_predictions = Baseline().predict(val_dataloader)#.cuda()
507
+ (actuals - baseline_predictions).abs().mean().item()
508
+
509
+ sm = SMAPE()
510
+
511
+ print(f"Median loss for naive prediction on validation: {sm.loss(actuals, baseline_predictions).mean(axis = 1).median().item()}")
512
+
513
+ early_stop_callback = EarlyStopping(monitor="train_loss", min_delta=1e-2, patience=PATIENCE, verbose=False, mode="min")
514
+ lr_logger = LearningRateMonitor() # log the learning rate
515
+ logger = TensorBoardLogger("lightning_logs") # logging results to a tensorboard
516
+
517
+ trainer = pl.Trainer(
518
+ max_epochs=1,
519
+ accelerator=ACCELERATOR,
520
+ enable_model_summary=False,
521
+ gradient_clip_val=0.25,
522
+ limit_train_batches=10, # comment in for training, running validation every 30 batches
523
+ #fast_dev_run=True, # comment in to check that network or dataset has no serious bugs
524
+ callbacks=[lr_logger, early_stop_callback],
525
+ logger=logger,
526
+ )
527
+
528
+ tft = TemporalFusionTransformer.from_dataset(
529
+ training,
530
+ learning_rate=LEARNING_RATE,
531
+ lstm_layers=2,
532
+ hidden_size=16,
533
+ attention_head_size=2,
534
+ dropout=0.2,
535
+ hidden_continuous_size=8,
536
+ output_size=1, # single output (the default of 7 quantiles applies only to QuantileLoss)
537
+ loss=SMAPE(),
538
+ log_interval=10, # log every 10 batches
539
+ reduce_on_plateau_patience=4
540
+ )
541
+
542
+ tft.to(DEVICE)
543
+ trainer.fit(
544
+ tft,
545
+ train_dataloaders=train_dataloader,
546
+ val_dataloaders=val_dataloader,
547
+ )
548
+ #torch.cuda.empty_cache()
549
+ #print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")
550
+
551
+ if OPTUNA:
552
+ from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
553
+
554
+ # create study
555
+ study = optimize_hyperparameters(
556
+ train_dataloader,
557
+ val_dataloader,
558
+ model_path="optuna_test",
559
+ n_trials=5,
560
+ max_epochs=MAX_EPOCHS,
561
+ gradient_clip_val_range=(0.01, 0.3),
562
+ hidden_size_range=(8, 24),
563
+ hidden_continuous_size_range=(8, 12),
564
+ attention_head_size_range=(2, 4),
565
+ learning_rate_range=(0.01, 0.05),
566
+ dropout_range=(0.1, 0.25),
567
+ trainer_kwargs=dict(limit_train_batches=20),
568
+ reduce_on_plateau_patience=4,
569
+ pruner=optuna.pruners.MedianPruner(n_min_trials=3, n_startup_trials=3),
570
+ use_learning_rate_finder=False, # use Optuna to find ideal learning rate or use in-built learning rate finder
571
+ )
572
+ #torch.cuda.empty_cache()
573
+ #'''
574
+ trainer = pl.Trainer(
575
+ max_epochs=MAX_EPOCHS,
576
+ accelerator=ACCELERATOR,
577
+ enable_model_summary=False,
578
+ gradient_clip_val=study.best_params['gradient_clip_val'],
579
+ limit_train_batches=20, # comment in for training, running validation every 30 batches
580
+ #fast_dev_run=True, # comment in to check that network or dataset has no serious bugs
581
+ callbacks=[lr_logger, early_stop_callback],
582
+ logger=logger,
583
+ )
584
+
585
+ tft = TemporalFusionTransformer.from_dataset(
586
+ training,
587
+ learning_rate=study.best_params['learning_rate'],
588
+ lstm_layers=2,
589
+ hidden_size=study.best_params['hidden_size'],
590
+ attention_head_size=study.best_params['attention_head_size'],
591
+ dropout=study.best_params['dropout'],
592
+ hidden_continuous_size=study.best_params['hidden_continuous_size'],
593
+ output_size=1, # single output (the default of 7 quantiles applies only to QuantileLoss)
594
+ loss=SMAPE(),
595
+ log_interval=10, # log every 10 batches
596
+ reduce_on_plateau_patience=4
597
+ )
598
+
599
+ tft.to(DEVICE)
600
+ trainer.fit(
601
+ tft,
602
+ train_dataloaders=train_dataloader,
603
+ val_dataloaders=val_dataloader,
604
+ )
605
+ #'''
606
+ #torch.cuda.empty_cache()
607
+ best_model_path = trainer.checkpoint_callback.best_model_path
608
+ best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
609
+ actuals = torch.cat([y[0] for x, y in iter(val_dataloader)])#.cuda()
610
+ predictions = best_tft.predict(val_dataloader, mode="prediction")
611
+ raw_predictions = best_tft.predict(val_dataloader, mode="raw", return_x=True)
612
+
613
+ sm = SMAPE()
614
+ print(f"Validation median SMAPE loss: {sm.loss(actuals, predictions).mean(axis = 1).median().item()}")
615
+ prax[5] = sm.loss(actuals, predictions).mean(axis = 1).median().item()
616
+ #best_tft.plot_prediction(raw_predictions.x, raw_predictions.output, idx=0, add_loss_to_title=True);
617
+
618
+ print(raw_predictions[0][0])
619
+ prax[3] = '-'
620
+ prax[4] = raw_predictions[0][0].data.cpu().tolist()[0][0]
621
+ t = prax[4]
622
+ tm = data['Close'][len(data)-1]
623
+ if(t-tm>0):
624
+ prax[6] = 1
625
+ elif(t-tm==0):
626
+ prax[6] = 0
627
+ else:
628
+ prax[6] = -1
629
+ #prax[i][3] = raw_predictions[0][0].data[1]
630
+ print("-----------")
631
+
632
+ #with open("out.csv", "w", newline="") as f:
633
+ # writer = csv.writer(f)
634
+ # writer.writerows(prax)
635
+
636
+ # %%
637
+ # Function to train the model (TFT)
638
+ def modelTFT_OpenGap(csv_file, prax):
639
+ train = csv_file
640
+ #test = pd.read_csv("/kaggle/input/artemis-test/nifty_daily.csv")
641
+ train['date'] = pd.to_datetime(train['Date/Time'])
642
+ #test['date'] = pd.to_datetime(test['Date'])
643
+ datLength = len(train)
644
+ train['O-C'] = 0
645
+ for i in range(datLength):
646
+ if i == 0:
647
+ train['O-C'][i] = 0
648
+ continue
649
+ else:
650
+ train['O-C'][i] = train['Open'][i] - train['Close'][i-1]
651
+ data = pd.concat([train], axis = 0, ignore_index=True)
652
+ # Check that key is country-store-product-date combination
653
+ #assert len(data.drop_duplicates(['country', 'store', 'product', 'date'])) == len(data)
654
+ # Check that there is one date per country-store-product combination
655
+ #assert len(data.drop_duplicates(['country', 'store', 'product'])) == len(data)//data['date'].nunique()
656
+
657
+ #display(train.sample(4))
658
+
659
+ """<a id ="3"></a><h3 style="background:#0554f2; border:0; border-radius: 4px; color:#f5f6f7">Model Implementation in Pytorch-Forecasting </h3>"""
660
+
661
+ # Add a time_idx (a sequence of consecutive integers that runs from the earliest to the latest date)
662
+
663
+ data = (data.merge((data[['Date/Time']].drop_duplicates(ignore_index=True)
664
+ .rename_axis('time_idx')).reset_index(), on = ['Date/Time']))
665
+ # add additional features
666
+ data["day_of_week"] = data['date'].dt.dayofweek.astype(str).astype("category") # categories have be strings
667
+ data["week_of_year"] = data['date'].dt.isocalendar().week.astype(str).astype("category") # categories have be strings
668
+ data["month"] = data['date'].dt.month.astype(str).astype("category") # categories have be strings
669
+ #data["log_num_sold"] = np.log(data.num_sold + 1e-8)
670
+ #data["avg_volume_by_country"] = data.groupby(["time_idx", "country"], observed=True).num_sold.transform("mean")
671
+ #data["avg_volume_by_store"] = data.groupby(["time_idx", "store"], observed=True).num_sold.transform("mean")
672
+ #data["avg_volume_by_product"] = data.groupby(["time_idx", "product"], observed=True).num_sold.transform("mean")
673
+
674
+ #unique_dates_country = data[['date', 'Ticker']].drop_duplicates(ignore_index = True)
675
+ #unique_dates_country['is_holiday'] = (unique_dates_country
676
+ # .apply(lambda x: x.date in holidays.country_holidays(x.country), axis = 1).astype('category'))
677
+ #unique_dates_country['is_holiday_lead_1'] = (unique_dates_country
678
+ # .apply(lambda x: x.date+pd.Timedelta(days=1) in holidays.country_holidays(x.country), axis = 1).astype('category'))
679
+ #unique_dates_country['is_holiday_lead_2'] = (unique_dates_country
680
+ # .apply(lambda x: x.date+pd.Timedelta(days=2) in holidays.country_holidays(x.country), axis = 1).astype('category'))
681
+ #unique_dates_country['is_holiday_lag_1'] = (unique_dates_country
682
+ # .apply(lambda x: x.date-pd.Timedelta(days=1) in holidays.country_holidays(x.country), axis = 1).astype('category'))
683
+ #unique_dates_country['is_holiday_lag_2'] = (unique_dates_country
684
+ # .apply(lambda x: x.date-pd.Timedelta(days=2) in holidays.country_holidays(x.country), axis = 1).astype('category'))
685
+ #data = data.merge(unique_dates_country, on = ['date', 'Ticker'], validate = "m:1")
686
+ #del unique_dates_country
687
+ gc.collect()
688
+ data.sample(5, random_state=30)
689
+
690
+ train = data.iloc[:len(train)]
691
+ test = data.iloc[len(train):]
692
+
693
+ max_prediction_length = 2
694
+ max_encoder_length = train.date.nunique()
695
+ training_cutoff = train["time_idx"].max() - max_prediction_length # hold out the last max_prediction_length steps for validation
696
+
697
+ # Let's create a Dataset
698
+ training = TimeSeriesDataSet(
699
+ train[lambda x: x.time_idx <= training_cutoff],
700
+ time_idx="time_idx",
701
+ target="Close",
702
+ group_ids=["Ticker"],
703
+ min_encoder_length=max_prediction_length, # keep encoder length long (as it is in the validation set)
704
+ max_encoder_length=max_encoder_length,
705
+ max_prediction_length=max_prediction_length,
706
+ static_categoricals=["Ticker"],
707
+ time_varying_known_categoricals=["month", "week_of_year", "day_of_week"],
708
+ #variable_groups={"is_holiday": ["is_holiday"]}, # group of categorical variables can be treated as one variable
709
+ time_varying_known_reals=["time_idx"],
710
+ time_varying_unknown_categoricals=[],
711
+ time_varying_unknown_reals=[
712
+ 'Open','High','Low','Close','OI','RSI14','RSI44','HHRSI','Rsi Weekly','LLCHHV','white','Vap44','Vap14','Ema5','Ema20','Ema50','Ema200', 'O-C'
713
+ ],
714
+ target_normalizer=GroupNormalizer(
715
+ groups=['Ticker'], transformation="softplus"
716
+ ), # use softplus and normalize by group
717
+ categorical_encoders={
718
+ 'week_of_year':NaNLabelEncoder(add_nan=True)
719
+ },
720
+ #lags={'num_sold': [7, 30, 365]},
721
+ add_relative_time_idx=True,
722
+ add_target_scales=True,
723
+ add_encoder_length=True,
724
+ )
725
+
726
+ # create validation set (predict=True) which means to predict the last max_prediction_length points in time
727
+ # for each series
728
+ validation = TimeSeriesDataSet.from_dataset(training, train, predict=True, stop_randomization=True)
729
+
730
+ # create dataloaders for model
731
+ batch_size = 128 # set this between 32 and 128
732
+ train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
733
+ val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)
734
+
735
+ #let's see how a naive model does
736
+
737
+ actuals = torch.cat([y for x, (y, weight) in iter(val_dataloader)])#.cuda()
738
+ baseline_predictions = Baseline().predict(val_dataloader)#.cuda()
739
+ (actuals - baseline_predictions).abs().mean().item()
740
+
741
+ sm = SMAPE()
742
+
743
+ print(f"Median loss for naive prediction on validation: {sm.loss(actuals, baseline_predictions).mean(axis = 1).median().item()}")
744
+
745
+ early_stop_callback = EarlyStopping(monitor="train_loss", min_delta=1e-2, patience=PATIENCE, verbose=False, mode="min")
746
+ lr_logger = LearningRateMonitor() # log the learning rate
747
+ logger = TensorBoardLogger("lightning_logs") # logging results to a tensorboard
748
+
749
+ trainer = pl.Trainer(
750
+ max_epochs=1,
751
+ accelerator=ACCELERATOR,
752
+ enable_model_summary=False,
753
+ gradient_clip_val=0.25,
754
+ limit_train_batches=10, # comment in for training, running validation every 30 batches
755
+ #fast_dev_run=True, # comment in to check that network or dataset has no serious bugs
756
+ callbacks=[lr_logger, early_stop_callback],
757
+ logger=logger,
758
+ )
759
+
760
+ tft = TemporalFusionTransformer.from_dataset(
761
+ training,
762
+ learning_rate=LEARNING_RATE,
763
+ lstm_layers=2,
764
+ hidden_size=16,
765
+ attention_head_size=2,
766
+ dropout=0.2,
767
+ hidden_continuous_size=8,
768
+ output_size=1, # single output (the default of 7 quantiles applies only to QuantileLoss)
769
+ loss=SMAPE(),
770
+ log_interval=10, # log every 10 batches
771
+ reduce_on_plateau_patience=4
772
+ )
773
+
774
+ tft.to(DEVICE)
775
+ trainer.fit(
776
+ tft,
777
+ train_dataloaders=train_dataloader,
778
+ val_dataloaders=val_dataloader,
779
+ )
780
+ #torch.cuda.empty_cache()
781
+ #print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")
782
+
783
+ if OPTUNA:
784
+ from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
785
+
786
+ # create study
787
+ study = optimize_hyperparameters(
788
+ train_dataloader,
789
+ val_dataloader,
790
+ model_path="optuna_test",
791
+ n_trials=5,
792
+ max_epochs=MAX_EPOCHS,
793
+ gradient_clip_val_range=(0.01, 0.3),
794
+ hidden_size_range=(8, 24),
795
+ hidden_continuous_size_range=(8, 12),
796
+ attention_head_size_range=(2, 4),
797
+ learning_rate_range=(0.01, 0.05),
798
+ dropout_range=(0.1, 0.25),
799
+ trainer_kwargs=dict(limit_train_batches=20),
800
+ reduce_on_plateau_patience=4,
801
+ pruner=optuna.pruners.MedianPruner(n_min_trials=3, n_warmup_steps=3),
802
+ use_learning_rate_finder=False, # use Optuna to find ideal learning rate or use in-built learning rate finder
803
+ )
804
+ #torch.cuda.empty_cache()
805
+ #'''
806
+ trainer = pl.Trainer(
807
+ max_epochs=MAX_EPOCHS,
808
+ accelerator=ACCELERATOR,
809
+ enable_model_summary=False,
810
+ gradient_clip_val=study.best_params['gradient_clip_val'],
811
+ limit_train_batches=20, # comment in for training, running validation every 30 batches
812
+ #fast_dev_run=True, # comment in to check that network or dataset has no serious bugs
813
+ callbacks=[lr_logger, early_stop_callback],
814
+ logger=logger,
815
+ )
816
+
817
+ tft = TemporalFusionTransformer.from_dataset(
818
+ training,
819
+ learning_rate=study.best_params['learning_rate'],
820
+ lstm_layers=2,
821
+ hidden_size=study.best_params['hidden_size'],
822
+ attention_head_size=study.best_params['attention_head_size'],
823
+ dropout=study.best_params['dropout'],
824
+ hidden_continuous_size=study.best_params['hidden_continuous_size'],
825
+ output_size=1, # single output (the default of 7 quantiles applies only to QuantileLoss)
826
+ loss=SMAPE(),
827
+ log_interval=10, # log every 10 batches
828
+ reduce_on_plateau_patience=4
829
+ )
830
+
831
+ tft.to(DEVICE)
832
+ trainer.fit(
833
+ tft,
834
+ train_dataloaders=train_dataloader,
835
+ val_dataloaders=val_dataloader,
836
+ )
837
+ #'''
838
+ #torch.cuda.empty_cache()
839
+ best_model_path = trainer.checkpoint_callback.best_model_path
840
+ best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
841
+ actuals = torch.cat([y[0] for x, y in iter(val_dataloader)])#.cuda()
842
+ predictions = best_tft.predict(val_dataloader, mode="prediction")
843
+ raw_predictions = best_tft.predict(val_dataloader, mode="raw", return_x=True)
844
+
845
+ sm = SMAPE()
846
+ print(f"Validation median SMAPE loss: {sm.loss(actuals, predictions).mean(axis = 1).median().item()}")
847
+ prax[5] = sm.loss(actuals, predictions).mean(axis = 1).median().item()
848
+ #best_tft.plot_prediction(raw_predictions.x, raw_predictions.output, idx=0, add_loss_to_title=True);
849
+
850
+ print(raw_predictions[0][0])
851
+ prax[3] = '-'
852
+ prax[4] = raw_predictions[0][0].data.cpu().tolist()[0][0]
853
+ t = prax[4]
854
+ tm = data['Close'][len(data)-1]
855
+ if(t-tm>0):
856
+ prax[6] = 1
857
+ elif(t-tm==0):
858
+ prax[6] = 0
859
+ else:
860
+ prax[6] = -1
861
+ #prax[i][3] = raw_predictions[0][0].data[1]
862
+ print("-----------")
863
+
864
+ #with open("out.csv", "w", newline="") as f:
865
+ # writer = csv.writer(f)
866
+ # writer.writerows(prax)
867
+
868
+ # %%
869
+ def generate_csv(data_list):
870
+ today = date.today().strftime("%Y_%m_%d")
871
+ filename = f"result_{today}.csv"
872
+ file_exists = os.path.isfile(filename)
873
+ with open(filename, mode='a', newline='') as csv_file:
874
+ fieldnames = ['Ticker', 'Prev_Close_Real', 'Model', 'Prev_Close_Model', 'Close_Model', 'Max_Err', 'Up_Down' ] # replace with your own column names
875
+ writer = csv.writer(csv_file, delimiter=',')
876
+ if not file_exists:
877
+ writer.writerow(fieldnames) # file doesn't exist yet, write a header
878
+ writer.writerow(data_list)
879
+ csv_file.close()
880
+
881
+ def guess_date(string):
882
+ for fmt in ["%Y/%m/%d", "%d-%m-%Y", "%Y%m%d", "%m/%d/%Y", "%d/%m/%Y", "%Y-%m-%d", "%d/%m/%y", "%m/%d/%y"]:
883
+ try:
884
+ return datetime.datetime.strptime(string, fmt).date()
885
+ except ValueError:
886
+ continue
887
+ raise ValueError(string)
888
+
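+ # Example (format list above): guess_date("31-12-2023") matches "%d-%m-%Y" and
+ # returns datetime.date(2023, 12, 31).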
889
+ # %%
890
+ # Main function
891
+ def main():
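+ # Input CSVs are expected to contain at least the columns referenced by the models above
+ # (list inferred from the feature selections): 'Date/Time', 'Ticker', 'Open', 'High', 'Low',
+ # 'Close', 'OI', 'RSI14', 'RSI44', 'HHRSI', 'Rsi Weekly', 'LLCHHV', 'white', 'Vap44', 'Vap14',
+ # 'Ema5', 'Ema20', 'Ema50', 'Ema200'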
892
+ # Data loading
893
+ path = "./demo"
894
+ print("Searching CSV files in ", path, "...")
895
+ # path = "/kaggle/input/artemis-test"
896
+
897
+ # Get a list of all the CSV files in the folder
898
+ csv_files = glob.glob(path + "/*.csv")
899
+ prax = [0,0,0,0,0,0,0]
900
+ # Create a list of DataFrames, one for each CSV file
901
+ dfs = []
902
+ c = 0
903
+ for csv_file in csv_files:
904
+ df = pd.read_csv(csv_file)
905
+ dfs.append(df)
906
+ c = c + 1
907
+
908
+ if c == 0:
909
+ print("No CSV files found in ", path, ".")
910
+ print("Exiting...")
911
+
912
+ for df in dfs:
913
+ #print(df.head())
914
+ print(df['Ticker'][0])
915
+ prax[0] = df['Ticker'][0]
916
+ prax[1] = df['Close'][len(df)-1]
917
+ print('------------------')
918
+ #df = df.drop(['Volume'], axis=1)
919
+ for i in range(len(df)):
920
+ x = guess_date(df['Date/Time'][i])
921
+ df['Date/Time'][i] = x.strftime("%Y-%m-%d")
922
+ df['Date/Time'] = pd.to_datetime(df['Date/Time'])
923
+ df.fillna(0, inplace=True)
924
+ modelTFT(df, prax)
925
+ prax[2] = "TFT"
926
+ generate_csv(prax)
927
+ prax = [0,0,0,0,0,0,0]
928
+ modelTFT_OpenGap(df, prax)
929
+ prax[2] = "TFT_OpenGap"
930
+ generate_csv(prax)
931
+ #df.set_index('Date/Time', inplace=True)
932
+ df = df.drop(['Date/Time'], axis=1)
933
+ prax = [0,0,0,0,0,0,0]
934
+ modelCNNLSTM(df, prax)
935
+ prax[2] = "CNNLSTM"
936
+ generate_csv(prax)
937
+ prax = [0,0,0,0,0,0,0]
938
+ modelCNNLSTM_OpenGap(df, prax)
939
+ prax[2] = "CNNLSTM_OpenGap"
940
+ #print("Saving to CSV...Standby...")
941
+ generate_csv(prax)
942
+ # Generate blank line
943
+ prax=["","","","","","",""]
944
+ generate_csv(prax)
945
+ # Reset prax
946
+ prax = [0,0,0,0,0,0,0]
947
+
948
+ if __name__ == "__main__":
949
+ main()
950
+
951
+ print("Deleting temporary log files...")
952
+ # Delete "lightning_logs" directory
953
+ if os.path.exists("lightning_logs"):
954
+ shutil.rmtree("lightning_logs")
955
+ # Delete "optuna_test" directory
956
+ if os.path.exists("optuna_test"):
957
+ shutil.rmtree("optuna_test")
app.py CHANGED
@@ -108,9 +108,9 @@ def objective(trial, X_train, y_train, X_test, y_test):
  def modelCNNLSTM(csv_file, prax):
  # Read the data
  df = csv_file
- df = df['Date/Time'].values.astype("float64")
- temp_data = df.iloc[0:len(df)-100, 1:23]
- trek = df.iloc[len(df)-100:,1:23]
+ #df = df['Date/Time'].values.astype("float64")
+ temp_data = df.iloc[0:len(df)-100, 1:21]
+ trek = df.iloc[len(df)-100:,1:21]
  #print(temp_data)
  data = temp_data
  sc = MinMaxScaler()
@@ -148,9 +148,9 @@ def modelCNNLSTM(csv_file, prax):
  X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
  X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
 
- study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner(n_min_trials=5, n_startup_trials=5))
+ study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner(n_min_trials=4, n_startup_trials=4))
  fn = lambda trial: objective(trial, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
- study.optimize(fn, n_trials=7)
+ study.optimize(fn, n_trials=5)
 
  best_params = study.best_params
  #print(f"Best params: {best_params}")
@@ -219,7 +219,7 @@ def modelCNNLSTM(csv_file, prax):
  #print(f'> Loss: {np.mean(loss_per_fold)}')
  #print('------------------------------------------------------------------------')
 
- trek = df.iloc[0:len(df), 1:23]
+ trek = df.iloc[0:len(df), 1:21]
  Y = trek[0:len(trek)]
  YP = trek[1:len(trek)]
  Y1 = Y['Close']
@@ -264,8 +264,8 @@ def modelCNNLSTM_OpenGap(csv_file, prax):
  continue
  else:
  df['O-C'][i] = df['Open'][i] - df['Close'][i-1]
- temp_data = df.iloc[0:datLength-100, 1:24]
- trek = df.iloc[datLength-100:,1:24]
+ temp_data = df.iloc[0:datLength-100, 1:22]
+ trek = df.iloc[datLength-100:,1:22]
  #print(temp_data)
  data = temp_data
  #data = data.values.astype("float64")
@@ -305,9 +305,9 @@ def modelCNNLSTM_OpenGap(csv_file, prax):
  X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
  X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
 
- study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner(n_min_trials=5, n_startup_trials=5))
+ study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner(n_min_trials=2, n_startup_trials=2))
  fn = lambda trial: objective(trial, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
- study.optimize(fn, n_trials=7)
+ study.optimize(fn, n_trials=5)
 
  best_params = study.best_params
  #print(f"Best params: {best_params}")
@@ -376,7 +376,7 @@ def modelCNNLSTM_OpenGap(csv_file, prax):
  #print(f'> Loss: {np.mean(loss_per_fold)}')
  #print('------------------------------------------------------------------------')
 
- trek = df.iloc[0:len(df), 1:24]
+ trek = df.iloc[0:len(df), 1:22]
  Y = trek[0:len(trek)]
  YP = trek[1:len(trek)]
  Y1 = Y['Close']
@@ -477,7 +477,7 @@ def modelTFT(csv_file, prax):
  time_varying_known_reals=["time_idx"],
  time_varying_unknown_categoricals=[],
  time_varying_unknown_reals=[
- 'Open','High','Low','Close','OI','RSI14','RSI44','HHRSI','Rsi Weekly','LLCHHV','white','Vap44','Vap14','BV11','SV11','Ema5','Ema20','Ema50','Ema200'
+ 'Open','High','Low','Close','OI','RSI14','RSI44','HHRSI','Rsi Weekly','LLCHHV','white','Vap44','Vap14','Ema5','Ema20','Ema50','Ema200'
  ],
  target_normalizer=GroupNormalizer(
  groups=['Ticker'], transformation="softplus"
@@ -709,7 +709,7 @@ def modelTFT_OpenGap(csv_file, prax):
  time_varying_known_reals=["time_idx"],
  time_varying_unknown_categoricals=[],
  time_varying_unknown_reals=[
- 'Open','High','Low','Close','OI','RSI14','RSI44','HHRSI','Rsi Weekly','LLCHHV','white','Vap44','Vap14','BV11','SV11','Ema5','Ema20','Ema50','Ema200', 'O-C'
+ 'Open','High','Low','Close','OI','RSI14','RSI44','HHRSI','Rsi Weekly','LLCHHV','white','Vap44','Vap14','Ema5','Ema20','Ema50','Ema200', 'O-C'
  ],
  target_normalizer=GroupNormalizer(
  groups=['Ticker'], transformation="softplus"
@@ -867,7 +867,8 @@ def modelTFT_OpenGap(csv_file, prax):
 
  # %%
  def generate_csv(data_list):
- filename = f"result.csv"
+ today = date.today().strftime("%Y_%m_%d")
+ filename = f"result_{today}.csv"
  file_exists = os.path.isfile(filename)
  with open(filename, mode='a', newline='') as csv_file:
  fieldnames = ['Ticker', 'Prev_Close_Real', 'Model', 'Prev_Close_Model', 'Close_Model', 'Max_Err', 'Up_Down' ] # replace with your own column names
@@ -936,7 +937,6 @@ def main(files):
 
  gradioApp = gr.Interface(fn=main, inputs=gr.File(file_count="multiple", file_type=".csv"), outputs="file")
 
-
  if __name__ == "__main__":
  # Calling main function
  gradioApp.launch()
demo/sample.csv CHANGED
The diff for this file is too large to render. See raw diff