kryox64 committed on
Commit
cbff113
1 Parent(s): 0a3df69

Add CUDA variant


Signed-off-by: Aadhitya A <[email protected]>

Files changed (2)
  1. app-cuda.py +941 -0
  2. app.py +0 -7
app-cuda.py ADDED
@@ -0,0 +1,941 @@
1
+ # %%
2
+ # Import section
3
+ # (Please don't edit this section unless necessary)
4
+ import copy
5
+ from pathlib import Path
6
+ import warnings
7
+ import holidays
8
+ import seaborn as sns
9
+ import matplotlib
10
+ import matplotlib.dates as mdates
11
+ import matplotlib.pyplot as plt
12
+ plt.style.use('fivethirtyeight')
13
+ import numpy as np
14
+ import pandas as pd
15
+ import glob
16
+ import csv
17
+ import lightning.pytorch as pl
18
+ from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
19
+ from lightning.pytorch.loggers import TensorBoardLogger
20
+ import torch
21
+ from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
22
+ from pytorch_forecasting.data import GroupNormalizer, NaNLabelEncoder
23
+ from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
24
+ from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
25
+ import random
26
+ import gc
27
+ import tensorflow as tf
28
+ import tensorboard as tb
29
+ tf.io.gfile = tb.compat.tensorflow_stub.io.gfile
30
+ import os
31
+ import math
32
+ import sys
33
+ from sklearn.model_selection import train_test_split
34
+ from sklearn.preprocessing import MinMaxScaler
35
+ import tensorflow as tf
36
+ from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, Bidirectional, TimeDistributed
37
+ from tensorflow.keras.layers import MaxPooling1D, Flatten
38
+ from tensorflow.keras.regularizers import L1, L2
39
+ from tensorflow.keras.metrics import Accuracy
40
+ from tensorflow.keras.metrics import RootMeanSquaredError
41
+ from sklearn.metrics import mean_squared_error as MSE
42
+ from sklearn.model_selection import KFold
43
+ from sklearn.inspection import permutation_importance
44
+ from tensorflow.keras.utils import plot_model
45
+ from sklearn.metrics import explained_variance_score, mean_poisson_deviance, mean_gamma_deviance, mean_squared_error, mean_squared_log_error, d2_absolute_error_score, d2_pinball_score, d2_tweedie_score
46
+ from sklearn.metrics import r2_score
47
+ from sklearn.metrics import max_error
48
+ import datetime
49
+ from datetime import date
50
+ import optuna
51
+ from tensorflow.keras.callbacks import Callback
52
+ from optuna.integration import TFKerasPruningCallback
53
+ import shutil
54
+ import gradio as gr
55
+
56
+ # Some variables (don't edit these unless necessary)
57
+ DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
58
+ random.seed(30)
59
+ np.random.seed(30)
60
+ tf.random.set_seed(30)
61
+ torch.manual_seed(30)
62
+ torch.cuda.manual_seed(30)
63
+
64
+ # Global variables
65
+ PATIENCE = 30
66
+ MAX_EPOCHS = 3
67
+ LEARNING_RATE = 0.01
68
+ OPTUNA = True
69
+ ACCELERATOR = "gpu"
70
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:1024"
71
+
72
+ # Variables to count the number of files
73
+ w = 7
74
+ prax = [0 for x in range(w)]
75
+
76
+ # %%
77
+ # Objective function for Optuna (CNN-LSTM)
78
+ def objective(trial, X_train, y_train, X_test, y_test):
79
+ model = tf.keras.Sequential()
80
+
81
+ # Creating the Neural Network model here...
82
+ # CNN layers
83
+ model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))
84
+ # model.add(Dense(5, kernel_regularizer=L2(0.01)))
85
+
86
+ # LSTM layers
87
+ model.add(Bidirectional(LSTM(trial.suggest_int("lstm_units_1", 32, 256), return_sequences=True)))
88
+ model.add(Dropout(trial.suggest_float("dropout_1", 0.1, 0.5)))
89
+ model.add(Bidirectional(LSTM(trial.suggest_int("lstm_units_2", 32, 256), return_sequences=False)))
90
+ model.add(Dropout(trial.suggest_float("dropout_2", 0.1, 0.5)))
91
+
92
+ #Final layers
93
+ model.add(Dense(1, activation='relu'))
94
+ model.compile(optimizer='adam', loss='mse', metrics=['mse'])
95
+
96
+ # Train the model
97
+ pruning_callback = TFKerasPruningCallback(trial, "val_loss")
98
+ history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=15, batch_size=32, verbose=0, callbacks=[pruning_callback])
99
+
100
+ # Evaluate the model
101
+ loss = model.evaluate(X_test, y_test, verbose=0)[0]
102
+
103
+ return loss
104
+
105
+ # %%
106
+ # Function to train the model (CNN-LSTM)
107
+ def modelCNNLSTM(csv_file, prax):
108
+ # Read the data
109
+ df = csv_file
110
+ df['Date/Time'] = df['Date/Time'].values.astype("float64") # convert the date column in place; overwriting df here would break the .iloc slicing below
111
+ temp_data = df.iloc[0:len(df)-100, 1:23]
112
+ trek = df.iloc[len(df)-100:,1:23]
113
+ #print(temp_data)
114
+ data = temp_data
115
+ sc = MinMaxScaler()
116
+ # Split the data into training and testing sets
117
+ train_size = int(len(data) * 0.8)
118
+ train_data, test_data = data[:train_size], data[train_size:]
119
+ # Separate the input features and target variable
120
+ X_train, y_train = train_data, train_data['Close']
121
+ X_test, y_test = test_data, test_data['Close']
122
+
123
+ X_train = X_train[0:len(X_train)-1]
124
+ y_train = y_train[1:len(y_train)]
125
+ X_test = X_test[0:len(X_test)-1]
126
+ y_test = y_test[1:len(y_test)]
127
+
128
+ Xt = X_train
129
+ Xts = X_test
130
+ Yt = y_train
131
+ Yts = y_test
132
+
133
+ y_train = y_train.values.reshape(-1,1)
134
+ y_test = y_test.values.reshape(-1,1)
135
+
136
+ X_train = sc.fit_transform(X_train)
137
+ y_train = sc.fit_transform(y_train)
138
+ X_test = sc.fit_transform(X_test)
139
+ y_test = sc.fit_transform(y_test)
140
+
141
+ x_tr=pd.DataFrame(X_train, index = Xt.index, columns = Xt.columns)
142
+ y_tr=pd.DataFrame(y_train, index = Yt.index)
143
+ x_te=pd.DataFrame(X_test, index = Xts.index, columns = Xts.columns)
144
+ y_te=pd.DataFrame(y_test, index = Yts.index)
145
+
146
+ # Reshape the data for the CNN-LSTM model
147
+ X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
148
+ X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
149
+
150
+ study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner(n_min_trials=5, n_startup_trials=5))
151
+ fn = lambda trial: objective(trial, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
152
+ study.optimize(fn, n_trials=7)
153
+
154
+ best_params = study.best_params
155
+ #print(f"Best params: {best_params}")
156
+
157
+ model = tf.keras.Sequential()
158
+
159
+ # Creating the Neural Network model here...
160
+ # CNN layers
161
+ model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))
162
+ # model.add(Dense(5, kernel_regularizer=L2(0.01)))
163
+
164
+ # LSTM layers
165
+ model.add(Bidirectional(LSTM(best_params["lstm_units_1"], return_sequences=True)))
166
+ model.add(Dropout(best_params["dropout_1"]))
167
+ model.add(Bidirectional(LSTM(best_params["lstm_units_2"], return_sequences=False)))
168
+ model.add(Dropout(best_params["dropout_2"]))
169
+
170
+ #Final layers
171
+ model.add(Dense(1, activation='relu'))
172
+ model.compile(optimizer='adam', loss='mse', metrics=['mse'])
173
+
174
+ # Train the model
175
+ history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32, verbose=0)
176
+
177
+ # Evaluate the model
178
+ loss = model.evaluate(X_test, y_test, verbose=0)[0]
179
+
180
+ print(f"Final loss (without KFold): {loss}")
181
+
182
+ kfold = KFold(n_splits=10, shuffle=True)
183
+
184
+ inputs = np.concatenate((X_train, X_test), axis=0)
185
+ targets = np.concatenate((y_train, y_test), axis=0)
186
+ acc_per_fold = []
187
+ loss_per_fold = []
188
+ xgb_res = []
189
+ num_epochs = 10
190
+ batch_size = 32
191
+
192
+ fold_no = 1
193
+ print('------------------------------------------------------------------------')
194
+ print("Training for 10 folds... Standby")
195
+ for train, test in kfold.split(inputs, targets):
196
+ #print('------------------------------------------------------------------------')
197
+ #print(f'Training for fold {fold_no} ...')
198
+ history = model.fit(inputs[train], targets[train],
199
+ batch_size=32,
200
+ epochs=15,
201
+ verbose=0)
202
+
203
+ scores = model.evaluate(inputs[test], targets[test], verbose=0)
204
+ #print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
205
+ acc_per_fold.append(scores[1] * 100)
206
+ loss_per_fold.append(scores[0])
207
+ fold_no = fold_no + 1
208
+
209
+
210
+ print('------------------------------------------------------------------------')
211
+ #print('Score per fold')
212
+ #for i in range(0, len(acc_per_fold)):
213
+ # print('------------------------------------------------------------------------')
214
+ # print(f'> Fold {i+1} - Loss: {loss_per_fold[i]} - Loss%: {acc_per_fold[i]}%')
215
+ #print('------------------------------------------------------------------------')
216
+ #print('Average scores for all folds:')
217
+ #print(f'> Possible Loss %: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
218
+ #print(f'> Loss: {np.mean(loss_per_fold)}')
219
+ #print('------------------------------------------------------------------------')
220
+
221
+ trek = df.iloc[0:len(df), 1:23]
222
+ Y = trek[0:len(trek)]
223
+ YP = trek[1:len(trek)]
224
+ Y1 = Y['Close']
225
+ Y2 = YP['Close']
226
+ Yx = pd.DataFrame(YP, index=YP.index, columns=YP.columns)
227
+ #X = sc.fit_transform(X.reshape(-1,22))
228
+ Y = np.array(Y)
229
+ Y1 = np.array(Y1)
230
+ Y = sc.fit_transform(Y)
231
+ Y1 = Y1.reshape(-1,1)
232
+ Y1 = sc.fit_transform(Y1)
233
+
234
+ train_X = Y.reshape(Y.shape[0],Y.shape[1],1)
235
+ #Y = Y.reshape(-1,1)
236
+ pred = model.predict(train_X, verbose=0)
237
+ pred = np.array(pred).reshape(-1,1)
238
+ var2 = max_error(pred.reshape(-1,1), Y1)
239
+ print('Max Error: %f' % var2)
240
+ prax[5] = float(var2)
241
+ pred = sc.inverse_transform(pred)
242
+
243
+ print(pred[-2], pred[-1])
244
+ prax[3] = pred[-2]
245
+ prax[4] = pred[-1]
246
+ if(pred[-1]-pred[-2]>0):
247
+ prax[6] = 1
248
+ elif(pred[-1]-pred[-2]==0):
249
+ prax[6] = 0
250
+ else:
251
+ prax[6] = -1
252
+
253
+ # %%
254
+ # Function to train the model (CNN-LSTM)
255
+ def modelCNNLSTM_OpenGap(csv_file, prax):
256
+ # Read the data
257
+ df = csv_file
258
+ df['Date/Time'] = df['Date/Time'].values.astype("float64") # convert the date column in place; overwriting df here would break the .iloc slicing below
259
+ datLength = len(df)
260
+ df['O-C'] = 0
261
+ for i in range(datLength):
262
+ if i == 0:
263
+ df['O-C'][i] = 0
264
+ continue
265
+ else:
266
+ df['O-C'][i] = df['Open'][i] - df['Close'][i-1]
267
+ temp_data = df.iloc[0:datLength-100, 1:24]
268
+ trek = df.iloc[datLength-100:,1:24]
269
+ #print(temp_data)
270
+ data = temp_data
271
+ sc = MinMaxScaler()
272
+ # Split the data into training and testing sets
273
+ train_size = int(len(data) * 0.8)
274
+ train_data, test_data = data[:train_size], data[train_size:]
275
+
276
+ # Separate the input features and target variable
277
+ X_train, y_train = train_data, train_data['Close']
278
+ X_test, y_test = test_data, test_data['Close']
279
+
280
+ X_train = X_train[0:len(X_train)-1]
281
+ y_train = y_train[1:len(y_train)]
282
+ X_test = X_test[0:len(X_test)-1]
283
+ y_test = y_test[1:len(y_test)]
284
+
285
+ Xt = X_train
286
+ Xts = X_test
287
+ Yt = y_train
288
+ Yts = y_test
289
+
290
+ y_train = y_train.values.reshape(-1,1)
291
+ y_test = y_test.values.reshape(-1,1)
292
+
293
+ X_train = sc.fit_transform(X_train)
294
+ y_train = sc.fit_transform(y_train)
295
+ X_test = sc.fit_transform(X_test)
296
+ y_test = sc.fit_transform(y_test)
297
+
298
+ x_tr=pd.DataFrame(X_train, index = Xt.index, columns = Xt.columns)
299
+ y_tr=pd.DataFrame(y_train, index = Yt.index)
300
+ x_te=pd.DataFrame(X_test, index = Xts.index, columns = Xts.columns)
301
+ y_te=pd.DataFrame(y_test, index = Yts.index)
302
+
303
+ # Reshape the data for the CNN-LSTM model
304
+ X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
305
+ X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
306
+
307
+ study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner(n_min_trials=5, n_startup_trials=5))
308
+ fn = lambda trial: objective(trial, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
309
+ study.optimize(fn, n_trials=7)
310
+
311
+ best_params = study.best_params
312
+ #print(f"Best params: {best_params}")
313
+
314
+ model = tf.keras.Sequential()
315
+
316
+ # Creating the Neural Network model here...
317
+ # CNN layers
318
+ model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))
319
+ # model.add(Dense(5, kernel_regularizer=L2(0.01)))
320
+
321
+ # LSTM layers
322
+ model.add(Bidirectional(LSTM(best_params["lstm_units_1"], return_sequences=True)))
323
+ model.add(Dropout(best_params["dropout_1"]))
324
+ model.add(Bidirectional(LSTM(best_params["lstm_units_2"], return_sequences=False)))
325
+ model.add(Dropout(best_params["dropout_2"]))
326
+
327
+ #Final layers
328
+ model.add(Dense(1, activation='relu'))
329
+ model.compile(optimizer='adam', loss='mse', metrics=['mse'])
330
+
331
+ # Train the model
332
+ history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32, verbose=0)
333
+
334
+ # Evaluate the model
335
+ loss = model.evaluate(X_test, y_test, verbose=0)[0]
336
+
337
+ print(f"Final loss (without KFold): {loss}")
338
+
339
+ kfold = KFold(n_splits=10, shuffle=True)
340
+
341
+ inputs = np.concatenate((X_train, X_test), axis=0)
342
+ targets = np.concatenate((y_train, y_test), axis=0)
343
+ acc_per_fold = []
344
+ loss_per_fold = []
345
+ xgb_res = []
346
+ num_epochs = 10
347
+ batch_size = 32
348
+
349
+ fold_no = 1
350
+ print('------------------------------------------------------------------------')
351
+ print("Training for 10 folds... Standby")
352
+ for train, test in kfold.split(inputs, targets):
353
+ #print('------------------------------------------------------------------------')
354
+ #print(f'Training for fold {fold_no} ...')
355
+ history = model.fit(inputs[train], targets[train],
356
+ batch_size=32,
357
+ epochs=15,
358
+ verbose=0)
359
+
360
+ scores = model.evaluate(inputs[test], targets[test], verbose=0)
361
+ #print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
362
+ acc_per_fold.append(scores[1] * 100)
363
+ loss_per_fold.append(scores[0])
364
+ fold_no = fold_no + 1
365
+
366
+
367
+ print('------------------------------------------------------------------------')
368
+ #print('Score per fold')
369
+ #for i in range(0, len(acc_per_fold)):
370
+ # print('------------------------------------------------------------------------')
371
+ # print(f'> Fold {i+1} - Loss: {loss_per_fold[i]} - Loss%: {acc_per_fold[i]}%')
372
+ #print('------------------------------------------------------------------------')
373
+ #print('Average scores for all folds:')
374
+ #print(f'> Possible Loss %: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
375
+ #print(f'> Loss: {np.mean(loss_per_fold)}')
376
+ #print('------------------------------------------------------------------------')
377
+
378
+ trek = df.iloc[0:len(df), 1:24]
379
+ Y = trek[0:len(trek)]
380
+ YP = trek[1:len(trek)]
381
+ Y1 = Y['Close']
382
+ Y2 = YP['Close']
383
+ Yx = pd.DataFrame(YP, index=YP.index, columns=YP.columns)
384
+ #X = sc.fit_transform(X.reshape(-1,22))
385
+ Y = np.array(Y)
386
+ Y1 = np.array(Y1)
387
+ Y = sc.fit_transform(Y)
388
+ Y1 = Y1.reshape(-1,1)
389
+ Y1 = sc.fit_transform(Y1)
390
+
391
+ train_X = Y.reshape(Y.shape[0],Y.shape[1],1)
392
+ #Y = Y.reshape(-1,1)
393
+ pred = model.predict(train_X, verbose=0)
394
+ pred = np.array(pred).reshape(-1,1)
395
+ var2 = max_error(pred.reshape(-1,1), Y1)
396
+ print('Max Error: %f' % var2)
397
+ prax[5] = float(var2)
398
+ pred = sc.inverse_transform(pred)
399
+
400
+ print(pred[-2], pred[-1])
401
+ prax[3] = pred[-2]
402
+ prax[4] = pred[-1]
403
+ if(pred[-1]-pred[-2]>0):
404
+ prax[6] = 1
405
+ elif(pred[-1]-pred[-2]==0):
406
+ prax[6] = 0
407
+ else:
408
+ prax[6] = -1
409
+
410
+ # %%
411
+ # Function to train the model (TFT)
412
+ def modelTFT(csv_file, prax):
413
+ train = csv_file
414
+ #test = pd.read_csv("/kaggle/input/artemis-test/nifty_daily.csv")
415
+ train['date'] = pd.to_datetime(train['Date/Time'])
416
+ #test['date'] = pd.to_datetime(test['Date'])
417
+
418
+ data = pd.concat([train], axis = 0, ignore_index=True)
419
+ # Check that key is country-store-product-date combination
420
+ #assert len(data.drop_duplicates(['country', 'store', 'product', 'date'])) == len(data)
421
+ # Check that there is one date per country-store-product combination
422
+ #assert len(data.drop_duplicates(['country', 'store', 'product'])) == len(data)//data['date'].nunique()
423
+
424
+ #display(train.sample(4))
425
+
426
+ """<a id ="3"></a><h3 style="background:#0554f2; border:0; border-radius: 4px; color:#f5f6f7">Model Implementation in Pytorch-Forecasting </h3>"""
427
+
428
+ # Add a time_idx (a sequence of consecutive integers that runs from the min to the max date)
429
+
430
+ data = (data.merge((data[['Date/Time']].drop_duplicates(ignore_index=True)
431
+ .rename_axis('time_idx')).reset_index(), on = ['Date/Time']))
432
+ # add additional features
433
+ data["day_of_week"] = data['date'].dt.dayofweek.astype(str).astype("category") # categories have be strings
434
+ data["week_of_year"] = data['date'].dt.isocalendar().week.astype(str).astype("category") # categories have be strings
435
+ data["month"] = data['date'].dt.month.astype(str).astype("category") # categories have be strings
436
+ #data["log_num_sold"] = np.log(data.num_sold + 1e-8)
437
+ #data["avg_volume_by_country"] = data.groupby(["time_idx", "country"], observed=True).num_sold.transform("mean")
438
+ #data["avg_volume_by_store"] = data.groupby(["time_idx", "store"], observed=True).num_sold.transform("mean")
439
+ #data["avg_volume_by_product"] = data.groupby(["time_idx", "product"], observed=True).num_sold.transform("mean")
440
+
441
+ #unique_dates_country = data[['date', 'Ticker']].drop_duplicates(ignore_index = True)
442
+ #unique_dates_country['is_holiday'] = (unique_dates_country
443
+ # .apply(lambda x: x.date in holidays.country_holidays(x.country), axis = 1).astype('category'))
444
+ #unique_dates_country['is_holiday_lead_1'] = (unique_dates_country
445
+ # .apply(lambda x: x.date+pd.Timedelta(days=1) in holidays.country_holidays(x.country), axis = 1).astype('category'))
446
+ #unique_dates_country['is_holiday_lead_2'] = (unique_dates_country
447
+ # .apply(lambda x: x.date+pd.Timedelta(days=2) in holidays.country_holidays(x.country), axis = 1).astype('category'))
448
+ #unique_dates_country['is_holiday_lag_1'] = (unique_dates_country
449
+ # .apply(lambda x: x.date-pd.Timedelta(days=1) in holidays.country_holidays(x.country), axis = 1).astype('category'))
450
+ #unique_dates_country['is_holiday_lag_2'] = (unique_dates_country
451
+ # .apply(lambda x: x.date-pd.Timedelta(days=2) in holidays.country_holidays(x.country), axis = 1).astype('category'))
452
+ #data = data.merge(unique_dates_country, on = ['date', 'Ticker'], validate = "m:1")
453
+ #del unique_dates_country
454
+ gc.collect()
455
+ data.sample(5, random_state=30)
456
+
457
+ train = data.iloc[:len(train)]
458
+ test = data.iloc[len(train):]
459
+
460
+ max_prediction_length = 2
461
+ max_encoder_length = train.date.nunique()
462
+ training_cutoff = train["time_idx"].max() - max_prediction_length # hold out the last max_prediction_length steps for validation
463
+
464
+ # Let's create a Dataset
465
+ training = TimeSeriesDataSet(
466
+ train[lambda x: x.time_idx <= training_cutoff],
467
+ time_idx="time_idx",
468
+ target="Close",
469
+ group_ids=["Ticker"],
470
+ min_encoder_length=max_prediction_length, # keep encoder length long (as it is in the validation set)
471
+ max_encoder_length=max_encoder_length,
472
+ max_prediction_length=max_prediction_length,
473
+ static_categoricals=["Ticker"],
474
+ time_varying_known_categoricals=["month", "week_of_year", "day_of_week"],
475
+ #variable_groups={"is_holiday": ["is_holiday"]}, # group of categorical variables can be treated as one variable
476
+ time_varying_known_reals=["time_idx"],
477
+ time_varying_unknown_categoricals=[],
478
+ time_varying_unknown_reals=[
479
+ 'Open','High','Low','Close','OI','RSI14','RSI44','HHRSI','Rsi Weekly','LLCHHV','white','Vap44','Vap14','BV11','SV11','Ema5','Ema20','Ema50','Ema200'
480
+ ],
481
+ target_normalizer=GroupNormalizer(
482
+ groups=['Ticker'], transformation="softplus"
483
+ ), # use softplus and normalize by group
484
+ categorical_encoders={
485
+ 'week_of_year':NaNLabelEncoder(add_nan=True)
486
+ },
487
+ #lags={'num_sold': [7, 30, 365]},
488
+ add_relative_time_idx=True,
489
+ add_target_scales=True,
490
+ add_encoder_length=True,
491
+ )
492
+
493
+ # create validation set (predict=True) which means to predict the last max_prediction_length points in time
494
+ # for each series
495
+ validation = TimeSeriesDataSet.from_dataset(training, train, predict=True, stop_randomization=True)
496
+
497
+ # create dataloaders for model
498
+ batch_size = 128 # set this between 32 to 128
499
+ train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
500
+ val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)
501
+
502
+ #let's see how a naive model does
503
+
504
+ actuals = torch.cat([y for x, (y, weight) in iter(val_dataloader)]).cuda()
505
+ baseline_predictions = Baseline().predict(val_dataloader).cuda()
506
+ (actuals - baseline_predictions).abs().mean().item()
507
+
508
+ sm = SMAPE()
509
+
510
+ print(f"Median loss for naive prediction on validation: {sm.loss(actuals, baseline_predictions).mean(axis = 1).median().item()}")
511
+
512
+ early_stop_callback = EarlyStopping(monitor="train_loss", min_delta=1e-2, patience=PATIENCE, verbose=False, mode="min")
513
+ lr_logger = LearningRateMonitor() # log the learning rate
514
+ logger = TensorBoardLogger("lightning_logs") # logging results to a tensorboard
515
+
516
+ trainer = pl.Trainer(
517
+ max_epochs=1,
518
+ accelerator=ACCELERATOR,
519
+ enable_model_summary=False,
520
+ gradient_clip_val=0.25,
521
+ limit_train_batches=10, # limit training batches per epoch for quicker runs
522
+ #fast_dev_run=True, # comment in to check that the network or dataset has no serious bugs
523
+ callbacks=[lr_logger, early_stop_callback],
524
+ logger=logger,
525
+ )
526
+
527
+ tft = TemporalFusionTransformer.from_dataset(
528
+ training,
529
+ learning_rate=LEARNING_RATE,
530
+ lstm_layers=2,
531
+ hidden_size=16,
532
+ attention_head_size=2,
533
+ dropout=0.2,
534
+ hidden_continuous_size=8,
535
+ output_size=1, # single point forecast (the default of 7 quantiles applies to QuantileLoss)
536
+ loss=SMAPE(),
537
+ log_interval=10, # log every 10 batches
538
+ reduce_on_plateau_patience=4
539
+ )
540
+
541
+ tft.to(DEVICE)
542
+ trainer.fit(
543
+ tft,
544
+ train_dataloaders=train_dataloader,
545
+ val_dataloaders=val_dataloader,
546
+ )
547
+ #torch.cuda.empty_cache()
548
+ #print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")
549
+
550
+ if OPTUNA:
551
+ from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
552
+
553
+ # create study
554
+ study = optimize_hyperparameters(
555
+ train_dataloader,
556
+ val_dataloader,
557
+ model_path="optuna_test",
558
+ n_trials=5,
559
+ max_epochs=MAX_EPOCHS,
560
+ gradient_clip_val_range=(0.01, 0.3),
561
+ hidden_size_range=(8, 24),
562
+ hidden_continuous_size_range=(8, 12),
563
+ attention_head_size_range=(2, 4),
564
+ learning_rate_range=(0.01, 0.05),
565
+ dropout_range=(0.1, 0.25),
566
+ trainer_kwargs=dict(limit_train_batches=20),
567
+ reduce_on_plateau_patience=4,
568
+ pruner=optuna.pruners.MedianPruner(n_min_trials=3, n_startup_trials=3),
569
+ use_learning_rate_finder=False, # use Optuna to find ideal learning rate or use in-built learning rate finder
570
+ )
571
+ #torch.cuda.empty_cache()
572
+ #'''
573
+ trainer = pl.Trainer(
574
+ max_epochs=MAX_EPOCHS,
575
+ accelerator=ACCELERATOR,
576
+ enable_model_summary=False,
577
+ gradient_clip_val=study.best_params['gradient_clip_val'],
578
+ limit_train_batches=20, # limit training batches per epoch for quicker runs
579
+ #fast_dev_run=True, # comment in to check that the network or dataset has no serious bugs
580
+ callbacks=[lr_logger, early_stop_callback],
581
+ logger=logger,
582
+ )
583
+
584
+ tft = TemporalFusionTransformer.from_dataset(
585
+ training,
586
+ learning_rate=study.best_params['learning_rate'],
587
+ lstm_layers=2,
588
+ hidden_size=study.best_params['hidden_size'],
589
+ attention_head_size=study.best_params['attention_head_size'],
590
+ dropout=study.best_params['dropout'],
591
+ hidden_continuous_size=study.best_params['hidden_continuous_size'],
592
+ output_size=1, # single point forecast (the default of 7 quantiles applies to QuantileLoss)
593
+ loss=SMAPE(),
594
+ log_interval=10, # log every 10 batches
595
+ reduce_on_plateau_patience=4
596
+ )
597
+
598
+ tft.to(DEVICE)
599
+ trainer.fit(
600
+ tft,
601
+ train_dataloaders=train_dataloader,
602
+ val_dataloaders=val_dataloader,
603
+ )
604
+ #'''
605
+ #torch.cuda.empty_cache()
606
+ best_model_path = trainer.checkpoint_callback.best_model_path
607
+ best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
608
+ actuals = torch.cat([y[0] for x, y in iter(val_dataloader)]).cuda()
609
+ predictions = best_tft.predict(val_dataloader, mode="prediction")
610
+ raw_predictions = best_tft.predict(val_dataloader, mode="raw", return_x=True)
611
+
612
+ sm = SMAPE()
613
+ print(f"Validation median SMAPE loss: {sm.loss(actuals, predictions.cuda()).mean(axis = 1).median().item()}")
614
+ prax[5] = sm.loss(actuals, predictions).mean(axis = 1).median().item()
615
+ #best_tft.plot_prediction(raw_predictions.x, raw_predictions.output, idx=0, add_loss_to_title=True);
616
+
617
+ print(raw_predictions[0][0])
618
+ prax[3] = '-'
619
+ prax[4] = raw_predictions[0][0].data.cpu().tolist()[0][0]
620
+ t = prax[4]
621
+ tm = data['Close'][len(data)-1]
622
+ if(t-tm>0):
623
+ prax[6] = 1
624
+ elif(t-tm==0):
625
+ prax[6] = 0
626
+ else:
627
+ prax[6] = -1
628
+ #prax[i][3] = raw_predictions[0][0].data[1]
629
+ print("-----------")
630
+
631
+ #with open("out.csv", "w", newline="") as f:
632
+ # writer = csv.writer(f)
633
+ # writer.writerows(prax)
634
+
635
+ # %%
636
+ # Function to train the model (TFT)
637
+ def modelTFT_OpenGap(csv_file, prax):
638
+ train = csv_file
639
+ #test = pd.read_csv("/kaggle/input/artemis-test/nifty_daily.csv")
640
+ train['date'] = pd.to_datetime(train['Date/Time'])
641
+ #test['date'] = pd.to_datetime(test['Date'])
642
+ datLength = len(train)
643
+ train['O-C'] = 0
644
+ for i in range(datLength):
645
+ if i == 0:
646
+ train['O-C'][i] = 0
647
+ continue
648
+ else:
649
+ train['O-C'][i] = train['Open'][i] - train['Close'][i-1]
650
+ data = pd.concat([train], axis = 0, ignore_index=True)
651
+ # Check that key is country-store-product-date combination
652
+ #assert len(data.drop_duplicates(['country', 'store', 'product', 'date'])) == len(data)
653
+ # Check that there is one date per country-store-product combination
654
+ #assert len(data.drop_duplicates(['country', 'store', 'product'])) == len(data)//data['date'].nunique()
655
+
656
+ #display(train.sample(4))
657
+
658
+ """<a id ="3"></a><h3 style="background:#0554f2; border:0; border-radius: 4px; color:#f5f6f7">Model Implementation in Pytorch-Forecasting </h3>"""
659
+
660
+ # Add a time_idx (a sequence of consecutive integers that runs from the min to the max date)
661
+
662
+ data = (data.merge((data[['Date/Time']].drop_duplicates(ignore_index=True)
663
+ .rename_axis('time_idx')).reset_index(), on = ['Date/Time']))
664
+ # add additional features
665
+ data["day_of_week"] = data['date'].dt.dayofweek.astype(str).astype("category") # categories have be strings
666
+ data["week_of_year"] = data['date'].dt.isocalendar().week.astype(str).astype("category") # categories have be strings
667
+ data["month"] = data['date'].dt.month.astype(str).astype("category") # categories have be strings
668
+ #data["log_num_sold"] = np.log(data.num_sold + 1e-8)
669
+ #data["avg_volume_by_country"] = data.groupby(["time_idx", "country"], observed=True).num_sold.transform("mean")
670
+ #data["avg_volume_by_store"] = data.groupby(["time_idx", "store"], observed=True).num_sold.transform("mean")
671
+ #data["avg_volume_by_product"] = data.groupby(["time_idx", "product"], observed=True).num_sold.transform("mean")
672
+
673
+ #unique_dates_country = data[['date', 'Ticker']].drop_duplicates(ignore_index = True)
674
+ #unique_dates_country['is_holiday'] = (unique_dates_country
675
+ # .apply(lambda x: x.date in holidays.country_holidays(x.country), axis = 1).astype('category'))
676
+ #unique_dates_country['is_holiday_lead_1'] = (unique_dates_country
677
+ # .apply(lambda x: x.date+pd.Timedelta(days=1) in holidays.country_holidays(x.country), axis = 1).astype('category'))
678
+ #unique_dates_country['is_holiday_lead_2'] = (unique_dates_country
679
+ # .apply(lambda x: x.date+pd.Timedelta(days=2) in holidays.country_holidays(x.country), axis = 1).astype('category'))
680
+ #unique_dates_country['is_holiday_lag_1'] = (unique_dates_country
681
+ # .apply(lambda x: x.date-pd.Timedelta(days=1) in holidays.country_holidays(x.country), axis = 1).astype('category'))
682
+ #unique_dates_country['is_holiday_lag_2'] = (unique_dates_country
683
+ # .apply(lambda x: x.date-pd.Timedelta(days=2) in holidays.country_holidays(x.country), axis = 1).astype('category'))
684
+ #data = data.merge(unique_dates_country, on = ['date', 'Ticker'], validate = "m:1")
685
+ #del unique_dates_country
686
+ gc.collect()
687
+ data.sample(5, random_state=30)
688
+
689
+ train = data.iloc[:len(train)]
690
+ test = data.iloc[len(train):]
691
+
692
+ max_prediction_length = 2
693
+ max_encoder_length = train.date.nunique()
694
+ training_cutoff = train["time_idx"].max() - max_prediction_length # hold out the last max_prediction_length steps for validation
695
+
696
+ # Let's create a Dataset
697
+ training = TimeSeriesDataSet(
698
+ train[lambda x: x.time_idx <= training_cutoff],
699
+ time_idx="time_idx",
700
+ target="Close",
701
+ group_ids=["Ticker"],
702
+ min_encoder_length=max_prediction_length, # keep encoder length long (as it is in the validation set)
703
+ max_encoder_length=max_encoder_length,
704
+ max_prediction_length=max_prediction_length,
705
+ static_categoricals=["Ticker"],
706
+ time_varying_known_categoricals=["month", "week_of_year", "day_of_week"],
707
+ #variable_groups={"is_holiday": ["is_holiday"]}, # group of categorical variables can be treated as one variable
708
+ time_varying_known_reals=["time_idx"],
709
+ time_varying_unknown_categoricals=[],
710
+ time_varying_unknown_reals=[
711
+ 'Open','High','Low','Close','OI','RSI14','RSI44','HHRSI','Rsi Weekly','LLCHHV','white','Vap44','Vap14','BV11','SV11','Ema5','Ema20','Ema50','Ema200', 'O-C'
712
+ ],
713
+ target_normalizer=GroupNormalizer(
714
+ groups=['Ticker'], transformation="softplus"
715
+ ), # use softplus and normalize by group
716
+ categorical_encoders={
717
+ 'week_of_year':NaNLabelEncoder(add_nan=True)
718
+ },
719
+ #lags={'num_sold': [7, 30, 365]},
720
+ add_relative_time_idx=True,
721
+ add_target_scales=True,
722
+ add_encoder_length=True,
723
+ )
724
+
725
+ # create validation set (predict=True) which means to predict the last max_prediction_length points in time
726
+ # for each series
727
+ validation = TimeSeriesDataSet.from_dataset(training, train, predict=True, stop_randomization=True)
728
+
729
+ # create dataloaders for model
730
+ batch_size = 128 # set this between 32 to 128
731
+ train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
732
+ val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)
733
+
734
+ #let's see how a naive model does
735
+
736
+ actuals = torch.cat([y for x, (y, weight) in iter(val_dataloader)]).cuda()
737
+ baseline_predictions = Baseline().predict(val_dataloader).cuda()
738
+ (actuals - baseline_predictions).abs().mean().item()
739
+
740
+ sm = SMAPE()
741
+
742
+ print(f"Median loss for naive prediction on validation: {sm.loss(actuals, baseline_predictions).mean(axis = 1).median().item()}")
743
+
744
+ early_stop_callback = EarlyStopping(monitor="train_loss", min_delta=1e-2, patience=PATIENCE, verbose=False, mode="min")
745
+ lr_logger = LearningRateMonitor() # log the learning rate
746
+ logger = TensorBoardLogger("lightning_logs") # logging results to a tensorboard
747
+
748
+ trainer = pl.Trainer(
749
+ max_epochs=1,
750
+ accelerator=ACCELERATOR,
751
+ enable_model_summary=False,
752
+ gradient_clip_val=0.25,
753
+ limit_train_batches=10, # limit training batches per epoch for quicker runs
754
+ #fast_dev_run=True, # comment in to check that the network or dataset has no serious bugs
755
+ callbacks=[lr_logger, early_stop_callback],
756
+ logger=logger,
757
+ )
758
+
759
+ tft = TemporalFusionTransformer.from_dataset(
760
+ training,
761
+ learning_rate=LEARNING_RATE,
762
+ lstm_layers=2,
763
+ hidden_size=16,
764
+ attention_head_size=2,
765
+ dropout=0.2,
766
+ hidden_continuous_size=8,
767
+ output_size=1, # single point forecast (the default of 7 quantiles applies to QuantileLoss)
768
+ loss=SMAPE(),
769
+ log_interval=10, # log every 10 batches
770
+ reduce_on_plateau_patience=4
771
+ )
772
+
773
+ tft.to(DEVICE)
774
+ trainer.fit(
775
+ tft,
776
+ train_dataloaders=train_dataloader,
777
+ val_dataloaders=val_dataloader,
778
+ )
779
+ #torch.cuda.empty_cache()
780
+ #print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")
781
+
782
+ if OPTUNA:
783
+ from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
784
+
785
+ # create study
786
+ study = optimize_hyperparameters(
787
+ train_dataloader,
788
+ val_dataloader,
789
+ model_path="optuna_test",
790
+ n_trials=5,
791
+ max_epochs=MAX_EPOCHS,
792
+ gradient_clip_val_range=(0.01, 0.3),
793
+ hidden_size_range=(8, 24),
794
+ hidden_continuous_size_range=(8, 12),
795
+ attention_head_size_range=(2, 4),
796
+ learning_rate_range=(0.01, 0.05),
797
+ dropout_range=(0.1, 0.25),
798
+ trainer_kwargs=dict(limit_train_batches=20),
799
+ reduce_on_plateau_patience=4,
800
+ pruner=optuna.pruners.MedianPruner(n_min_trials=3, n_warmup_steps=3),
801
+ use_learning_rate_finder=False, # use Optuna to find ideal learning rate or use in-built learning rate finder
802
+ )
803
+ #torch.cuda.empty_cache()
804
+ #'''
805
+ trainer = pl.Trainer(
806
+ max_epochs=MAX_EPOCHS,
807
+ accelerator=ACCELERATOR,
808
+ enable_model_summary=False,
809
+ gradient_clip_val=study.best_params['gradient_clip_val'],
810
+ limit_train_batches=20, # limit training batches per epoch for quicker runs
811
+ #fast_dev_run=True, # comment in to check that the network or dataset has no serious bugs
812
+ callbacks=[lr_logger, early_stop_callback],
813
+ logger=logger,
814
+ )
815
+
816
+ tft = TemporalFusionTransformer.from_dataset(
817
+ training,
818
+ learning_rate=study.best_params['learning_rate'],
819
+ lstm_layers=2,
820
+ hidden_size=study.best_params['hidden_size'],
821
+ attention_head_size=study.best_params['attention_head_size'],
822
+ dropout=study.best_params['dropout'],
823
+ hidden_continuous_size=study.best_params['hidden_continuous_size'],
824
+ output_size=1, # single point forecast (the default of 7 quantiles applies to QuantileLoss)
825
+ loss=SMAPE(),
826
+ log_interval=10, # log every 10 batches
827
+ reduce_on_plateau_patience=4
828
+ )
829
+
830
+ tft.to(DEVICE)
831
+ trainer.fit(
832
+ tft,
833
+ train_dataloaders=train_dataloader,
834
+ val_dataloaders=val_dataloader,
835
+ )
836
+ #'''
837
+ #torch.cuda.empty_cache()
838
+ best_model_path = trainer.checkpoint_callback.best_model_path
839
+ best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
840
+ actuals = torch.cat([y[0] for x, y in iter(val_dataloader)]).cuda()
841
+ predictions = best_tft.predict(val_dataloader, mode="prediction")
842
+ raw_predictions = best_tft.predict(val_dataloader, mode="raw", return_x=True)
843
+
844
+ sm = SMAPE()
845
+ print(f"Validation median SMAPE loss: {sm.loss(actuals, predictions.cuda()).mean(axis = 1).median().item()}")
846
+ prax[5] = sm.loss(actuals, predictions).mean(axis = 1).median().item()
847
+ #best_tft.plot_prediction(raw_predictions.x, raw_predictions.output, idx=0, add_loss_to_title=True);
848
+
849
+ print(raw_predictions[0][0])
850
+ prax[3] = '-'
851
+ prax[4] = raw_predictions[0][0].data.cpu().tolist()[0][0]
852
+ t = prax[4]
853
+ tm = data['Close'][len(data)-1]
854
+ if(t-tm>0):
855
+ prax[6] = 1
856
+ elif(t-tm==0):
857
+ prax[6] = 0
858
+ else:
859
+ prax[6] = -1
860
+ #prax[i][3] = raw_predictions[0][0].data[1]
861
+ print("-----------")
862
+
863
+ #with open("out.csv", "w", newline="") as f:
864
+ # writer = csv.writer(f)
865
+ # writer.writerows(prax)
866
+
867
+ # %%
868
+ def generate_csv(data_list):
869
+ filename = f"result.csv"
870
+ file_exists = os.path.isfile(filename)
871
+ with open(filename, mode='a', newline='') as csv_file:
872
+ fieldnames = ['Ticker', 'Prev_Close_Real', 'Model', 'Prev_Close_Model', 'Close_Model', 'Max_Err', 'Up_Down' ] # replace with your own column names
873
+ writer = csv.writer(csv_file, delimiter=',')
874
+ if not file_exists:
875
+ writer.writerow(fieldnames) # file doesn't exist yet, write a header
876
+ writer.writerow(data_list)
877
+ csv_file.close()
878
+
879
+ def fileOutput():
880
+ today = date.today().strftime("%Y_%m_%d")
881
+ filename = f"result.csv"
882
+ shutil.copyfile(filename, f"result_{today}.csv")
883
+ return f"result_{today}.csv"
884
+
885
+ def guess_date(string):
886
+ for fmt in ["%Y/%m/%d", "%d-%m-%Y", "%Y%m%d", "%m/%d/%Y", "%d/%m/%Y", "%Y-%m-%d", "%d/%m/%y", "%m/%d/%y"]:
887
+ try:
888
+ return datetime.datetime.strptime(string, fmt).date()
889
+ except ValueError:
890
+ continue
891
+ raise ValueError(string)
892
+
893
+ # %%
894
+ # Main function
895
+ def main(files):
896
+ # Get a list of all the CSV files uploaded
897
+ prax = [0,0,0,0,0,0,0]
898
+ for idx, file in enumerate(files):
899
+ print(f"File #{idx+1}: {file}")
900
+ print(file.name)
901
+ df = pd.read_csv(file.name)
902
+ print(df['Ticker'][0])
903
+ prax[0] = df['Ticker'][0]
904
+ prax[1] = df['Close'][len(df)-1]
905
+ print('------------------')
906
+ df = df.drop(['EMARSI'], axis=1)
907
+ #df['Date/Time'] = pd.to_datetime(df['Date/Time'])
908
+ for i in range(len(df)):
909
+ x = guess_date(df['Date/Time'][i])
910
+ df['Date/Time'][i] = x.strftime("%Y-%m-%d")
911
+ df['Date/Time'] = pd.to_datetime(df['Date/Time'])
912
+ df.fillna(0, inplace=True)
913
+ #df.to_csv('out.csv')
914
+ modelTFT(df, prax)
915
+ prax[2] = "TFT"
916
+ generate_csv(prax)
917
+ modelTFT_OpenGap(df, prax)
918
+ prax[2] = "TFT_OpenGap"
919
+ generate_csv(prax)
920
+ #df.set_index('Date/Time', inplace=True)
921
+ #df = df.drop(['Date/Time'], axis=1)
922
+ #modelCNNLSTM(df, prax)
923
+ #prax[2] = "CNNLSTM"
924
+ #generate_csv(prax)
925
+ #modelCNNLSTM_OpenGap(df, prax)
926
+ #prax[2] = "CNNLSTM_OpenGap"
927
+ #generate_csv(prax)
928
+ # Generate blank line
929
+ prax=["","","","","","",""]
930
+ generate_csv(prax)
931
+ # Reset prax
932
+ prax = [0,0,0,0,0,0,0]
933
+ f1 = fileOutput()
934
+ return f1
935
+
936
+ gradioApp = gr.Interface(fn=main, inputs=gr.File(file_count="multiple", file_type=".csv"), outputs="file")
937
+
938
+
939
+ if __name__ == "__main__":
940
+ # Calling main function
941
+ gradioApp.launch()
app.py CHANGED
@@ -928,12 +928,6 @@ def main(files):
928
  generate_csv(prax)
929
  # Reset prax
930
  prax = [0,0,0,0,0,0,0]
931
-
932
- if os.path.exists("lightning_logs"):
933
- shutil.rmtree("lightning_logs")
934
- # Delete "optuna_test" directory
935
- if os.path.exists("optuna_test"):
936
- shutil.rmtree("optuna_test")
937
  f1 = fileOutput()
938
  return f1
939
 
@@ -943,4 +937,3 @@ gradioApp = gr.Interface(fn=main, inputs=gr.File(file_count="multiple", file_typ
943
  if __name__ == "__main__":
944
  # Calling main function
945
  gradioApp.launch()
946
- time.sleep(10)