Sanket45 committed on
Commit
75ae889
1 Parent(s): 90a8b51

upload python support file

src/__init__.py ADDED
File without changes
src/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (159 Bytes)

src/__pycache__/data.cpython-310.pyc ADDED
Binary file (14.5 kB)

src/__pycache__/model.cpython-310.pyc ADDED
Binary file (1.88 kB)

src/__pycache__/prediction.cpython-310.pyc ADDED
Binary file (6.07 kB)
src/data.py ADDED
@@ -0,0 +1,460 @@
import numpy as np
import pandas as pd
from pytorch_forecasting import TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer


class Energy_DataLoader:
    """
    A class for loading and preparing energy consumption data for modeling.

    Parameters:
    path (str): The path to the data file.
    test_dataset_size (int): The size of the test dataset. Defaults to 24.
    max_prediction_length (int): The maximum prediction length. Defaults to 24.
    max_encoder_length (int): The maximum encoder length. Defaults to 168.

    Methods:
    load_data(): Loads the energy consumption data from a CSV file.
    data_transformation(data): Performs data transformation and preprocessing.
    lead(df, lead): Computes the lead of the power usage time series for each consumer.
    lag(df, lag): Computes the lag of the power usage time series for each consumer.
    select_chunk(data): Selects a subset of the data corresponding to the top 10 consumers.
    time_features(df): Extracts time-based features from the data.
    data_split(df): Splits the data into training and test datasets.
    tft_data(): Prepares the data for training with the Temporal Fusion Transformer (TFT) model.
    fb_data(): Prepares the data for training with the Facebook Prophet model.
    """

    def __init__(self, path: str, test_dataset_size: int = 24,
                 max_prediction_length: int = 24,
                 max_encoder_length: int = 168):
        """
        Initialize the Energy_DataLoader class.

        Parameters:
        path (str): The path to the data file.
        test_dataset_size (int): The size of the test dataset. Defaults to 24.
        max_prediction_length (int): The maximum prediction length. Defaults to 24.
        max_encoder_length (int): The maximum encoder length. Defaults to 168.
        """
        self.path = path
        self.test_dataset_size = test_dataset_size
        self.max_prediction_length = max_prediction_length
        self.max_encoder_length = max_encoder_length

    def load_data(self):
        """
        Load the energy consumption data from a CSV file.

        Returns:
        data (pandas.DataFrame): The loaded data.
        """
        try:
            data = pd.read_csv(self.path, index_col=0, sep=';', decimal=',')
            print('Loaded the data successfully.')
            return data
        except Exception as e:
            print(f"Failed to load the data, please check the path: {e}")

    def data_transformation(self, data: pd.DataFrame):
        """
        Perform data transformation and preprocessing.

        Parameters:
        data (pandas.DataFrame): The input data.

        Returns:
        data (pandas.DataFrame): The transformed data.
        """
        data.index = pd.to_datetime(data.index)
        data.sort_index(inplace=True)
        # resample the data to hourly frequency
        data = data.resample('1h').mean().replace(0., np.nan)
        new_data = data.reset_index()
        new_data['year'] = new_data['index'].dt.year
        # drop the incomplete year 2011
        data1 = new_data.loc[new_data['year'] != 2011]
        data1 = data1.set_index('index')
        data1 = data1.drop(['year'], axis=1)
        return data1

    def lead(self, df: pd.DataFrame, lead: int = -1):
        """
        Compute the lead of the power usage time series for each consumer.

        Parameters:
        df (pandas.DataFrame): The input dataframe.
        lead (int): The lead time period. Defaults to -1.

        Returns:
        d_lead (pandas.Series): The lead time series.
        """
        d_lead = df.groupby('consumer_id')['power_usage'].shift(lead)
        return d_lead

    def lag(self, df: pd.DataFrame, lag: int = 1):
        """
        Compute the lag of the power usage time series for each consumer.

        Parameters:
        df (pandas.DataFrame): The input dataframe.
        lag (int): The lag time period. Defaults to 1.

        Returns:
        d_lag (pandas.Series): The lag time series.
        """
        d_lag = df.groupby('consumer_id')['power_usage'].shift(lag)
        return d_lag

    def select_chunk(self, data: pd.DataFrame):
        """
        Select a subset of the data corresponding to the top 10 consumers.

        Parameters:
        data (pandas.DataFrame): The input data.

        Returns:
        df (pandas.DataFrame): The selected chunk of data.
        """
        top_10_consumer = data.columns[:10]
        # select a chunk of the data initially
        # df = data[['MT_002', 'MT_004', 'MT_005', 'MT_006', 'MT_008']]
        df = data[top_10_consumer]
        return df

    def time_features(self, df: pd.DataFrame):
        """
        Extract time-based features from the data.

        Parameters:
        df (pandas.DataFrame): The input data.

        Returns:
        time_df (pandas.DataFrame): The dataframe with time-based features.
        earliest_time (pandas.Timestamp): The earliest timestamp in the data.
        """
        earliest_time = df.index.min()
        print(earliest_time)
        df_list = []
        for label in df:
            ts = df[label]

            start_date = min(ts.fillna(method='ffill').dropna().index)
            end_date = max(ts.fillna(method='bfill').dropna().index)
            active_range = (ts.index >= start_date) & (ts.index <= end_date)
            ts = ts[active_range].fillna(0.)

            tmp = pd.DataFrame({'power_usage': ts})
            date = tmp.index

            tmp['hours_from_start'] = (date - earliest_time).seconds / 60 / 60 + (date - earliest_time).days * 24
            tmp['hours_from_start'] = tmp['hours_from_start'].astype('int')

            tmp['days_from_start'] = (date - earliest_time).days
            tmp['date'] = date
            tmp['consumer_id'] = label
            tmp['hour'] = date.hour
            tmp['day'] = date.day
            tmp['day_of_week'] = date.dayofweek
            tmp['month'] = date.month

            # stack all time series vertically
            df_list.append(tmp)

        time_df = pd.concat(df_list).reset_index(drop=True)

        lead_1 = self.lead(time_df)
        time_df['Lead_1'] = lead_1
        lag_1 = self.lag(time_df, lag=1)
        time_df['lag_1'] = lag_1
        lag_5 = self.lag(time_df, lag=5)
        time_df['lag_5'] = lag_5
        time_df = time_df.dropna()
        return time_df, earliest_time

    def data_split(self, df: pd.DataFrame):
        """
        Split the data into training and test datasets.

        Parameters:
        df (pandas.DataFrame): The input data.

        Returns:
        train_dataset (pandas.DataFrame): The training dataset.
        test_dataset (pandas.DataFrame): The test dataset.
        training (TimeSeriesDataSet): The training dataset for modeling.
        validation (TimeSeriesDataSet): The validation dataset for modeling.
        """
        ## Train dataset >> train + validation
        train_dataset = df.loc[df['date'] < df.date.unique()[-self.test_dataset_size:][0]]

        ## Test dataset
        test_dataset = df.loc[df['date'] >= df.date.unique()[-self.test_dataset_size:][0]]

        # training stop cut-off
        training_cutoff = train_dataset["hours_from_start"].max() - self.max_prediction_length
        print('training cutoff ::', training_cutoff)
        training = TimeSeriesDataSet(
            train_dataset[lambda x: x.hours_from_start <= training_cutoff],
            time_idx="hours_from_start",
            target="Lead_1",
            group_ids=["consumer_id"],
            min_encoder_length=self.max_encoder_length // 2,
            max_encoder_length=self.max_encoder_length,
            min_prediction_length=1,
            max_prediction_length=self.max_prediction_length,
            static_categoricals=["consumer_id"],
            time_varying_known_reals=['power_usage', "hours_from_start", "day", "day_of_week",
                                      "month", 'hour', 'lag_1', 'lag_5'],
            time_varying_unknown_reals=['Lead_1'],
            target_normalizer=GroupNormalizer(
                groups=["consumer_id"], transformation="softplus"  # normalize per consumer; softplus is applied as the inverse transformation on the output
            ),
            add_relative_time_idx=True,  # add a relative time index as a feature (ranges from -encoder_length to prediction_length per sampled sequence)
            add_target_scales=True,  # add the centre and scale of the unnormalized target as static real features
            add_encoder_length=True,  # add the encoder length to the static real variables (useful when min_encoder_length != max_encoder_length)
            # lags={"power_usage": [12, 24]}
        )

        validation = TimeSeriesDataSet.from_dataset(training, train_dataset, predict=True, stop_randomization=True)

        # create dataloaders for our model
        batch_size = 32
        # if you have a strong GPU, feel free to increase the number of workers
        train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
        val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)
        return train_dataset, test_dataset, training, validation

    def tft_data(self):
        """
        Prepare the data for training with the Temporal Fusion Transformer (TFT) model.

        Returns:
        train_dataset (pandas.DataFrame): The training dataset.
        test_dataset (pandas.DataFrame): The test dataset.
        training (TimeSeriesDataSet): The training dataset for modeling.
        validation (TimeSeriesDataSet): The validation dataset for modeling.
        earliest_time (pandas.Timestamp): The earliest timestamp in the data.
        """
        df = self.load_data()
        df = self.data_transformation(df)
        df = self.select_chunk(df)
        df, earliest_time = self.time_features(df)
        train_dataset, test_dataset, training, validation = self.data_split(df)
        return train_dataset, test_dataset, training, validation, earliest_time

    def fb_data(self):
        """
        Prepare the data for training with the Facebook Prophet model.

        Returns:
        train_data (pandas.DataFrame): The training dataset.
        test_data (pandas.DataFrame): The test dataset.
        consumer_dummay (pandas.Index): The consumer ID columns.
        """
        df = self.load_data()
        df = self.data_transformation(df)
        df = self.select_chunk(df)
        df, earliest_time = self.time_features(df)
        consumer_dummay = pd.get_dummies(df['consumer_id'])
        ## add the encoded columns into the main dataframe
        df[consumer_dummay.columns] = consumer_dummay
        updated_df = df.drop(['consumer_id', 'hours_from_start', 'days_from_start'], axis=1)
        updated_df = updated_df.rename({'date': 'ds', "Lead_1": 'y'}, axis=1)

        ## Train dataset >> train + validation
        train_data = updated_df.loc[updated_df['ds'] < updated_df.ds.unique()[-self.test_dataset_size:][0]]

        ## Test dataset
        test_data = updated_df.loc[updated_df['ds'] >= updated_df.ds.unique()[-self.test_dataset_size:][0]]

        return train_data, test_data, consumer_dummay.columns


# -------------------------------------------------------------------------------------
class StoreDataLoader:
    def __init__(self, path):
        self.path = path

    def load_data(self):
        try:
            data = pd.read_csv(self.path)
            data['date'] = pd.to_datetime(data['date'])
            items = [i for i in range(1, 11)]
            data = data.loc[(data['store'] == 1) & (data['item'].isin(items))]
            # data['date'] = data['date'].dt.date
            print('Loaded the data successfully.')
            return data
        except Exception as e:
            print(f"Failed to load the data, please check the path: {e}")

    def create_week_date_featues(self, df, date_column):
        # calendar features derived from the date column
        df['Month'] = pd.to_datetime(df[date_column]).dt.month
        df['Day'] = pd.to_datetime(df[date_column]).dt.day
        df['Dayofweek'] = pd.to_datetime(df[date_column]).dt.dayofweek
        df['DayOfyear'] = pd.to_datetime(df[date_column]).dt.dayofyear
        df['Week'] = pd.to_datetime(df[date_column]).dt.week  # deprecated in newer pandas; use .dt.isocalendar().week there
        df['Quarter'] = pd.to_datetime(df[date_column]).dt.quarter
        # flag columns: encoded as 0 when the condition holds, 1 otherwise
        df['Is_month_start'] = np.where(pd.to_datetime(df[date_column]).dt.is_month_start, 0, 1)
        df['Is_month_end'] = np.where(pd.to_datetime(df[date_column]).dt.is_month_end, 0, 1)
        df['Is_quarter_start'] = np.where(pd.to_datetime(df[date_column]).dt.is_quarter_start, 0, 1)
        df['Is_quarter_end'] = np.where(pd.to_datetime(df[date_column]).dt.is_quarter_end, 0, 1)
        df['Is_year_start'] = np.where(pd.to_datetime(df[date_column]).dt.is_year_start, 0, 1)
        df['Is_year_end'] = np.where(pd.to_datetime(df[date_column]).dt.is_year_end, 0, 1)
        # semester and weekend/weekday flags, derived from the calendar columns computed above
        df['Semester'] = np.where(df['Quarter'].isin([1, 2]), 1, 2)
        df['Is_weekend'] = np.where(df['Dayofweek'].isin([5, 6]), 1, 0)
        df['Is_weekday'] = np.where(df['Dayofweek'].isin([0, 1, 2, 3, 4]), 1, 0)
        df['Days_in_month'] = pd.to_datetime(df[date_column]).dt.days_in_month
        return df

    def lead(self, df, lead=-1):
        d_lead = df.groupby(['store', 'item'])['sales'].shift(lead)
        return d_lead

    def lag(self, df, lag=1):
        d_lag = df.groupby(['store', 'item'])['sales'].shift(lag)
        return d_lag

    def time_features(self, df):
        earliest_time = df['date'].min()
        print(earliest_time)

        df['hours_from_start'] = (df['date'] - earliest_time).dt.seconds / 60 / 60 + (df['date'] - earliest_time).dt.days * 24
        df['hours_from_start'] = df['hours_from_start'].astype('int')

        df['days_from_start'] = (df['date'] - earliest_time).dt.days

        df = self.create_week_date_featues(df, 'date')

        # change dtypes of store, item and sales
        df['store'] = df['store'].astype('str')
        df['item'] = df['item'].astype('str')
        df['sales'] = df['sales'].astype('float')

        df["log_sales"] = np.log(df.sales + 1e-8)
        df["avg_demand_by_store"] = df.groupby(["days_from_start", "store"], observed=True).sales.transform("mean")
        df["avg_demand_by_item"] = df.groupby(["days_from_start", "item"], observed=True).sales.transform("mean")
        # items = [str(i) for i in range(1, 11)]
        # df = df.loc[(df['store'] == '1') & (df['item'].isin(items))]
        # df = df.reset_index(drop=True)
        d_1 = self.lead(df)
        df['Lead_1'] = d_1
        d_lag1 = self.lag(df, lag=1)
        df['lag_1'] = d_lag1
        d_lag5 = self.lag(df, lag=5)
        df['lag_5'] = d_lag5
        df = df.dropna()
        return df, earliest_time

    def split_data(self, df, test_dataset_size=30, max_prediction_length=30, max_encoder_length=120):
        ## Train dataset >> train + validation
        train_dataset = df.loc[df['date'] < df.date.unique()[-test_dataset_size:][0]]

        ## Test dataset
        test_dataset = df.loc[df['date'] >= df.date.unique()[-test_dataset_size:][0]]

        training_cutoff = train_dataset["days_from_start"].max() - max_prediction_length
        print("Training cutoff point ::", training_cutoff)

        training = TimeSeriesDataSet(
            train_dataset[lambda x: x.days_from_start <= training_cutoff],
            time_idx="days_from_start",
            target="Lead_1",  # the lead is used as the target
            group_ids=['store', 'item'],
            min_encoder_length=max_encoder_length // 2,
            max_encoder_length=max_encoder_length,
            min_prediction_length=1,
            max_prediction_length=max_prediction_length,
            static_categoricals=["store", 'item'],
            static_reals=[],
            time_varying_known_categoricals=[],
            time_varying_known_reals=["days_from_start", "Day", "Month", "Dayofweek", "DayOfyear", "Days_in_month",
                                      'Week', 'Quarter', 'Is_month_start', 'Is_month_end', 'Is_quarter_start',
                                      'Is_quarter_end', 'Is_year_start', 'Is_year_end', 'Semester', 'Is_weekend',
                                      'Is_weekday', 'lag_1', 'lag_5', 'sales'],
            time_varying_unknown_reals=['Lead_1', 'log_sales', 'avg_demand_by_store', 'avg_demand_by_item'],
            target_normalizer=GroupNormalizer(
                groups=["store", "item"], transformation="softplus"
            ),  # we normalize by group
            add_relative_time_idx=True,
            add_target_scales=True,
            add_encoder_length=True,
            allow_missing_timesteps=True,
        )

        validation = TimeSeriesDataSet.from_dataset(training, train_dataset, predict=True, stop_randomization=True)

        # create dataloaders for our model
        batch_size = 32
        # if you have a strong GPU, feel free to increase the number of workers
        train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
        val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)
        return train_dataset, test_dataset, training, validation

    def tft_data(self):
        df = self.load_data()
        df, earliest_time = self.time_features(df)
        train_dataset, test_dataset, training, validation = self.split_data(df)
        return train_dataset, test_dataset, training, validation, earliest_time

    def fb_data(self, test_dataset_size=30):
        df = self.load_data()
        df, earliest_time = self.time_features(df)
        store_dummay = pd.get_dummies(df['store'], prefix='store')
        item_dummay = pd.get_dummies(df['item'], prefix='item')

        df_encode = pd.concat([store_dummay, item_dummay], axis=1)
        ## add the encoded columns into the main dataframe
        df[df_encode.columns] = df_encode
        df = df.drop(['store', 'item', 'log_sales', 'avg_demand_by_store', 'avg_demand_by_item'], axis=1)
        df = df.rename({'date': 'ds', "Lead_1": 'y'}, axis=1)
        fb_train_data = df.loc[df['ds'] <= '2017-11-30']
        fb_test_data = df.loc[df['ds'] > '2017-11-30']
        # fb_train_data = df.loc[df['ds'] < df.ds.unique()[-test_dataset_size:][0]]
        # fb_test_data = df.loc[df['ds'] >= df.ds.unique()[-test_dataset_size:][0]]

        return fb_train_data, fb_test_data, item_dummay, store_dummay


if __name__ == '__main__':
    obj = Energy_DataLoader(r'D:\Ai Practices\Transformer Based Forecasting\stremlit app\LD2011_2014.txt')
    obj.load_data()
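For orientation, here is a minimal sketch of how the two loaders above could be driven end to end. The file paths ("data/LD2011_2014.txt", "data/store_item_sales.csv") are placeholders, not paths from this commit; point them at wherever the electricity and store-item sales files actually live.

from src.data import Energy_DataLoader, StoreDataLoader

# hypothetical paths; replace with the real data locations
energy_loader = Energy_DataLoader(path="data/LD2011_2014.txt",
                                  test_dataset_size=24,
                                  max_prediction_length=24,
                                  max_encoder_length=168)
train_df, test_df, training, validation, earliest_time = energy_loader.tft_data()

store_loader = StoreDataLoader(path="data/store_item_sales.csv")
s_train, s_test, s_training, s_validation, s_earliest = store_loader.tft_data()

# the TimeSeriesDataSet objects convert directly into PyTorch dataloaders
train_dl = training.to_dataloader(train=True, batch_size=32, num_workers=0)
val_dl = validation.to_dataloader(train=False, batch_size=320, num_workers=0)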
src/model.py ADDED
@@ -0,0 +1,65 @@
import copy
from pathlib import Path
import warnings
import lightning.pytorch as pl
import numpy as np
import pandas as pd
import torch
from prophet.serialize import model_to_json, model_from_json
from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# at the beginning of the script
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class Model_Load:
    def __init__(self):
        pass

    def energy_model_load(self, model_option):
        if model_option == 'TFT':
            best_model_path = 'models/consumer_final_10/lightning_logs/lightning_logs/version_0/checkpoints/epoch=5-step=49260.ckpt'
            best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
            print('Model loaded successfully.')
            return best_tft
        elif model_option == 'Prophet':
            best_model_path = 'models/fb_energy_model.json'
            with open(best_model_path, 'r') as fin:
                model = model_from_json(fin.read())
            return model

        # elif model_option == 'ten consumer':
        #     best_model_path = 'consumer_10/lightning_logs/lightning_logs/version_0/checkpoints/epoch=11-step=98544.ckpt'
        #     best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
        #     print('Model loaded successfully.')
        # elif model_option == 'fifty consumer':
        #     raise Exception('Model not present')

    def store_model_load(self, model_option):
        if model_option == 'TFT':
            # best_model_path = "models/store_item_10_lead_1_v2/lightning_logs/lightning_logs/version_2/checkpoints/epoch=7-step=4472.ckpt"
            best_model_path = "models/store_item_10_lead_1_v3/lightning_logs/lightning_logs/version_0/checkpoints/epoch=7-step=4472.ckpt"
            best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
            # best_tft = TemporalFusionTransformer()
            # best_tft.load_state_dict(torch.load(best_model_path, map_location=torch.device('cpu')))
            # best_tft.to('cpu')
            print('Model loaded successfully.')
            return best_tft
        elif model_option == 'Prophet':
            best_model_path = 'models/fb_store_model_new.json'
            with open(best_model_path, 'r') as fin:
                model = model_from_json(fin.read())
            return model

        # elif model_option == 'Item 50 TFT':
        #     raise Exception('Model not present')
        # elif model_option == 'FB Prophet':
        #     raise Exception('Model not present')


if __name__ == '__main__':
    obj = Model_Load()
    obj.energy_model_load('TFT')
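A short usage sketch for the loader class above; the checkpoint and JSON paths are the ones hard-coded in the module and are assumed to exist relative to the working directory.

from src.model import Model_Load

loader = Model_Load()
tft_model = loader.energy_model_load('TFT')          # Temporal Fusion Transformer checkpoint
prophet_model = loader.energy_model_load('Prophet')  # serialized Prophet model
store_tft = loader.store_model_load('TFT')           # store-item TFT checkpoint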
src/prediction.py ADDED
@@ -0,0 +1,161 @@
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd


def create_week_date_featues(df, date_column):
    # calendar features derived from the date column
    df['Month'] = pd.to_datetime(df[date_column]).dt.month
    df['Day'] = pd.to_datetime(df[date_column]).dt.day
    df['Dayofweek'] = pd.to_datetime(df[date_column]).dt.dayofweek
    df['DayOfyear'] = pd.to_datetime(df[date_column]).dt.dayofyear
    df['Week'] = pd.to_datetime(df[date_column]).dt.week  # deprecated in newer pandas; use .dt.isocalendar().week there
    df['Quarter'] = pd.to_datetime(df[date_column]).dt.quarter
    # flag columns: encoded as 0 when the condition holds, 1 otherwise
    df['Is_month_start'] = np.where(pd.to_datetime(df[date_column]).dt.is_month_start, 0, 1)
    df['Is_month_end'] = np.where(pd.to_datetime(df[date_column]).dt.is_month_end, 0, 1)
    df['Is_quarter_start'] = np.where(pd.to_datetime(df[date_column]).dt.is_quarter_start, 0, 1)
    df['Is_quarter_end'] = np.where(pd.to_datetime(df[date_column]).dt.is_quarter_end, 0, 1)
    df['Is_year_start'] = np.where(pd.to_datetime(df[date_column]).dt.is_year_start, 0, 1)
    df['Is_year_end'] = np.where(pd.to_datetime(df[date_column]).dt.is_year_end, 0, 1)
    # semester and weekend/weekday flags, derived from the calendar columns computed above
    df['Semester'] = np.where(df['Quarter'].isin([1, 2]), 1, 2)
    df['Is_weekend'] = np.where(df['Dayofweek'].isin([5, 6]), 1, 0)
    df['Is_weekday'] = np.where(df['Dayofweek'].isin([0, 1, 2, 3, 4]), 1, 0)
    df['Days_in_month'] = pd.to_datetime(df[date_column]).dt.days_in_month
    return df


def val_prediction(validation, model: object, train_dataset: pd.DataFrame, store_id: str = '1', item_id: str = '1'):
    predictions = model.predict(validation.filter(lambda x: (x.store == store_id) & (x.item == item_id)),
                                return_y=True,
                                return_x=True,
                                trainer_kwargs=dict(accelerator="cpu"))

    filter_train = train_dataset.loc[(train_dataset['store'] == store_id) & (train_dataset['item'] == item_id)].reset_index(drop=True)
    # the last 30 rows of the filtered training data correspond to the validation window
    training_results = filter_train.iloc[-30:, :]
    y = [float(i) for i in predictions.output[0]]
    y_true = [float(i) for i in predictions.y[0][0]]
    x = [int(i) for i in predictions[1]['decoder_time_idx'][0]]
    training_results['prediction'] = y
    training_results['y_true'] = y_true
    training_results['x'] = x
    rmse = np.around(np.sqrt(mean_squared_error(training_results['Lead_1'], y)), 2)
    mae = np.around(mean_absolute_error(training_results['Lead_1'], y), 2)
    print(f" VAL DATA = Item ID : {item_id} :: MAE : {mae} :: RMSE : {rmse}")
    return training_results


def test_prediction(model: object, train_dataset, test_dataset, earliest_time, max_encoder_length=120, store_id: str = '1', item_id: str = '1'):
    # encoder data is the last lookback window: the final max_encoder_length days of the training data
    encoder_data = train_dataset[lambda x: x.days_from_start > x.days_from_start.max() - max_encoder_length]
    last_data = train_dataset[lambda x: x.days_from_start == x.days_from_start.max()]
    # decoder_data = pd.concat(
    #     [last_data.assign(date=lambda x: x.date + pd.offsets.DateOffset(i)) for i in range(1, 30 + 1)],
    #     ignore_index=True,
    # )

    # decoder_data["hours_from_start"] = (decoder_data["date"] - earliest_time).dt.seconds / 60 / 60 + (decoder_data["date"] - earliest_time).dt.days * 24
    # decoder_data['hours_from_start'] = decoder_data['hours_from_start'].astype('int')
    # decoder_data["hours_from_start"] += encoder_data["hours_from_start"].max() + 1 - decoder_data["hours_from_start"].min()
    # # add time index consistent with "data"
    # decoder_data["days_from_start"] = (decoder_data["date"] - earliest_time).apply(lambda x: x.days)
    # decoder_data = create_week_date_featues(decoder_data, 'date')
    decoder_data = test_dataset.copy()

    new_prediction_data = pd.concat([encoder_data, decoder_data], ignore_index=True)
    filter_test = new_prediction_data.loc[(new_prediction_data['store'] == store_id) & (new_prediction_data['item'] == item_id)]
    predictions = model.predict(filter_test,
                                return_y=True,
                                return_x=True,
                                trainer_kwargs=dict(accelerator="cpu"))

    testing_results = test_dataset.loc[(test_dataset['store'] == store_id) & (test_dataset['item'] == item_id)].copy()
    y = [float(i) for i in predictions.output[0]]
    y_true = [float(i) for i in predictions.y[0][0]]
    x = [int(i) for i in predictions[1]['decoder_time_idx'][0]]
    testing_results['prediction'] = y
    testing_results['y_true'] = y_true
    testing_results['x'] = x
    return testing_results


# -------------------------------------------------------------

def val_pred(model: object, train_dataset, validation, consumer_id: str = 'MT_001'):
    predictions = model.predict(validation.filter(lambda x: (x.consumer_id == consumer_id)),
                                return_y=True,
                                return_x=True,
                                trainer_kwargs=dict(accelerator="cpu"))

    filter_train = train_dataset.loc[(train_dataset['consumer_id'] == consumer_id)].reset_index(drop=True)

    # filter validation data: the last 24 rows correspond to the validation window
    val_results = filter_train.iloc[-24:, :]

    # prediction
    y = [float(i) for i in predictions.output[0]]
    # actual
    y_true = [float(i) for i in predictions.y[0][0]]
    # time idx
    x = [int(i) for i in predictions[1]['decoder_time_idx'][0]]
    # update into the validation results
    val_results['prediction'] = y
    val_results['y_true'] = y_true
    val_results['x'] = x
    # RMSE & MAE for validation data
    rmse = np.around(np.sqrt(mean_squared_error(val_results['Lead_1'], y)), 2)
    mae = np.around(mean_absolute_error(val_results['Lead_1'], y), 2)

    print(f" VAL DATA = Consumer ID : {consumer_id} :: MAE : {mae} :: RMSE : {rmse}")
    return val_results


def test_pred(model: object, train_dataset, test_dataset, consumer_id: str = 'MT_001', max_encoder_length: int = 168):
    # encoder data is the last lookback window: the final max_encoder_length hours of the training data
    encoder_data = train_dataset[lambda x: x.hours_from_start > x.hours_from_start.max() - max_encoder_length]
    last_data = train_dataset[lambda x: x.hours_from_start == x.hours_from_start.max()]

    decoder_data = test_dataset.copy()

    new_prediction_data = pd.concat([encoder_data, decoder_data], ignore_index=True)

    filter_train = new_prediction_data.loc[(new_prediction_data['consumer_id'] == consumer_id)]
    predictions = model.predict(filter_train,
                                return_y=True,
                                return_x=True,
                                trainer_kwargs=dict(accelerator="cpu"))

    testing_results = test_dataset.loc[(test_dataset['consumer_id'] == consumer_id)].copy()

    y = [float(i) for i in predictions.output[0]]
    y_true = [float(i) for i in predictions.y[0][0]]
    x = [int(i) for i in predictions[1]['decoder_time_idx'][0]]

    testing_results['prediction'] = y
    testing_results['y_true'] = y_true
    testing_results['x'] = x

    rmse = np.around(np.sqrt(mean_squared_error(testing_results['Lead_1'], y)), 2)
    mae = np.around(mean_absolute_error(testing_results['Lead_1'], y), 2)
    print(f"TEST DATA = Consumer ID : {consumer_id} :: MAE : {mae} :: RMSE : {rmse}")
    return testing_results
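Putting the three modules together, a hedged end-to-end sketch for the energy use case might look like the following; the data path and the consumer id 'MT_002' are placeholders, and the TFT checkpoint is assumed to be available as in model.py.

from src.data import Energy_DataLoader
from src.model import Model_Load
from src.prediction import val_pred, test_pred

loader = Energy_DataLoader(path="data/LD2011_2014.txt")  # assumed path
train_df, test_df, training, validation, earliest_time = loader.tft_data()
model = Model_Load().energy_model_load('TFT')

val_results = val_pred(model, train_df, validation, consumer_id='MT_002')
test_results = test_pred(model, train_df, test_df, consumer_id='MT_002', max_encoder_length=168)
print(test_results[['date', 'y_true', 'prediction']].head())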