import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# Load the energy-consumption series with the first CSV column parsed as a
# datetime index, put the rows in chronological order, and aggregate the
# readings into daily totals.
data = (
    pd.read_csv(
        'https://code.s3.yandex.net/datasets/energy_consumption.csv',
        index_col=[0],
        parse_dates=[0],
    )
    .sort_index()
    .resample('1D')
    .sum()
)
def make_features(
    data: pd.DataFrame,
    max_lag: int,
    rolling_mean_size: int,
    *,
    target_column: str = 'PJME_MW',
) -> None:
    """Add calendar, lag and rolling-mean feature columns to *data* in place.

    The frame is modified directly; nothing is returned.

    Args:
        data: Frame whose index exposes ``year``, ``month``, ``day`` and
            ``dayofweek`` (e.g. a ``DatetimeIndex``) and which contains
            ``target_column``.
        max_lag: Number of lag columns to create. Columns ``lag_1`` ..
            ``lag_<max_lag>`` hold the target shifted by that many rows.
        rolling_mean_size: Window length, in rows, of the ``rolling_mean``
            column.
        target_column: Name of the column the lag and rolling-mean features
            are derived from. Defaults to ``'PJME_MW'``.
    """
    # Calendar features taken straight from the index.
    data['year'] = data.index.year
    data['month'] = data.index.month
    data['day'] = data.index.day
    data['dayofweek'] = data.index.dayofweek

    target = data[target_column]

    # lag_k holds the target value observed k rows earlier; the first k rows
    # of each lag column are NaN.
    for lag in range(1, max_lag + 1):
        data[f'lag_{lag}'] = target.shift(lag)

    # Shift by one row before averaging so that the window ends at the
    # previous row and never includes the current row's own target value.
    data['rolling_mean'] = target.shift().rolling(rolling_mean_size).mean()
# The lag depth and rolling-window size used here are arbitrary values.
make_features(data, max_lag=1, rolling_mean_size=1)

# Split without shuffling so the rows keep their original order and the
# test set is the final 20% of the series.
train, test = train_test_split(data, test_size=0.2, shuffle=False)

# Drop training rows with missing values; the shifted features leave NaNs
# at the start of the series.
train = train.dropna()

# Report the shapes of both subsets, one per line.
print(train.shape, test.shape, sep='\n')