Prediction, Time Series
Estimated reading time: 15 minutes
For business time series prediction we can differentiate between the ostensible randomness of a data series and the choice of appropriate models. In this section we have solely highlighted the prediction of continuous targets or response variables. The objective function of each of these models can easily be reconfigured for a classification problem, i.e. predicting whether tomorrow’s stock price will increase or decrease from today’s value instead of predicting the value itself.
Stochastic Time-Series
It is often necessary to predict the future value of an ostensibly random variable or data series. Advanced machine learning techniques can be used to identify patterns in the data that might not at first be apparent from looking at the associated plots.
Asset Class Prediction
Asset Class Prediction: e.g. commodity, stocks and bonds.
- HFT
- Next Day
- Long Term
Data Types | Description | Examples |
---|---|---|
Categorical | Data that can be discretely classified. | Country, Exchange, Currency, Dummy Variable, State, Industry. |
Continuous | Data that incrementally changes in values | Past Asset Price, Interest Rate, Competitors Price. |
Stepped | Similar to continuous but changes infrequently | P/E, Quarterly Revenue. |
Transformed Category | A different datatype converted to categorical. | Traded inside standard deviation - yes, no. P/E above 10 - yes, no. |
Models | The prediction of additional models | ARIMA, AR, MA. |
premodel
#Load Data:
import pandas as pd
train = pd.read_csv("../input/train_1.csv")
#Explore For Insights:
import matplotlib.pyplot as plt
# NOTE(review): `mean_group` is not defined in this snippet -- presumably an
# aggregate computed from `train`; confirm before running.
plt.plot(mean_group)
plt.show()
#Split Data in Three Sets:
from sklearn.model_selection import train_test_split
# NOTE(review): `X` (features) and `y` (target) are assumed to have been built
# from `train` beforehand; they are not defined in this snippet.
# The original slice omitted the holdout fraction; 20% is an assumed placeholder.
holdout_fraction = 0.2
n_holdout = int(len(X) * holdout_fraction)
# Positional split: the first n_holdout rows form the holdout set, the
# remaining rows are what gets divided into train and test below.
X_holdout = X.iloc[:n_holdout]
X_rest = X.iloc[n_holdout:]
y_holdout = y.iloc[:n_holdout]
y_rest = y.iloc[n_holdout:]
# Split only the non-holdout rows, passing y_rest so features and target stay aligned.
X_train, X_test, y_train, y_test = train_test_split(X_rest, y_rest, test_size = 0.3, random_state = 0)
#Add Additional Features:
# NOTE(review): `col` is not defined in this snippet -- presumably the name of
# a feature column; confirm.
mean = X_train[col].mean()
model
import lightgbm as lgbm
# Hyper-parameters for the gradient-boosted regression model.
learning_rate = 0.8
num_leaves = 128
min_data_in_leaf = 1000
feature_fraction = 0.5
bagging_freq = 1000
num_boost_round = 1000
params = {
    "objective": "regression",
    "boosting_type": "gbdt",
    "learning_rate": learning_rate,
    "num_leaves": num_leaves,
    # Previously defined above but never passed to the booster.
    "min_data_in_leaf": min_data_in_leaf,
    "feature_fraction": feature_fraction,
    "bagging_freq": bagging_freq,
    "verbosity": 0,
    "metric": "l2_root",
    "nthread": 4,
    "subsample": 0.9,
}
dtrain = lgbm.Dataset(X_train, y_train)
# Validate on the matching X_test/y_test pair; the original paired an
# undefined `X_validate` with `y_test`.
dvalid = lgbm.Dataset(X_test, y_test, reference=dtrain)
# NOTE(review): `verbose_eval` and `early_stopping_rounds` are keyword arguments
# of older LightGBM releases; newer releases expect callbacks -- confirm the
# installed version.
bst = lgbm.train(params, dtrain, num_boost_round, valid_sets=dvalid, verbose_eval=100, early_stopping_rounds=100)
# Keep the predictions instead of discarding the return value.
y_pred = bst.predict(X_test, num_iteration=bst.best_iteration)
import xgboost as xgb
# Gradient-boosted tree regressor using the scikit-learn style wrapper.
model = xgb.XGBRegressor(
    learning_rate=0.037,
    max_depth=5,
    min_child_weight=20,
    n_estimators=180,
    reg_lambda=0.8,
    booster='gbtree',
    subsample=0.9,
    # `silent=1` is deprecated; `verbosity=0` is the documented way to mute output.
    verbosity=0,
    # `nthread` is deprecated in the sklearn wrapper in favour of `n_jobs`.
    n_jobs=-1,
)
# NOTE(review): `feature_names`, `target` and `test` are not defined in this
# snippet -- presumably the feature column list, the training target and the
# test DataFrame; confirm.
model.fit(train[feature_names], target)
pred = model.predict(test[feature_names])
postmodel
#Predict:
# NOTE(review): `regressor` and `sc` are not defined in this snippet --
# presumably the fitted model from the "model" step and the scaler that was
# applied to the target; confirm.
y_pred = regressor.predict(X_test)
y_pred = sc.inverse_transform(y_pred)
#Assess Success of Prediction:
# The metrics below are classification metrics; they apply when the target is
# categorical. For a continuous target use an error metric such as RMSE or MAE.
# - ROC AUC
# - TP/TN
# - F1
# - Confusion Matrix
#Tweak Parameters to Optimise Metrics:
#Select A new Model
#Repeat the process.
#Final Showdown
# Measure the performance of all models against the holdout set.
# And pick the final model.
premodel
#Load Data:
import pandas as pd
train = pd.read_csv("../input/train_1.csv")
#Explore For Insights:
import matplotlib.pyplot as plt
# NOTE(review): `mean_group` is not defined in this snippet -- presumably an
# aggregate computed from `train`; confirm before running.
plt.plot(mean_group)
plt.show()
#Split Data in Three Sets:
from sklearn.model_selection import train_test_split
# NOTE(review): `X` (features) and `y` (target) are assumed to have been built
# from `train` beforehand; they are not defined in this snippet.
# The original slice omitted the holdout fraction; 20% is an assumed placeholder.
holdout_fraction = 0.2
n_holdout = int(len(X) * holdout_fraction)
# Positional split: the first n_holdout rows form the holdout set, the
# remaining rows are what gets divided into train and test below.
X_holdout = X.iloc[:n_holdout]
X_rest = X.iloc[n_holdout:]
y_holdout = y.iloc[:n_holdout]
y_rest = y.iloc[n_holdout:]
# Split only the non-holdout rows, passing y_rest so features and target stay aligned.
X_train, X_test, y_train, y_test = train_test_split(X_rest, y_rest, test_size = 0.3, random_state = 0)
#Add Additional Features:
# NOTE(review): `col` is not defined in this snippet -- presumably the name of
# a feature column; confirm.
mean = X_train[col].mean()
model
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.models import Sequential
from keras.layers import Dense, Flatten
# KerasRegressor was used below without ever being imported.
from keras.wrappers.scikit_learn import KerasRegressor


def create_model():
    """Build and compile a small 2-D convolutional network with one output unit.

    Layers: Conv2D (20 filters, 1x4 kernel, ReLU) -> MaxPooling2D (1x2)
    -> Flatten -> Dense (1 unit, sigmoid). Compiled with MSE loss and SGD
    (learning rate 0.1, momentum 0.9, no decay, no Nesterov).

    Returns:
        The compiled Sequential model.
    """
    conv = Sequential()
    # NOTE(review): `PRED` is not defined in this snippet -- presumably the
    # input array whose trailing three dimensions give the sample shape; confirm.
    conv.add(Conv2D(20, (1, 4), input_shape = PRED.shape[1:4], activation = 'relu'))
    conv.add(MaxPooling2D((1, 2)))
    conv.add(Flatten())
    # NOTE(review): a sigmoid output is bounded to (0, 1); this assumes the
    # target was scaled into that range -- confirm.
    conv.add(Dense(1, activation = 'sigmoid'))
    sgd = SGD(lr = 0.1, momentum = 0.9, decay = 0, nesterov = False)
    # 'accuracy' is a classification metric and is meaningless with an MSE
    # regression loss; report mean absolute error instead.
    conv.compile(loss = 'mse', optimizer = sgd, metrics = ['mae'])
    return conv


# NOTE(review): `class_weight` is not defined in this snippet, and class
# weights are normally a classification concept -- confirm it is intended here.
model = KerasRegressor(build_fn=create_model, batch_size = 500, epochs = 20, verbose = 1,class_weight=class_weight)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
from keras.optimizers import SGD
from keras.models import Sequential
# The network below uses the 1-D layers; the original imported Conv2D and
# MaxPooling2D, which left Conv1D/MaxPooling1D undefined.
from keras.layers import Conv1D, MaxPooling1D, Dense, Flatten
# KerasRegressor was used below without ever being imported.
from keras.wrappers.scikit_learn import KerasRegressor


def create_model():
    """Build and compile a small 1-D convolutional network with one output unit.

    Layers: Conv1D (20 filters, kernel size 4, ReLU) -> MaxPooling1D (2)
    -> Dense (50, ReLU) -> Flatten -> Dense (1 unit, sigmoid). Compiled with
    MSE loss and SGD (learning rate 0.1, momentum 0.9, no decay, no Nesterov).

    Returns:
        The compiled Sequential model.
    """
    conv = Sequential()
    # NOTE(review): `PRED` is not defined in this snippet -- presumably the
    # input array whose trailing two dimensions give the sample shape; confirm.
    conv.add(Conv1D(20, 4, input_shape = PRED.shape[1:3], activation = 'relu'))
    conv.add(MaxPooling1D(2))
    conv.add(Dense(50, activation='relu'))
    conv.add(Flatten())
    # NOTE(review): a sigmoid output is bounded to (0, 1); this assumes the
    # target was scaled into that range -- confirm.
    conv.add(Dense(1, activation = 'sigmoid'))
    sgd = SGD(lr = 0.1, momentum = 0.9, decay = 0, nesterov = False)
    # 'accuracy' is a classification metric and is meaningless with an MSE
    # regression loss; report mean absolute error instead.
    conv.compile(loss = 'mse', optimizer = sgd, metrics = ['mae'])
    return conv


# NOTE(review): `class_weight` is not defined in this snippet, and class
# weights are normally a classification concept -- confirm it is intended here.
model = KerasRegressor(build_fn=create_model, batch_size = 500, epochs = 20, verbose = 1,class_weight=class_weight)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
postmodel
#Predict:
# NOTE(review): `regressor` and `sc` are not defined in this snippet --
# presumably the fitted model from the "model" step and the scaler that was
# applied to the target; confirm.
y_pred = regressor.predict(X_test)
y_pred = sc.inverse_transform(y_pred)
#Assess Success of Prediction:
# The metrics below are classification metrics; they apply when the target is
# categorical. For a continuous target use an error metric such as RMSE or MAE.
# - ROC AUC
# - TP/TN
# - F1
# - Confusion Matrix
#Tweak Parameters to Optimise Metrics:
#Select A new Model
#Repeat the process.
#Final Showdown
# Measure the performance of all models against the holdout set.
# And pick the final model.
premodel
#Load Data:
import pandas as pd
train = pd.read_csv("../input/train_1.csv")
#Explore For Insights:
import matplotlib.pyplot as plt
# NOTE(review): `mean_group` is not defined in this snippet -- presumably an
# aggregate computed from `train`; confirm before running.
plt.plot(mean_group)
plt.show()
#Split Data in Three Sets:
from sklearn.model_selection import train_test_split
# NOTE(review): `X` (features) and `y` (target) are assumed to have been built
# from `train` beforehand; they are not defined in this snippet.
# The original slice omitted the holdout fraction; 20% is an assumed placeholder.
holdout_fraction = 0.2
n_holdout = int(len(X) * holdout_fraction)
# Positional split: the first n_holdout rows form the holdout set, the
# remaining rows are what gets divided into train and test below.
X_holdout = X.iloc[:n_holdout]
X_rest = X.iloc[n_holdout:]
y_holdout = y.iloc[:n_holdout]
y_rest = y.iloc[n_holdout:]
# Split only the non-holdout rows, passing y_rest so features and target stay aligned.
X_train, X_test, y_train, y_test = train_test_split(X_rest, y_rest, test_size = 0.3, random_state = 0)
#Add Additional Features:
# NOTE(review): `col` is not defined in this snippet -- presumably the name of
# a feature column; confirm.
mean = X_train[col].mean()
model
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.models import Sequential
from keras.layers import Dense, Flatten
# KerasClassifier was used below without ever being imported.
from keras.wrappers.scikit_learn import KerasClassifier


def create_model():
    """Build and compile a small 2-D convolutional binary classifier.

    Layers: Conv2D (20 filters, 1x4 kernel, ReLU) -> MaxPooling2D (1x2)
    -> Flatten -> Dense (1 unit, sigmoid). Compiled with binary cross-entropy
    loss, SGD (learning rate 0.1, momentum 0.9, no decay, no Nesterov) and the
    accuracy metric.

    Returns:
        The compiled Sequential model.
    """
    conv = Sequential()
    # NOTE(review): `PRED` is not defined in this snippet -- presumably the
    # input array whose trailing three dimensions give the sample shape; confirm.
    conv.add(Conv2D(20, (1, 4), input_shape = PRED.shape[1:4], activation = 'relu'))
    conv.add(MaxPooling2D((1, 2)))
    conv.add(Flatten())
    conv.add(Dense(1, activation = 'sigmoid'))
    sgd = SGD(lr = 0.1, momentum = 0.9, decay = 0, nesterov = False)
    conv.compile(loss = 'binary_crossentropy', optimizer = sgd, metrics = ['accuracy'])
    return conv


# NOTE(review): `class_weight` is not defined in this snippet -- presumably a
# mapping from class label to weight; confirm.
model = KerasClassifier(build_fn=create_model, batch_size = 500, epochs = 20, verbose = 1,class_weight=class_weight)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
postmodel
#Predict:
# NOTE(review): `regressor` and `sc` are not defined in this snippet --
# presumably the fitted model from the "model" step and the scaler that was
# applied to the target; confirm.
y_pred = regressor.predict(X_test)
y_pred = sc.inverse_transform(y_pred)
#Assess Success of Prediction:
# The metrics below are classification metrics; they apply when the target is
# categorical. For a continuous target use an error metric such as RMSE or MAE.
# - ROC AUC
# - TP/TN
# - F1
# - Confusion Matrix
#Tweak Parameters to Optimise Metrics:
#Select A new Model
#Repeat the process.
#Final Showdown
# Measure the performance of all models against the holdout set.
# And pick the final model.
Regular Time-Series
It is often necessary to predict the future value of an ostensibly random variable or data series. Advanced machine learning techniques can be used to identify patterns in the data that might not at first be apparent from looking at the associated plots.
Asset Class Prediction
Asset Class Prediction: e.g. commodity, stocks and bonds.
- HFT
- Next Day
- Long Term
Data Types | Description | Examples |
---|---|---|
Categorical | Data that can be discretely classified. | Country, Exchange, Currency, Dummy Variable, State, Industry. |
Continuous | Data that incrementally changes in values | Past Asset Price, Interest Rate, Competitors Price. |
Stepped | Similar to continuous but changes infrequently | P/E, Quarterly Revenue. |
Transformed Category | A different datatype converted to categorical. | Traded inside standard deviation - yes, no. P/E above 10 - yes, no. |
Models | The prediction of additional models | ARIMA, AR, MA. |
premodel
#Load Data:
import pandas as pd
train = pd.read_csv("../input/train_1.csv")
#Explore For Insights:
import matplotlib.pyplot as plt
# NOTE(review): `mean_group` is not defined in this snippet -- presumably an
# aggregate computed from `train`; confirm before running.
plt.plot(mean_group)
plt.show()
#Split Data in Three Sets:
from sklearn.model_selection import train_test_split
# NOTE(review): `X` (features) and `y` (target) are assumed to have been built
# from `train` beforehand; they are not defined in this snippet.
# The original slice omitted the holdout fraction; 20% is an assumed placeholder.
holdout_fraction = 0.2
n_holdout = int(len(X) * holdout_fraction)
# Positional split: the first n_holdout rows form the holdout set, the
# remaining rows are what gets divided into train and test below.
X_holdout = X.iloc[:n_holdout]
X_rest = X.iloc[n_holdout:]
y_holdout = y.iloc[:n_holdout]
y_rest = y.iloc[n_holdout:]
# Split only the non-holdout rows, passing y_rest so features and target stay aligned.
X_train, X_test, y_train, y_test = train_test_split(X_rest, y_rest, test_size = 0.3, random_state = 0)
#Add Additional Features:
# NOTE(review): `col` is not defined in this snippet -- presumably the name of
# a feature column; confirm.
mean = X_train[col].mean()
model
import numpy as np
import statsmodels.api as sm
from matplotlib import rcParams
# The call pattern below (order as 2nd positional argument, fit(disp=...),
# predict(typ='levels')) matches the legacy statsmodels ARIMA class.
from statsmodels.tsa.arima_model import ARIMA

# ARIMA order passed to the model for each series key.
# NOTE(review): the keys look like language codes -- confirm against `sums`.
params = {'en': [4,1,0], 'ja': [7,1,1], 'de': [7,1,1], 'na': [4,1,0], 'fr': [4,1,0], 'zh': [7,1,1], 'ru': [4,1,0], 'es': [7,1,1]}
# x-axis positions for 600 time steps (observed data plus forecast).
x = list(range(600))
# NOTE(review): `sums` is not defined in this snippet -- presumably a mapping
# from the same keys as `params` to a sequence of values; confirm.
# NOTE(review): `plt` is assumed to be matplotlib.pyplot imported earlier.
for key in sums:
    data = np.array(sums[key])
    arima = ARIMA(data, params[key])
    result = arima.fit(disp=False)
    #print(result.params)
    # Predict from index 2 through 599 on the original (undifferenced) scale.
    pred = result.predict(2, 599, typ='levels')
    print(key)
    plt.plot(x[2:len(data)], data[2:], label='Data')
    plt.plot(x[2:], pred, label='ARIMA Model')
    plt.xlabel('Days')
    plt.ylabel('Views')
    plt.legend()
    plt.show()
# Naive decomposition of our Time Series as explained above
# NOTE(review): `df_date_index` is not defined in this snippet -- presumably a
# date-indexed series or DataFrame; confirm. `freq` is the keyword used by
# older statsmodels releases (newer ones call it `period`).
decomposition = sm.tsa.seasonal_decompose(df_date_index, model='multiplicative',freq = 7)
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid
rcParams['figure.figsize'] = 30, 20
postmodel
#Predict:
# NOTE(review): `regressor` and `sc` are not defined in this snippet --
# presumably the fitted model from the "model" step and the scaler that was
# applied to the target; confirm.
y_pred = regressor.predict(X_test)
y_pred = sc.inverse_transform(y_pred)
#Assess Success of Prediction:
# The metrics below are classification metrics; they apply when the target is
# categorical. For a continuous target use an error metric such as RMSE or MAE.
# - ROC AUC
# - TP/TN
# - F1
# - Confusion Matrix
#Tweak Parameters to Optimise Metrics:
#Select A new Model
#Repeat the process.
#Final Showdown
# Measure the performance of all models against the holdout set.
# And pick the final model.
premodel
#Load Data:
import pandas as pd
train = pd.read_csv("../input/train_1.csv")
#Explore For Insights:
import matplotlib.pyplot as plt
# NOTE(review): `mean_group` is not defined in this snippet -- presumably an
# aggregate computed from `train`; confirm before running.
plt.plot(mean_group)
plt.show()
#Split Data in Three Sets:
from sklearn.model_selection import train_test_split
# NOTE(review): `X` (features) and `y` (target) are assumed to have been built
# from `train` beforehand; they are not defined in this snippet.
# The original slice omitted the holdout fraction; 20% is an assumed placeholder.
holdout_fraction = 0.2
n_holdout = int(len(X) * holdout_fraction)
# Positional split: the first n_holdout rows form the holdout set, the
# remaining rows are what gets divided into train and test below.
X_holdout = X.iloc[:n_holdout]
X_rest = X.iloc[n_holdout:]
y_holdout = y.iloc[:n_holdout]
y_rest = y.iloc[n_holdout:]
# Split only the non-holdout rows, passing y_rest so features and target stay aligned.
X_train, X_test, y_train, y_test = train_test_split(X_rest, y_rest, test_size = 0.3, random_state = 0)
#Add Additional Features:
# NOTE(review): `col` is not defined in this snippet -- presumably the name of
# a feature column; confirm.
mean = X_train[col].mean()
model
from fbprophet import Prophet
# Fit a Prophet model with default settings.
# NOTE(review): `df` is not defined in this snippet -- presumably a DataFrame
# in the layout Prophet expects; confirm.
m = Prophet().fit(df)
# Extend the training dates by 10 periods and predict over the whole range.
future = m.make_future_dataframe(periods=10)
forecast = m.predict(future)
postmodel
#Predict:
# NOTE(review): `regressor` and `sc` are not defined in this snippet --
# presumably the fitted model from the "model" step and the scaler that was
# applied to the target; confirm.
y_pred = regressor.predict(X_test)
y_pred = sc.inverse_transform(y_pred)
#Assess Success of Prediction:
# The metrics below are classification metrics; they apply when the target is
# categorical. For a continuous target use an error metric such as RMSE or MAE.
# - ROC AUC
# - TP/TN
# - F1
# - Confusion Matrix
#Tweak Parameters to Optimise Metrics:
#Select A new Model
#Repeat the process.
#Final Showdown
# Measure the performance of all models against the holdout set.
# And pick the final model.