Prediction, Time Series

Estimated reading time: 15 minutes

For business time series prediction we can distinguish between the ostensible randomness of a data series and the choice of appropriate models. In this section we highlight only the prediction of continuous targets, or response variables. Each model's objective function can easily be reconfigured as a classification problem, e.g. will tomorrow's stock price increase or decrease from today's value, instead of predicting the value itself. A minimal sketch of this reframing follows below.
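As a hedged illustration of the reframing (the DataFrame and its "close" column are assumed, not taken from the original):

import pandas as pd

prices = pd.DataFrame({"close": [100.0, 101.5, 101.2, 103.0, 102.4]})

# Regression target: tomorrow's price.
prices["next_close"] = prices["close"].shift(-1)

# Classification target: will tomorrow's price be higher than today's?
prices["target_up"] = (prices["next_close"] > prices["close"]).astype(int)

print(prices.dropna())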

Stochastic Time-Series

It is often necessary to predict the future value of an ostensibly random variable or data series. Advanced machine learning techniques can be used to identify patterns in the data that might not be apparent at first glance from the associated plots.

Asset Class Prediction

Prediction across asset classes, e.g. commodities, stocks and bonds, over several horizons:

  • HFT (high-frequency)
  • Next Day
  • Long Term
Data Type             Description                                       Examples
Categorical           Data that can be discretely classified.           Country, Exchange, Currency, Dummy Variable, State, Industry.
Continuous            Data that changes incrementally in value.         Past Asset Price, Interest Rate, Competitor's Price.
Stepped               Similar to continuous but changes infrequently.   P/E, Quarterly Revenue.
Transformed Category  A different data type converted to categorical.   Traded inside standard deviation - yes/no; P/E above 10 - yes/no.
Model Outputs         Predictions of other models used as features.     ARIMA, AR, MA.
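A minimal sketch of preparing these data types as model features (the column names are assumed for illustration):

import pandas as pd

df = pd.DataFrame({
    "exchange": ["NYSE", "LSE", "NYSE"],   # categorical
    "price":    [101.2, 55.3, 99.8],       # continuous
    "pe":       [8.5, 12.1, 15.0],         # stepped
})

# Categorical -> dummy variables.
df = pd.get_dummies(df, columns=["exchange"])

# Transformed category: P/E above 10 - yes/no.
df["pe_above_10"] = (df["pe"] > 10).astype(int)

print(df)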


premodel
#Load Data:
import pandas as pd
train = pd.read_csv("../input/train_1.csv")

#Explore For Insights:
import matplotlib.pyplot as plt
# mean_group: assumed average value per date across all series in train
mean_group = train.mean(axis=0, numeric_only=True)
plt.plot(mean_group)
plt.show()

#Split Data in Three Sets:
from sklearn.model_selection import train_test_split

# X (features) and y (target) are assumed to have been built from train;
# the last 10% of observations is reserved as a holdout set
holdout_size = int(len(X) * 0.1)
X_rest, X_holdout = X.iloc[:-holdout_size], X.iloc[-holdout_size:]
y_rest, y_holdout = y.iloc[:-holdout_size], y.iloc[-holdout_size:]

X_train, X_test, y_train, y_test = train_test_split(X_rest, y_rest, test_size=0.3, random_state=0)

#Add Additional Features:
col = X_train.columns[0]      # example: summary statistics of a chosen column
mean = X_train[col].mean()
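Note that train_test_split shuffles rows, which leaks future information into the training set when observations are time-ordered. A minimal alternative sketch using scikit-learn's TimeSeriesSplit:

from sklearn.model_selection import TimeSeriesSplit

# Each split trains on an initial segment and validates on the segment
# that immediately follows it, preserving temporal order.
tscv = TimeSeriesSplit(n_splits=5)
for train_idx, test_idx in tscv.split(X_rest):
    X_train, X_test = X_rest.iloc[train_idx], X_rest.iloc[test_idx]
    y_train, y_test = y_rest.iloc[train_idx], y_rest.iloc[test_idx]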
model
import lightgbm as lgbm

learning_rate = 0.8
num_leaves = 128
min_data_in_leaf = 1000
feature_fraction = 0.5
bagging_freq = 1000
num_boost_round = 1000
params = {"objective": "regression",
          "boosting_type": "gbdt",
          "learning_rate": learning_rate,
          "num_leaves": num_leaves,
          "min_data_in_leaf": min_data_in_leaf,
          "feature_fraction": feature_fraction,
          "bagging_freq": bagging_freq,
          "verbosity": 0,
          "metric": "l2_root",
          "nthread": 4,
          "subsample": 0.9
          }

dtrain = lgbm.Dataset(X_train, y_train)
dvalid = lgbm.Dataset(X_test, y_test, reference=dtrain)
bst = lgbm.train(params, dtrain, num_boost_round,
                 valid_sets=dvalid, verbose_eval=100, early_stopping_rounds=100)
y_pred = bst.predict(X_test, num_iteration=bst.best_iteration)
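Early stopping halts boosting once the validation RMSE (the "l2_root" metric) has not improved for 100 rounds; passing num_iteration=bst.best_iteration then reuses the best round at prediction time.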
import xgboost as xgb

model = xgb.XGBRegressor(learning_rate=0.037, max_depth=5,
                         min_child_weight=20, n_estimators=180,
                         reg_lambda=0.8, booster='gbtree',
                         subsample=0.9, silent=1,
                         nthread=-1)

# feature_names is the list of predictor columns; target is the training label series
model.fit(train[feature_names], target)

pred = model.predict(test[feature_names])
postmodel
#Predict:

y_pred = regressor.predict(X_test)     # regressor is the fitted model from above
y_pred = sc.inverse_transform(y_pred)  # sc is the scaler fitted on the target

#Assess Success of Prediction:

RMSE, MAE, R^2 (regression)
ROC AUC, F1, TP/TN rates, Confusion Matrix (classification variant)
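A minimal sketch of computing these metrics with scikit-learn (y_test and y_pred as above; the classification labels y_test_cls and y_pred_cls are assumed):

from sklearn.metrics import (mean_squared_error, r2_score,
                             roc_auc_score, f1_score, confusion_matrix)

# Regression metrics
rmse = mean_squared_error(y_test, y_pred) ** 0.5
r2 = r2_score(y_test, y_pred)

# Classification variant
auc = roc_auc_score(y_test_cls, y_pred_cls)
f1 = f1_score(y_test_cls, y_pred_cls)
cm = confusion_matrix(y_test_cls, y_pred_cls)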

#Tweak Parameters to Optimise Metrics:

#Select a New Model

#Repeat the Process

#Final Showdown

Measure the performance of all models against the holdout set
and pick the final model.

premodel
# Same as the first card above: load the data, explore for insights,
# split into train/test/holdout sets, and add additional features.
model
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.wrappers.scikit_learn import KerasRegressor

def create_model():
    conv = Sequential()
    # PRED is the assumed 4-D input array: (samples, 1, timesteps, features)
    conv.add(Conv2D(20, (1, 4), input_shape=PRED.shape[1:4], activation='relu'))
    conv.add(MaxPooling2D((1, 2)))
    conv.add(Flatten())
    # sigmoid output assumes the target has been scaled to [0, 1]
    conv.add(Dense(1, activation='sigmoid'))
    sgd = SGD(lr=0.1, momentum=0.9, decay=0, nesterov=False)
    conv.compile(loss='mse', optimizer=sgd, metrics=['mse'])
    return conv

model = KerasRegressor(build_fn=create_model, batch_size=500, epochs=20, verbose=1)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
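Conv2D expects 4-D input. A hedged sketch of building the assumed PRED array from a 2-D feature matrix, adding the height and channel axes (model.fit would then be given PRED rather than X_train):

import numpy as np

# (samples, timesteps) -> (samples, 1, timesteps, 1)
PRED = np.asarray(X_train).reshape(len(X_train), 1, X_train.shape[1], 1)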
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.optimizers import SGD
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.wrappers.scikit_learn import KerasRegressor

def create_model():
    conv = Sequential()
    # PRED is the assumed 3-D input array: (samples, timesteps, features)
    conv.add(Conv1D(20, 4, input_shape=PRED.shape[1:3], activation='relu'))
    conv.add(MaxPooling1D(2))
    conv.add(Dense(50, activation='relu'))
    conv.add(Flatten())
    conv.add(Dense(1, activation='sigmoid'))
    sgd = SGD(lr=0.1, momentum=0.9, decay=0, nesterov=False)
    conv.compile(loss='mse', optimizer=sgd, metrics=['mse'])
    return conv

model = KerasRegressor(build_fn=create_model, batch_size=500, epochs=20, verbose=1)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
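The Conv1D variant treats each sample as a single sequence and slides the kernel along the time axis only; for univariate series this is usually a more natural layout than the 2-D convolution above.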
postmodel
# Same as the first card above: predict, assess the prediction metrics,
# tweak parameters, and measure all models against the holdout set.

premodel
# Same as the first card above: load the data, explore for insights,
# split into train/test/holdout sets, and add additional features.

model
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.wrappers.scikit_learn import KerasClassifier

def create_model():
    conv = Sequential()
    # PRED is the assumed 4-D input array: (samples, 1, timesteps, features)
    conv.add(Conv2D(20, (1, 4), input_shape=PRED.shape[1:4], activation='relu'))
    conv.add(MaxPooling2D((1, 2)))
    conv.add(Flatten())
    conv.add(Dense(1, activation='sigmoid'))
    sgd = SGD(lr=0.1, momentum=0.9, decay=0, nesterov=False)
    conv.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return conv

# class_weight rebalances the up/down labels if one class dominates
model = KerasClassifier(build_fn=create_model, batch_size=500, epochs=20, verbose=1,
                        class_weight=class_weight)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
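A minimal sketch of building the class_weight dictionary assumed above with scikit-learn:

import numpy as np
from sklearn.utils.class_weight import compute_class_weight

classes = np.unique(y_train)
weights = compute_class_weight('balanced', classes=classes, y=y_train)
class_weight = dict(zip(classes, weights))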
postmodel
# Same as the first card above: predict, assess the prediction metrics,
# tweak parameters, and measure all models against the holdout set.



Regular Time-Series

In contrast to the stochastic case, a regular time series exhibits persistent structure such as trend and seasonality. Classical statistical models can decompose the series into interpretable components and extrapolate them forward.

The asset-class prediction horizons and the data-types table from the stochastic section above apply here unchanged.


premodel
# Same as the first card above: load the data, explore for insights,
# split into train/test/holdout sets, and add additional features.

model
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima_model import ARIMA

# Per-language ARIMA orders (p, d, q)
params = {'en': [4,1,0], 'ja': [7,1,1], 'de': [7,1,1], 'na': [4,1,0],
          'fr': [4,1,0], 'zh': [7,1,1], 'ru': [4,1,0], 'es': [7,1,1]}

# sums is assumed to map each language code to its aggregated daily series
for key in sums:
    data = np.array(sums[key])
    arima = ARIMA(data, order=tuple(params[key]))
    result = arima.fit(disp=False)
    pred = result.predict(2, 599, typ='levels')
    x = list(range(600))

    print(key)
    plt.plot(x[2:len(data)], data[2:], label='Data')
    plt.plot(x[2:], pred, label='ARIMA Model')
    plt.xlabel('Days')
    plt.ylabel('Views')
    plt.legend()
    plt.show()

# Naive decomposition of the time series as explained above
import statsmodels.api as sm
from pylab import rcParams

# df_date_index is the assumed series/DataFrame indexed by date
decomposition = sm.tsa.seasonal_decompose(df_date_index, model='multiplicative', freq=7)

trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid
rcParams['figure.figsize'] = 30, 20
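The components can be inspected directly; statsmodels' decomposition result exposes a plot method (relying on the matplotlib import above):

fig = decomposition.plot()
plt.show()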
postmodel
# Same as the first card above: predict, assess the prediction metrics,
# tweak parameters, and measure all models against the holdout set.

premodel
# Same as the first card above: load the data, explore for insights,
# split into train/test/holdout sets, and add additional features.

model
from fbprophet import Prophet

# Prophet expects a DataFrame df with columns ds (date) and y (value)
m = Prophet()
m.fit(df)
future = m.make_future_dataframe(periods=10)
forecast = m.predict(future)
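As a usage note, the fitted Prophet model can chart both the forecast and its trend and seasonality components:

fig_forecast = m.plot(forecast)
fig_components = m.plot_components(forecast)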
postmodel
# Same as the first card above: predict, assess the prediction metrics,
# tweak parameters, and measure all models against the holdout set.

ts, rnn, xgboost, lgbm, time-series, ml, stocks, assets