Remember to Restart Solver Often
Estimated reading time: 17 minutesRemember to Restart Solver Often
import pandas
df_hist = pandas.read_excel("soda_sales_historical_data.xlsx")
df_hist[:5]
df_hist[df_hist["Product"]=="11 Down"]
from pandas import DataFrame, get_dummies
categorical_columns = ['Product','Easter Included','Super Bowl Included',
'Christmas Included', 'Other Holiday']
df_hist = get_dummies(df_hist, prefix={k:"dmy_%s"%k for k in categorical_columns},
columns = list(categorical_columns))
df_hist[:5]
Sales | Cost Per Unit | 4 Wk Avg Temp | 4 Wk Avg Humidity | Sales M-1 weeks | Sales M-2 weeks | Sales M-3 weeks | Sales M-4 Weeks | Sales M-5 weeks | dmy_Product_11 Down | ... | dmy_Product_Koala Kola | dmy_Product_Mr. Popper | dmy_Product_Popsi Kola | dmy_Easter Included_No | dmy_Easter Included_Yes | dmy_Super Bowl Included_No | dmy_Super Bowl Included_Yes | dmy_Christmas Included_No | dmy_Christmas Included_Yes | dmy_Other Holiday_No | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 51.9 | 1.6625 | 80.69 | 69.19 | 17.0 | 22.4 | 13.5 | 14.5 | 28.0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 1 |
1 | 55.8 | 2.2725 | 80.69 | 69.19 | 2.4 | 2.2 | 2.0 | 1.4 | 0.5 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 1 |
2 | 3385.6 | 1.3475 | 80.69 | 69.19 | 301.8 | 188.8 | 101.4 | 81.6 | 213.8 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 1 |
3 | 63.5 | 1.6600 | 80.69 | 69.19 | 73.8 | 69.4 | 72.8 | 75.4 | 57.4 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 1 |
4 | 181.1 | 1.8725 | 80.69 | 69.19 | 23.1 | 22.6 | 22.1 | 19.9 | 23.2 | 0 | ... | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 1 | 1 |
5 rows × 25 columns
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import BaggingRegressor
from sklearn import model_selection
experiments = {"Algorithm":["Ordinary Least Squares", "Regression Tree",
"Big Random Forest", "Random Forest",
"Bagging"],
"Objects" : [lambda : LinearRegression(),
lambda : DecisionTreeRegressor(),
lambda : RandomForestRegressor(n_estimators=100),
lambda : RandomForestRegressor(),
lambda : BaggingRegressor()],
"Predictions":[[] for _ in range(5)]}
actuals = []
from sklearn.model_selection import train_test_split
for _ in range (4):
train_X, test_X, train_y, test_y = (
train_test_split(df_hist.drop("Sales", axis=1),
df_hist["Sales"], test_size=0.25))
for i, obj_factory in enumerate(experiments["Objects"]):
obj = obj_factory()
obj.fit(y=train_y,X=train_X)
experiments["Predictions"][i] += list(obj.predict(test_X))
actuals += list(test_y)
actuals = pandas.Series(actuals)
experiments["Predictions"] = list(map(pandas.Series, experiments["Predictions"]))
experiments["Results"] = []
for o in experiments["Objects"]:
experiments["Results"].append(
model_selection.cross_val_score(o(), y=df_hist['Sales'],
X=df_hist.drop("Sales", axis=1),
cv=5).mean())
DataFrame(experiments).drop(["Objects", "Predictions"],
axis=1).set_index("Algorithm")
fitted = (experiments["Objects"]
[experiments["Algorithm"].index("Big Random Forest")]().
fit(y=df_hist["Sales"], X=df_hist.drop("Sales", axis=1)))
df_superbowl_original = pandas.read_excel("super_bowl_promotion_data.xlsx")
df_superbowl = get_dummies(df_superbowl_original,
prefix={k:"dmy_%s"%k for k in categorical_columns},
columns = list(categorical_columns))
assert "Sales" not in df_superbowl.columns
assert {"Sales"}.union(df_superbowl.columns).issubset(set(df_hist.columns))
len(df_superbowl)
36
for fld in set(df_hist.columns).difference(df_superbowl.columns, {"Sales"}):
assert fld.startswith("dmy_")
df_superbowl[fld] = 0
df_superbowl = df_superbowl[list(df_hist.drop("Sales", axis=1).columns)]
predicted = fitted.predict(df_superbowl)
forecast_sales = df_superbowl_original[["Product", "Cost Per Unit"]].copy()
forecast_sales["Sales"] = predicted
forecast_sales["Sales"] = (forecast_sales["Sales"]).astype(int)
forecast_sales["Cost Per Unit"] = (forecast_sales["Cost Per Unit"] *1000).astype(int)
forecast_sales = forecast_sales.groupby(['Product','Cost Per Unit']).mean()
forecast_sales.head()
Sales | ||
---|---|---|
Product | Cost Per Unit | |
11 Down | 1455 | 243 |
1512 | 239 | |
1537 | 215 | |
1560 | 180 | |
AB Root Beer | 3730 | 392 |
from __future__ import print_function
from ortools.constraint_solver import pywrapcp as cp
soda_family = {'11 Down': 'Clear', 'AB Root Beer': 'Dark',
'Alpine Stream': 'Clear', 'Bright': 'Clear',
'Crisp Clear': 'Clear', 'DC Kola': 'Dark',
'Koala Kola': 'Dark', 'Mr. Popper': 'Dark',
'Popsi Kola': 'Dark'}
family = set(soda_family[j] for j in soda_family)
soda = set(j for j in soda_family)
max_prom = {f:2 for f in family}
product_prices = set(forecast_sales.index.values)
normal_price = {b:0 for b in soda}
for b,p in product_prices:
normal_price[b] = max(normal_price[b],p)
for j in range(num_tasks): ## This in effect sets constraints on the domain t.append(solver.IntVar(0, 1, “x[%i,%i]” % (i, j)))
from ortools.constraint_solver import pywrapcp as cp
solver = cp.Solver("nothing_fancy")
from ortools.linear_solver import pywraplp ## See this is Linear Programming
solver = pywraplp.Solver(‘SolveIntegerProblem’, pywraplp.Solver.CBC_MIXED_INTEGER_PROGRAMMING)
The thing is, once you use this the X_array looks wrong
## More Specific Keys
forecast_sales_r =forecast_sales.reset_index()
x_array = []
"""x_array["Product", "Price"]"""
x_product = []
for i, product in enumerate(forecast_sales_r.drop_duplicates("Product")["Product"].values):
t = []
for j, price in enumerate(forecast_sales_r[forecast_sales_r["Product"]==product]["Cost Per Unit"].values):
## This in effect sets constraints on the domain
t.append(solver.BoolVar("x[%s,%s]" % (product, price)))
x_array.extend(t) ## produces a flat array
x_product.append(t)
### Because it occurs in the same order you can also use x_product
products = forecast_sales_r.drop_duplicates("Product")["Product"].values
### This is the problems with strings, you can't call them like this.
### x_array["11 Down"]
x_product[0]
[x[11 Down,1455](0 .. 1),
x[11 Down,1512](0 .. 1),
x[11 Down,1537](0 .. 1),
x[11 Down,1560](0 .. 1)]
int(forecast_sales_r["Sales"].max()+1)
3799
forecast_sales_r["Revenue"] = forecast_sales_r["Sales"]*forecast_sales_r["Cost Per Unit"]
## x_product, x_array
investment = solver.IntVar(0, 350*(1000)*1, "investment")
sales = solver.IntVar(0, int(forecast_sales_r["Sales"].max()*len(forecast_sales_r)), 'sales')
revenue = solver.IntVar(0, int(forecast_sales_r["Revenue"].max()*len(forecast_sales_r)), 'revenue')
# Total cost
solver.Add(sales == solver.Sum(x_array * forecast_sales_r.Sales))
solver.Add(revenue == solver.Sum(forecast_sales.Sales[b,p] * p *
x_array[i] for i, (b,p) in enumerate(product_prices)))
solver.Add(investment ==
solver.Sum(max(0,forecast_sales.Sales[b,p] -
forecast_sales.Sales[b,normal_price[b]]) *
normal_price[b] * x_array[i]
for i, (b,p) in enumerate(product_prices)))
## This one is important as it sets it in motion.
obj = sales
#obj = revevue
### Eks nog steeds nie seker of jy die regte ding maximiseer nie
objective = solver.Maximize(obj, step=1)
{'11 Down',
'AB Root Beer',
'Alpine Stream',
'Bright',
'Crisp Clear',
'DC Kola',
'Koala Kola',
'Mr. Popper',
'Popsi Kola'}
db = solver.Phase(x_array,
solver.CHOOSE_FIRST_UNBOUND,
solver.ASSIGN_MIN_VALUE)
collector = solver.LastSolutionCollector()
for i in range(len(x_array)):
collector.Add(x_array[i])
collector.AddObjective(obj)
collector.AddObjective(obj)
solver.Solve(db, [objective, collector])
True
x_array[1]
x[11 Down,1512](0 .. 1)
if collector.SolutionCount() > 0:
best_solution = collector.SolutionCount() - 1
print("Sales = ", collector.ObjectiveValue(best_solution))
print()
index = []
for i in range(len(x_array)):
if collector.Value(best_solution, x_array[i]) == 1:
print('Item ', i)
index.append(i)
# 12943
Sales = 11982
Item 3
Item 7
Item 11
Item 12
Item 13
Item 14
Item 18
Item 22
Item 26
Item 30
Item 31
Item 35
forecast_sales_r[forecast_sales_r.index.isin(index)]
Product | Cost Per Unit | Sales | Revenue | Product_code | Iteration_code | |
---|---|---|---|---|---|---|
3 | 11 Down | 1560 | 180 | 280800 | 0 | 3 |
7 | AB Root Beer | 3842 | 380 | 1459960 | 1 | 3 |
11 | Alpine Stream | 2227 | 149 | 331823 | 2 | 3 |
12 | Bright | 1272 | 3798 | 4831056 | 3 | 0 |
13 | Bright | 1282 | 2640 | 3384480 | 3 | 1 |
14 | Bright | 1290 | 2491 | 3213390 | 3 | 2 |
18 | Crisp Clear | 1470 | 211 | 310170 | 4 | 3 |
22 | DC Kola | 1932 | 412 | 795984 | 5 | 3 |
26 | Koala Kola | 2565 | 1508 | 3868020 | 6 | 3 |
30 | Mr. Popper | 2900 | 32 | 92800 | 7 | 3 |
31 | Mr. Popper | 2985 | 29 | 86565 | 7 | 4 |
35 | Popsi Kola | 1750 | 152 | 266000 | 8 | 3 |
forecast_sales_r[forecast_sales_r.index.isin(index)]["Sales"].sum()
forecast_sales_r
sol
sales
db = solver.Phase(x_array,
solver.CHOOSE_FIRST_UNBOUND, # The VAR strategy
solver.ASSIGN_MIN_VALUE) # The Value strategy
# Create a solution collector.
collector = solver.LastSolutionCollector()
# Add decision variables
#collector.Add(x_array)
for i in range(num_workers):
collector.Add(x_worker[i])
decision_builder = solver.Phase([x, y],
solver.CHOOSE_FIRST_UNBOUND,
solver.ASSIGN_MIN_VALUE)
I tried my best to make sure that it only gets selected once,
well it did not work
because of some weird out of index issue, its shit
solver.Add((solver.Sum(x_array[i] for i, (b,p) in enumerate(product_prices) if (soda_family[b] == f) and (p != normal_price[b]) ) <= max_prom[f] for f in family))
soda
x_array[0]
x_product
unique_prod = len(forecast_sales_r.drop_duplicates("Product")["Product"].values)
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
forecast_sales_r["Product_code"] = le.fit_transform(forecast_sales_r["Product"])
forecast_sales_r["Iteration_code"] = forecast_sales_r.groupby('Product').cumcount()
forecast_sales_r.head()
for i in forecast_sales_r["Product_code"].unique():
print(i)
## So at three works pretty well but need 4.
## Dis moeilik om met die oneweridhedi the deal
[solver.Add(solver.Sum(x_product[i][j] for i in forecast_sales_r["Product_code"].unique() ) <= 1)
for j in range(len(x_product[i]))]
## I have not ide whether this would work
## will have to work in <= 1
dap = {}
for i, product in enumerate(forecast_sales_r.drop_duplicates("Product")["Product"].values):
dap[product] = len(forecast_sales_r[forecast_sales_r["Product"]==product]["Cost Per Unit"].values)
## So at three works pretty well but need 4.
## Dis moeilik om met die oneweridhedi the deal
[solver.Add(solver.Sum(x_product[i][j] for i, product in enumerate(forecast_sales_r.drop_duplicates("Product")["Product"].values)) <= 1)
for j in range(dap[product])]
## So at three works pretty well but need 4.
## Dis moeilik om met die oneweridhedi the deal
[solver.Add(solver.Sum(x_product[i][j] for i, product in enumerate(forecast_sales_r.drop_duplicates("Product")["Product"].values)) <= 1)
for j in range(len(forecast_sales_r[forecast_sales_r["Product"]==product]["Cost Per Unit"]))]