Remember to Restart Solver Often

Estimated reading time: 17 minutes

Remember to Restart Solver Often

import pandas
df_hist = pandas.read_excel("soda_sales_historical_data.xlsx")
df_hist[:5]

df_hist[df_hist["Product"]=="11 Down"]

from pandas import DataFrame, get_dummies
categorical_columns = ['Product','Easter Included','Super Bowl Included', 
                       'Christmas Included', 'Other Holiday']
df_hist = get_dummies(df_hist, prefix={k:"dmy_%s"%k for k in categorical_columns},
                      columns = list(categorical_columns))
df_hist[:5]
Sales Cost Per Unit 4 Wk Avg Temp 4 Wk Avg Humidity Sales M-1 weeks Sales M-2 weeks Sales M-3 weeks Sales M-4 Weeks Sales M-5 weeks dmy_Product_11 Down ... dmy_Product_Koala Kola dmy_Product_Mr. Popper dmy_Product_Popsi Kola dmy_Easter Included_No dmy_Easter Included_Yes dmy_Super Bowl Included_No dmy_Super Bowl Included_Yes dmy_Christmas Included_No dmy_Christmas Included_Yes dmy_Other Holiday_No
0 51.9 1.6625 80.69 69.19 17.0 22.4 13.5 14.5 28.0 1 ... 0 0 0 1 0 1 0 0 1 1
1 55.8 2.2725 80.69 69.19 2.4 2.2 2.0 1.4 0.5 0 ... 0 0 0 1 0 1 0 0 1 1
2 3385.6 1.3475 80.69 69.19 301.8 188.8 101.4 81.6 213.8 0 ... 0 0 0 1 0 1 0 0 1 1
3 63.5 1.6600 80.69 69.19 73.8 69.4 72.8 75.4 57.4 0 ... 0 0 0 1 0 1 0 0 1 1
4 181.1 1.8725 80.69 69.19 23.1 22.6 22.1 19.9 23.2 0 ... 0 0 1 1 0 1 0 0 1 1

5 rows × 25 columns

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import BaggingRegressor
from sklearn import model_selection
experiments = {"Algorithm":["Ordinary Least Squares", "Regression Tree", 
                            "Big Random Forest", "Random Forest", 
                            "Bagging"], 
               "Objects" : [lambda : LinearRegression(), 
                            lambda : DecisionTreeRegressor(), 
                            lambda : RandomForestRegressor(n_estimators=100), 
                            lambda : RandomForestRegressor(), 
                            lambda : BaggingRegressor()], 
               "Predictions":[[] for _ in range(5)]}
actuals = []
from sklearn.model_selection import train_test_split
for _ in range (4):
    train_X, test_X, train_y, test_y = (
        train_test_split(df_hist.drop("Sales", axis=1), 
                         df_hist["Sales"], test_size=0.25))
    for i, obj_factory in enumerate(experiments["Objects"]):
        obj = obj_factory()
        obj.fit(y=train_y,X=train_X)
        experiments["Predictions"][i] += list(obj.predict(test_X))
    actuals += list(test_y)
actuals = pandas.Series(actuals)
experiments["Predictions"] = list(map(pandas.Series, experiments["Predictions"]))
experiments["Results"] = []
for o in experiments["Objects"]:
    experiments["Results"].append(
        model_selection.cross_val_score(o(), y=df_hist['Sales'], 
                                        X=df_hist.drop("Sales", axis=1),
                                        cv=5).mean())
DataFrame(experiments).drop(["Objects", "Predictions"], 
                            axis=1).set_index("Algorithm")

fitted = (experiments["Objects"]
          [experiments["Algorithm"].index("Big Random Forest")]().
          fit(y=df_hist["Sales"], X=df_hist.drop("Sales", axis=1)))
df_superbowl_original = pandas.read_excel("super_bowl_promotion_data.xlsx")
df_superbowl = get_dummies(df_superbowl_original, 
                           prefix={k:"dmy_%s"%k for k in categorical_columns},
                           columns = list(categorical_columns))
assert "Sales" not in df_superbowl.columns 
assert {"Sales"}.union(df_superbowl.columns).issubset(set(df_hist.columns))
len(df_superbowl)
36
for fld in set(df_hist.columns).difference(df_superbowl.columns, {"Sales"}):
    assert fld.startswith("dmy_")
    df_superbowl[fld] = 0
    
df_superbowl = df_superbowl[list(df_hist.drop("Sales", axis=1).columns)]

predicted = fitted.predict(df_superbowl)

forecast_sales = df_superbowl_original[["Product", "Cost Per Unit"]].copy()
forecast_sales["Sales"] = predicted

forecast_sales["Sales"] = (forecast_sales["Sales"]).astype(int)

forecast_sales["Cost Per Unit"] = (forecast_sales["Cost Per Unit"]  *1000).astype(int)

forecast_sales = forecast_sales.groupby(['Product','Cost Per Unit']).mean()
forecast_sales.head()
Sales
Product Cost Per Unit
11 Down 1455 243
1512 239
1537 215
1560 180
AB Root Beer 3730 392
from __future__ import print_function
from ortools.constraint_solver import pywrapcp as cp

soda_family = {'11 Down': 'Clear', 'AB Root Beer': 'Dark', 
               'Alpine Stream': 'Clear', 'Bright': 'Clear', 
               'Crisp Clear': 'Clear', 'DC Kola': 'Dark',
               'Koala Kola': 'Dark', 'Mr. Popper': 'Dark', 
               'Popsi Kola': 'Dark'}
family  = set(soda_family[j] for j in soda_family)
soda    = set(j for j in soda_family)
max_prom = {f:2 for f in family}

product_prices = set(forecast_sales.index.values)
normal_price = {b:0 for b in soda}
for b,p in product_prices:
    normal_price[b] = max(normal_price[b],p)

for j in range(num_tasks): ## This in effect sets constraints on the domain t.append(solver.IntVar(0, 1, “x[%i,%i]” % (i, j)))

from ortools.constraint_solver import pywrapcp as cp

solver = cp.Solver("nothing_fancy")

from ortools.linear_solver import pywraplp ## See this is Linear Programming

solver = pywraplp.Solver(‘SolveIntegerProblem’, pywraplp.Solver.CBC_MIXED_INTEGER_PROGRAMMING)

The thing is, once you use this the X_array looks wrong

## More Specific Keys
forecast_sales_r =forecast_sales.reset_index()

x_array = []

"""x_array["Product", "Price"]"""
x_product = []
for i, product in enumerate(forecast_sales_r.drop_duplicates("Product")["Product"].values):
    t = []
    for j, price in enumerate(forecast_sales_r[forecast_sales_r["Product"]==product]["Cost Per Unit"].values):
        ## This in effect sets constraints on the domain
        t.append(solver.BoolVar("x[%s,%s]" % (product, price)))
    x_array.extend(t) ## produces a flat array
    x_product.append(t)
    
### Because it occurs in the same order you can also use x_product
products = forecast_sales_r.drop_duplicates("Product")["Product"].values
### This is the problems with strings, you can't call them like this.
### x_array["11 Down"]
x_product[0]
[x[11 Down,1455](0 .. 1),
 x[11 Down,1512](0 .. 1),
 x[11 Down,1537](0 .. 1),
 x[11 Down,1560](0 .. 1)]
int(forecast_sales_r["Sales"].max()+1)
3799
forecast_sales_r["Revenue"] = forecast_sales_r["Sales"]*forecast_sales_r["Cost Per Unit"]

## x_product, x_array
investment = solver.IntVar(0, 350*(1000)*1, "investment")
sales = solver.IntVar(0, int(forecast_sales_r["Sales"].max()*len(forecast_sales_r)), 'sales')
revenue = solver.IntVar(0, int(forecast_sales_r["Revenue"].max()*len(forecast_sales_r)), 'revenue')

# Total cost
solver.Add(sales == solver.Sum(x_array * forecast_sales_r.Sales))
solver.Add(revenue == solver.Sum(forecast_sales.Sales[b,p] * p * 
                                 x_array[i] for i, (b,p) in enumerate(product_prices)))
solver.Add(investment == 
                solver.Sum(max(0,forecast_sales.Sales[b,p] - 
                            forecast_sales.Sales[b,normal_price[b]]) *
                        normal_price[b] * x_array[i] 
                        for i, (b,p) in enumerate(product_prices)))
## This one is important as it sets it in motion. 
obj = sales
#obj = revevue

### Eks nog steeds nie seker of jy die regte ding maximiseer nie
objective = solver.Maximize(obj, step=1)
{'11 Down',
 'AB Root Beer',
 'Alpine Stream',
 'Bright',
 'Crisp Clear',
 'DC Kola',
 'Koala Kola',
 'Mr. Popper',
 'Popsi Kola'}
db = solver.Phase(x_array,
                solver.CHOOSE_FIRST_UNBOUND,
                solver.ASSIGN_MIN_VALUE)

collector = solver.LastSolutionCollector()
for i in range(len(x_array)):
    collector.Add(x_array[i])
collector.AddObjective(obj)

collector.AddObjective(obj)
solver.Solve(db, [objective, collector])

True
x_array[1]
x[11 Down,1512](0 .. 1)
if collector.SolutionCount() > 0:
    best_solution = collector.SolutionCount() - 1
    print("Sales = ", collector.ObjectiveValue(best_solution))
    print()
    index = []
    for i in range(len(x_array)):
        if collector.Value(best_solution, x_array[i]) == 1:
            print('Item ', i)
            index.append(i)


# 12943
Sales =  11982

Item  3
Item  7
Item  11
Item  12
Item  13
Item  14
Item  18
Item  22
Item  26
Item  30
Item  31
Item  35
forecast_sales_r[forecast_sales_r.index.isin(index)]
Product Cost Per Unit Sales Revenue Product_code Iteration_code
3 11 Down 1560 180 280800 0 3
7 AB Root Beer 3842 380 1459960 1 3
11 Alpine Stream 2227 149 331823 2 3
12 Bright 1272 3798 4831056 3 0
13 Bright 1282 2640 3384480 3 1
14 Bright 1290 2491 3213390 3 2
18 Crisp Clear 1470 211 310170 4 3
22 DC Kola 1932 412 795984 5 3
26 Koala Kola 2565 1508 3868020 6 3
30 Mr. Popper 2900 32 92800 7 3
31 Mr. Popper 2985 29 86565 7 4
35 Popsi Kola 1750 152 266000 8 3
forecast_sales_r[forecast_sales_r.index.isin(index)]["Sales"].sum()
forecast_sales_r
sol
sales
db = solver.Phase(x_array,
                solver.CHOOSE_FIRST_UNBOUND, # The VAR strategy
                solver.ASSIGN_MIN_VALUE)     # The Value strategy

# Create a solution collector.
collector = solver.LastSolutionCollector()
# Add decision variables
#collector.Add(x_array)

for i in range(num_workers):
    collector.Add(x_worker[i])
decision_builder = solver.Phase([x, y],
                                  solver.CHOOSE_FIRST_UNBOUND,
                                  solver.ASSIGN_MIN_VALUE)
I tried my best to make sure that it only gets selected once,
well it did not work

because of some weird out of index issue, its shit

solver.Add((solver.Sum(x_array[i] for i, (b,p) in enumerate(product_prices) if (soda_family[b] == f) and (p != normal_price[b]) ) <= max_prom[f] for f in family))

soda

x_array[0]

x_product

unique_prod = len(forecast_sales_r.drop_duplicates("Product")["Product"].values)

from sklearn import preprocessing
le = preprocessing.LabelEncoder()



forecast_sales_r["Product_code"] = le.fit_transform(forecast_sales_r["Product"])

forecast_sales_r["Iteration_code"] = forecast_sales_r.groupby('Product').cumcount()

forecast_sales_r.head()

for i in forecast_sales_r["Product_code"].unique():
    print(i)

## So at three works pretty well but need 4. 
## Dis moeilik om met die oneweridhedi the deal 
[solver.Add(solver.Sum(x_product[i][j] for i in forecast_sales_r["Product_code"].unique() ) <= 1) 
 for j in range(len(x_product[i]))]


## I have not ide whether this would work
## will have to work in  <= 1
dap = {}
for i, product in enumerate(forecast_sales_r.drop_duplicates("Product")["Product"].values):
    dap[product] = len(forecast_sales_r[forecast_sales_r["Product"]==product]["Cost Per Unit"].values)

## So at three works pretty well but need 4. 
## Dis moeilik om met die oneweridhedi the deal 
[solver.Add(solver.Sum(x_product[i][j] for i, product in enumerate(forecast_sales_r.drop_duplicates("Product")["Product"].values)) <= 1)
 for j in range(dap[product])]



## So at three works pretty well but need 4. 
## Dis moeilik om met die oneweridhedi the deal 
[solver.Add(solver.Sum(x_product[i][j] for i, product in enumerate(forecast_sales_r.drop_duplicates("Product")["Product"].values)) <= 1)
 for j in range(len(forecast_sales_r[forecast_sales_r["Product"]==product]["Cost Per Unit"]))]