- 用户在bucket中从1个产品到{N}个产品中进行选择
- 对于每种产品,都有来自供应商的{N}个报价
- 我想让用户有机会看到提示,告诉他从哪些供应商组合购买最划算
我想要的输出是:
- 如果您从供应商(A)那里获得5种产品的总成本为87$
- 如果你从供应商(A)和(B)那里获得5种产品的总成本是80$+(x2运费)
- 如果你从供应商(A)(B)和(C)那里获得5种产品的总成本是72$+(x3运费)
Python、pandas
x = pd.DataFrame.from_dict(OFFERS)
print(x)
offer_price product_id ventor
0 5.0 1 A
1 6.0 1 B
2 7.0 1 C
3 8.0 1 D
4 9.0 1 E
5 5.1 2 A
6 6.1 2 B
7 7.1 2 C
8 8.1 2 D
9 9.1 2 E
10 5.2 3 A
11 6.2 3 B
12 7.2 3 C
13 8.2 3 D
14 9.2 3 E
15 77.2 3 F
16 66.2 3 G
我从服务器得到一个JSON格式的列表
OFFERS = [
{'offer_id': 100, 'product_id': 1, 'vendor':'A', 'offer_price':5.00},
{'offer_id': 101, 'product_id': 1, 'vendor':'B', 'offer_price':6.00},
{'offer_id': 102, 'product_id': 1, 'vendor':'C', 'offer_price':7.00},
{'offer_id': 103, 'product_id': 1, 'vendor':'D', 'offer_price':8.00},
{'offer_id': 104, 'product_id': 1, 'vendor':'E', 'offer_price':9.00},
{'offer_id': 105, 'product_id': 2, 'vendor':'A', 'offer_price':5.10},
{'offer_id': 106, 'product_id': 2, 'vendor':'B', 'offer_price':6.10},
{'offer_id': 107, 'product_id': 2, 'vendor':'C', 'offer_price':7.10},
{'offer_id': 108, 'product_id': 2, 'vendor':'D', 'offer_price':8.10},
{'offer_id': 109, 'product_id': 2, 'vendor':'E', 'offer_price':9.10},
{'offer_id': 110, 'product_id': 3, 'vendor':'A', 'offer_price':5.20},
{'offer_id': 111, 'product_id': 3, 'vendor':'B', 'offer_price':6.20},
{'offer_id': 112, 'product_id': 3, 'vendor':'C', 'offer_price':7.20},
{'offer_id': 113, 'product_id': 3, 'vendor':'D', 'offer_price':8.20},
{'offer_id': 114, 'product_id': 3, 'vendor':'E', 'offer_price':9.20}
]
问题1
我如何获得限制供应商(运输)的最佳报价组合,并获得尽可能好的价格
我想要的输出是:
- 如果您从供应商(A)那里获得5种产品的总成本为87$
- 如果你从供应商(A)和(B)那里获得5种产品的总成本是80$+(x2运费)
- 如果您从供应商(A)(B)和(C)处获得5种产品的总成本为72$+(x3运费)
我的问题是:
- 我想我想解决的问题叫什么名字
- 我需要使用机器学习来解决那个问题吗
到目前为止,我的代码是使用python 3.6
==========================尝试1=======================
经过3个小时的挖掘,我得到了这个,但我认为这个算法非常慢
我的数据格式是:
OFFERS = [
{'offer_id':'oid_1','product_id': 'pid_1', 'vendor':'B', 'offer_price':5.00},
{'offer_id':'oid_1','product_id': 'pid_1', 'vendor':'B', 'offer_price':6.00},
{'offer_id':'oid_2','product_id': 'pid_1', 'vendor':'C', 'offer_price':7.00},
{'offer_id':'oid_3','product_id': 'pid_1', 'vendor':'D', 'offer_price':2.00},
{'offer_id':'oid_4','product_id': 'pid_1', 'vendor':'E', 'offer_price':9.00},
{'offer_id':'oid_5','product_id': 'pid_2', 'vendor':'A', 'offer_price':5.10},
{'offer_id':'oid_6','product_id': 'pid_2', 'vendor':'B', 'offer_price':6.10},
{'offer_id':'oid_7','product_id': 'pid_2', 'vendor':'C', 'offer_price':7.10},
{'offer_id':'oid_8','product_id': 'pid_2', 'vendor':'D', 'offer_price':18.10},
{'offer_id':'oid_9','product_id': 'pid_2', 'vendor':'E', 'offer_price':9.10},
{'offer_id':'oid_10','product_id': 'pid_3', 'vendor':'A', 'offer_price':5.20},
{'offer_id':'oid_11','product_id': 'pid_3', 'vendor':'B', 'offer_price':6.20},
{'offer_id':'oid_12','product_id': 'pid_3', 'vendor':'C', 'offer_price':37.20},
{'offer_id':'oid_13','product_id': 'pid_3', 'vendor':'D', 'offer_price':8.20},
{'offer_id':'oid_14','product_id': 'pid_3', 'vendor':'E', 'offer_price':9.20},
{'offer_id':'oid_15','product_id': 'pid_3', 'vendor':'F', 'offer_price':77.20},
{'offer_id':'oid_16','product_id': 'pid_3', 'vendor':'G', 'offer_price':66.20},
]
app.py
import pandas as pd
import json
from collections import defaultdict, Counter
import itertools
import random
from timeit import default_timer as timer
# Record the start time at import so get_sums() can report the elapsed
# wall-clock time of the whole search (it computes timer() - start).
start = timer()
print('Timer Start')
def generate_random_offers(n_offers=99,
                           vendors=('A', 'B', 'C', 'D', 'E'),
                           product_ids=('pid_1', 'pid_2', 'pid_3', 'pid_4')):
    """Generate random offers for exercising the search.

    Each offer is a dict of this form::

        {'offer_id': 'oid_1', 'product_id': 'pid_1', 'ventor': 'B', 'offer_price': 5.00}

    The vendor key is spelled ``'ventor'`` on purpose: the other helpers
    in this file read that exact key.

    Args:
        n_offers: Number of offers to generate; ids run from ``oid_1`` to
            ``oid_<n_offers>``. The default of 99 matches the original
            hard-coded ``range(1, 100)``.
        vendors: Sequence of vendor names to draw from.
        product_ids: Sequence of product ids to draw from.

    Returns:
        A list of ``n_offers`` offer dicts with a price drawn uniformly
        from [1, 80] and rounded to 2 decimals.
    """
    _offers = []
    for i in range(1, n_offers + 1):
        # Keep the draw order (price, vendor, product) so a seeded run
        # yields the same sequence as before.
        random_price = round(random.uniform(1, 80), 2)
        random_vendor = random.choice(vendors)
        random_pid = random.choice(product_ids)
        _offers.append({
            'offer_id': f'oid_{i}',
            'product_id': random_pid,
            'ventor': random_vendor,
            'offer_price': random_price,
        })
    return _offers
#end
# Module-level list holding all the offers; the helper functions in this
# file read it.
OFFERS = generate_random_offers()
def get_the_vendors(offers=None):
    """Return the distinct vendors, in order of first appearance.

    Args:
        offers: Iterable of offer dicts carrying a ``'ventor'`` key.
            Defaults to the module-level ``OFFERS``.

    Returns:
        A list of the unique ``'ventor'`` values.
    """
    if offers is None:
        offers = OFFERS
    # dict.fromkeys de-duplicates in one pass while keeping insertion
    # order, instead of scanning a result list for every offer.
    return list(dict.fromkeys(offer['ventor'] for offer in offers))
#end
def get_the_products(offers=None):
    """Return the distinct product ids, in order of first appearance.

    Args:
        offers: Iterable of offer dicts carrying a ``'product_id'`` key.
            Defaults to the module-level ``OFFERS``.

    Returns:
        A list of the unique ``'product_id'`` values.
    """
    if offers is None:
        offers = OFFERS
    # dict.fromkeys de-duplicates in one pass while keeping insertion
    # order, instead of scanning a result list for every offer.
    return list(dict.fromkeys(offer['product_id'] for offer in offers))
#end
def get_offers_base_on_product(offers=None):
    """Group offer ids by the product they belong to.

    Args:
        offers: Iterable of offer dicts carrying ``'product_id'`` and
            ``'offer_id'`` keys. Defaults to the module-level ``OFFERS``.

    Returns:
        A list with one inner list per product (products in order of
        first appearance); each inner list holds the ``'offer_id'`` values
        for that product, in input order.
    """
    if offers is None:
        offers = OFFERS
    # One pass over the offers; the dict keeps products in first-seen
    # order, which is the order the original per-product rescan produced.
    ids_by_product = {}
    for offer in offers:
        ids_by_product.setdefault(offer['product_id'], []).append(offer['offer_id'])
    return list(ids_by_product.values())
#end
def get_the_vendors_total_product_price(offers=None):
    """Return, and print, the sum of offer prices per vendor.

    Args:
        offers: Iterable of offer dicts carrying ``'ventor'`` and
            ``'offer_price'`` keys. Defaults to the module-level ``OFFERS``.

    Returns:
        A list of ``{'vendor': <name>, 'sum': <total>}`` dicts, one per
        vendor in order of first appearance. Note the output key is
        ``'vendor'`` while the input key is ``'ventor'``.
    """
    if offers is None:
        offers = OFFERS
    # Accumulate every vendor's total in a single pass instead of
    # rescanning all offers once per vendor.
    totals = {}
    for offer in offers:
        vendor = offer['ventor']
        totals[vendor] = totals.get(vendor, 0) + offer['offer_price']

    _v = []
    for vendor, _sum in totals.items():
        print('sum of ' + vendor + ' => ', _sum)
        _v.append({'vendor': vendor, 'sum': _sum})
    print(_v)
    return _v
#end
def compinations(offers_by_product=None):
    """Build every way of picking exactly one offer per product.

    Args:
        offers_by_product: List of lists of offer ids, one inner list per
            product. Defaults to ``get_offers_base_on_product()``.

    Returns:
        A list of tuples (the Cartesian product of the inner lists); each
        tuple holds one offer id per product. The list is fully
        materialised, so its size is the product of the inner list
        lengths.
    """
    if offers_by_product is None:
        offers_by_product = get_offers_base_on_product()
    super_compinations = list(itertools.product(*offers_by_product))
    # Preview at most the first three combinations; slicing avoids the
    # IndexError that fixed indices [1] and [2] raise on small inputs.
    for combination in super_compinations[:3]:
        print(combination)
    return super_compinations
#end
def get_sums():
    """Price every combination from compinations() and report the cheapest.

    Prints the list of all combination totals, the minimum price, the
    combination that achieves it, and the time elapsed since the
    module-level ``start`` timestamp.

    Returns:
        A ``(min_price, min_set)`` tuple: the lowest total found and the
        tuple of offer ids that produces it. ``min_set`` stays ``''`` if
        there were no combinations to price.
    """
    super_compinations = compinations()

    # Build the offer_id -> price lookup once, instead of scanning all of
    # OFFERS for every id of every combination. Offers that share an id
    # are added together, as the original full scan counted each match.
    price_by_offer_id = {}
    for _offer in OFFERS:
        offer_id = _offer['offer_id']
        price_by_offer_id[offer_id] = (
            price_by_offer_id.get(offer_id, 0) + _offer['offer_price']
        )

    _sums = []
    # Infinity rather than a magic 1000, so totals above 1000 can still win.
    min_price = float('inf')
    min_set = ''
    # KeyboardInterrupt is deliberately not caught here: swallowing it in
    # the loop made the search impossible to stop with Ctrl-C.
    for combination in super_compinations:
        price = sum(price_by_offer_id.get(offer_id, 0) for offer_id in combination)
        _sums.append(price)
        if price < min_price:
            min_price = price
            min_set = combination

    print('========')
    print('OFFERS SUMS => ', _sums)
    print('========')
    print('Min Price: ', min_price)
    print('Min Set: ', min_set)
    # STOP MY TIMER
    elapsed_time = timer() - start  # in seconds
    print('TOOK: ', elapsed_time)
    return min_price, min_set
#end
# Here I start the program: first compute all the combinations, then
# sum the offer prices of every combination one by one.
get_sums()
_offers_by_product [['oid_1', 'oid_1', 'oid_2', 'oid_3', 'oid_4'], ['oid_5', 'oid_6', 'oid_7', 'oid_8', 'oid_9'], ['oid_10', 'oid_11', 'oid_12', 'oid_1
3', 'oid_14', 'oid_15', 'oid_16']]
ALL POSSIBLE COMBINATIONS [
('oid_1', 'oid_5', 'oid_10'),
('oid_1', 'oid_5', 'oid_11'),
('oid_1', 'oid_5', 'oid_12'),
('oid_1', 'oid_5', 'oid_13'),
('oid_1', 'oid_5', 'oid_14'),
('oid_1', 'oid_5', 'oid_15'),
('oid_1', 'oid_5', 'oid_16'),
('oid_1', 'oid_6', 'oid_10'),
('oid_1', 'oid_6', 'oid_11'),
### N..... Possible combinations mabe 1.000.000 milion
]
这是输出:约100万种组合,耗时约20秒
[4, 177.64, 206.63, 227.38, 152.29, 202.47, 211.85, 195.35, 171.37, 191.94,
187.51999999999998, 122.53999999999999, 139.34, 166.43, 135.62, 167.49, 182.12, 169.79
, 193.42000000000002, 147.42, 176.41, 197.16, 122.07, 172.25, 181.63, 165.13, 141.15, 161.72, 157.3, 150.73999999999998, 167.54, 194.63, 163.82, 195.69, 210.32,
193.54000000000002, 225.41000000000003, 240.04000000000002, 227.71, 251.34000000000003, 205.34,
234.33, 255.08, 179.99, 230.17000000000002, 239.55, 223.05, 192.67000000000002, 213.24, 208.82]
========
Min Price: 14.08
Min Set: ('oid_22', 'oid_16', 'oid_9', 'oid_71')
TOOK: 19.05843851931529
PS C:\Users\George35mk\Desktop\MACHINE LERNING EXAMPLES\Hello world>
有专家能告诉我的方式是否正确吗
不要使用机器学习,使用现成的求解器进行混合整数规划(这是一个基本的离散优化问题)或设计自己的近似算法。这个问题可能是NP难的,许多流行的NP难问题都有一些共同的特点,人们可以从中学习!
这里有一些演示应该解释使用混合整数编程的基本思想!不过也有一些注意事项:
- 此代码尚未准备好进行生产(演示!)
- bigM需要调整;尤其是对于默认解算器(糟糕;继续阅读!)
- 这段代码使用我最喜欢的建模工具cvxpy(尽管是为其他用例构建的)
- 缺点:默认MIP解算器和它一样糟糕->只有玩具问题!
- 潜在补救措施:
- cvxpy支持使用一些商业解算器(Gurobi、CPLEX、Mosek)(如果可用)
- cvxpy还支持良好的开源求解器(CBC、GLPK),可能需要更复杂的设置(建议使用Linux;请阅读cvxpy的文档)
- 潜在补救措施:
- 缺点:默认MIP解算器和它一样糟糕->只有玩具问题!
MIP解算器对于这类问题应该非常强大。即使问题是NP难的,它也应该能够在给定的时间限制内找到较好的近似解(并附带一些可证明的界!)
或者,你可以试试 PuLP,它:
- 易于安装(即使在windows上)
- 带来了良好的默认MIP解算器(CBC)
- 建模方式没有那么好(如果你喜欢代数式的建模!但有些人喜欢 PuLP 的风格,而且它的安装/打包是一流的!)
即使使用这种简单的数学公式,当目标是最佳或良好的近似时,优秀的MIP解算器也很难被击败
代码:
import numpy as np
import scipy as sp
import cvxpy as cvx
np.random.seed(1)

# --- Synthetic problem ---------------------------------------------------
N = 3  # products to order
M = 5  # vendors

# Shipping costs: one entry per vendor, charged if that vendor is used,
# independent of the number of products ordered from it.
v_ship_c = np.random.choice([1, 3, 5], size=M)

# Product prices: an (N, M) matrix built from a per-product mean price plus
# per-vendor Gaussian noise, clipped below at 1.
product_prices_mean = np.random.random(size=N) * 50
p_price_v = np.repeat(product_prices_mean, M).reshape(N,M) + np.random.normal(size=(N, M)) * 2
p_price_v = np.clip(p_price_v, 1, np.inf)  # min cost of 1

# Product availability: p_v_avail[p, v] == 1 iff vendor v offers product p.
p_v_avail = np.random.choice([0,1], size=(N, M), p=[0.2, 0.8])
assert np.all(np.count_nonzero(p_v_avail, axis=1) > 0)  # feasible solution

# Print
print('# products ordered: ', N)
print('# vendors: ', M)
print('Vendor shipping costs')
print(v_ship_c)
print('Mean product prices')
print(product_prices_mean)
print('Vendor-specific product prices')
print(p_price_v)
print('Vendor-product availability')
print(p_v_avail)

# --- Optimization problem ------------------------------------------------
# Big-M constant / CRITICAL! It only has to bound the number of products
# ordered from one vendor. X is binary with N rows, so no column sum can
# exceed N; N is therefore the tightest valid value and replaces the loose
# 1e4.
# "http://scip.zib.de/workshop/scip_lodi.pdf"
bigM = N

X = cvx.Bool(N, M)  # [p,v] == 1 iff p ordered from v
Y = cvx.Bool(M)     # [v] == 1 iff vendor v used -> shipping

objective_product_costs = cvx.sum_entries(cvx.mul_elemwise(p_price_v, X))
objective_shipping_costs = sum(v_ship_c * Y)
objective = cvx.Minimize(objective_product_costs + objective_shipping_costs)

# At least one of each product ordered. ">=" is more relaxed than "==" and
# gives an equal solution: given costs are positive, ordering 2 is always
# more expensive than ordering 1.
constraints = [cvx.sum_entries(X, axis=1) >= 1]

not_available = np.where(p_v_avail == 0)
constraints.append(X[not_available] == 0)  # can't order from v if v not having p

# Indicator link: the number of products ordered from vendor v may be
# positive only if Y[v] == 1.
constraints.append(cvx.sum_entries(X, axis=0).T <= cvx.mul_elemwise(bigM, Y))

problem = cvx.Problem(objective, constraints)
problem.solve()

# --- Output solution -----------------------------------------------------
print(problem.status)
print('Total costs: ', problem.value)
print('Product costs: ', round(objective_product_costs.value, 2))
print('Shipping costs: ', round(objective_shipping_costs.value, 2))
print('Order matrix')
print(np.round(X.value))
print('Shipping matrix')
print(np.round(Y.value.T))
输出:
# products ordered: 3
# vendors: 5
Vendor shipping costs
[3 1 1 3 3]
Mean product prices
[ 7.33779454 4.61692974 9.31301057]
Vendor-specific product prices
[[ 5.12592439 4.02876364 2.61085733 9.60848524 5.30376627]
[ 5.89165337 2.89711652 8.162145 2.39620363 4.97935827]
[ 10.4417003 8.17999011 10.77296176 10.05899815 10.38063239]]
Vendor-product availability
[[1 1 1 1 1]
[0 1 1 1 1]
[1 0 1 1 1]]
optimal
Total costs: 18.280935453799668
Product costs: 16.28
Shipping costs: 2.0
Order matrix
[[ 0. 0. 1. 0. 0.]
[ 0. 1. 0. 0. 0.]
[ 0. -0. 1. 0. 0.]]
Shipping matrix
[[ 0. 1. 1. 0. 0.]]
这个玩具解算器在0.01秒内解决了这个小示例。当然,更大的实例会表现得不同!