-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtidy_price.py
249 lines (227 loc) · 10.2 KB
/
tidy_price.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
#!/usr/bin/env python
# _*_coding:utf-8_*_
"""
@Time : 2021/5/31 16:17
@Author : ji hao ran
@File : tidy_price.py
@Project : yy_tools
@Software : PyCharm
"""
import base64
import streamlit as st
import pandas as pd
import numpy as np
import io
# app 全局设置
st.set_page_config(
page_title="I Love YangYang",
page_icon=":heart:",
layout="wide",
initial_sidebar_state="expanded",
)
def download_excel(df, sheet, filename: str = 'download', title='Download'):
"""下载dataframe为csv"""
buffer = io.BytesIO()
with pd.ExcelWriter(buffer) as writer:
for i in range(len(df)):
df[i].to_excel(writer, sheet_name=sheet[i], index=False)
buffer.seek(0)
b64 = base64.b64encode(buffer.read()).decode()
data = 'data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64'
download_str = f'<a href="{data},{b64}" download="{filename}.xlsx">{title}</a>'
return st.markdown(download_str, unsafe_allow_html=True)
def tidy_price(file):
# read excel
df = pd.read_excel(file)
# 选择列
df = df[['商品ID', '一口价(单位元)', '活动价(单位元)']]
# 重命名
df.columns = ['id', 'fixed', 'active']
# 类型转换
df.iloc[:, 0] = df.iloc[:, 0].astype(str)
# df.iloc[:, 1:] = np.ceil(df.iloc[:, 1:])
# 按id分组计算最小值,方差
agg_df = df.groupby('id').agg(['min', 'std'])
agg_df.columns = ['fixed_min', 'fixed_std', 'active_min', 'active_std']
# 分为3个表
# 只有一个商品ID
df1 = agg_df.loc[agg_df[['fixed_std', 'active_std']].apply(lambda x: all(x.isnull()), axis=1)]
# 有多个商品ID, 价格相同
df2 = agg_df.loc[agg_df[['fixed_std', 'active_std']].apply(lambda x: all(x == 0), axis=1)]
# 有多个商品ID, 价格不同
df3 = agg_df.loc[agg_df[['fixed_std', 'active_std']].apply(lambda x: any(x > 0), axis=1)]
# output
def select(d):
# 一口价取整
d['fixed_int'] = d['fixed_min'].apply(lambda x: np.ceil(x))
# 津贴列
if st.session_state['u']:
d['jin_tie'] = d['active_min'].apply(lambda x: jin_tie(x, st.session_state['m'], st.session_state['n']))
else:
d['jin_tie'] = 0
# 优惠券列
man_jian = []
for i in range(1, 5):
if st.session_state[f'u{i}']:
man_jian.append([st.session_state[f'm{i}'], st.session_state[f'n{i}']])
if len(man_jian) > 0:
d['yhq'] = d['active_min'].apply(lambda x: you_hui_quan(x, *man_jian))
else:
d['yhq'] = 0
# 到手价
d['dao_shou'] = d[['active_min', 'jin_tie', 'yhq']].apply(lambda x: np.ceil(x[0] - x[1] - x[2]), axis=1)
# 选择列
d = d[['fixed_min', 'fixed_int', 'active_min', 'jin_tie', 'yhq', 'dao_shou']].sort_values(
'fixed_min').reset_index()
d.columns = ['商品ID', '一口价', '一口价取整', '活动价', '津贴', '优惠券', '到手价']
return d
return select(pd.concat([df1, df2])), select(df3)
def jin_tie(huo_dong_jia, mei_man, jian):
# 计算津贴 每满**减**
return int(huo_dong_jia / mei_man) * jian
def you_hui_quan(huo_dong_jia, *man_jian):
# 计算优惠券 满**减** 多个叠加
r = 0
for i in man_jian:
man, jian = i[0], i[1]
if huo_dong_jia >= man and r < jian:
r = jian
return r
def divide_df(df, max_row: int, sheet_name_prefix: str = '没有起'):
r = []
for i in range(1000):
start_i = max_row * i
end_i = max_row * (i + 1)
if df.shape[0] > end_i:
r.append(df.iloc[start_i:end_i])
else:
r.append(df.iloc[start_i:df.shape[0]])
break
r_name = [f'{sheet_name_prefix}_{i}' for i in range(1, len(r) + 1)]
return r, r_name
# sidebar layout
menu = st.sidebar.radio('功能', ['鹿班打标', '价格检测'])
if menu == '鹿班打标':
with st.sidebar.container():
st.markdown('---')
file = st.file_uploader('上传Excel')
max_row = st.number_input('最大表格行数', min_value=0, value=2000, help='打标上传表格行数限值')
col = st.columns(5)
with col[0].expander('津贴 😀', True):
st.checkbox('启用', True, key='u')
st.number_input('每满', key='m', min_value=0, value=1400, disabled=False if st.session_state['u'] else True)
st.number_input('减', key='n', min_value=0, value=100, disabled=False if st.session_state['u'] else True)
with col[1].expander('优惠券1', True):
st.checkbox('启用', True, key='u1')
st.number_input('满', min_value=0, value=1499, key='m1', disabled=False if st.session_state['u1'] else True)
st.number_input('减', min_value=0, value=50, key='n1', disabled=False if st.session_state['u1'] else True)
with col[2].expander('优惠券2', True):
st.checkbox('启用', key='u2')
st.number_input('满', min_value=0, key='m2', disabled=False if st.session_state['u2'] else True)
st.number_input('减', min_value=0, key='n2', disabled=False if st.session_state['u2'] else True)
with col[3].expander('优惠券3', True):
st.checkbox('启用', key='u3')
st.number_input('满', min_value=0, key='m3', disabled=False if st.session_state['u3'] else True)
st.number_input('减', min_value=0, key='n3', disabled=False if st.session_state['u3'] else True)
with col[4].expander('优惠券4', True):
st.checkbox('启用', key='u4')
st.number_input('满', min_value=0, key='m4', disabled=False if st.session_state['u4'] else True)
st.number_input('减', min_value=0, key='n4', disabled=False if st.session_state['u4'] else True)
b = st.button('计算', key='button')
# main layout
if b:
if file:
with st.spinner('计算中...'):
d1, d2 = tidy_price(file)
tabs = st.tabs(['没有起', '有起'])
with tabs[0]:
st.dataframe(d1, use_container_width=True)
with tabs[1]:
st.dataframe(d2, use_container_width=True)
# 下载
col = st.columns(2)
with col[0]:
cols = ['商品ID', '到手价', '一口价取整']
download_df1 = divide_df(d1[cols], max_row)
download_df2 = divide_df(d2[cols], max_row, '有起')
download = [[*i[0], *i[1]] for i in zip(download_df1, download_df2)]
download_excel(*download, filename=f'{pd.Timestamp.now().date()}打标表', title='下载打标表')
with col[1]:
download_excel([d1, d2], ['没有起', '有起'], filename=f'{pd.Timestamp.now().date()}计算表', title='下载计算表')
st.info("""
备注:
1. 选取目标列[商品ID, 一口价(单位元), 活动价(单位元)]
2. 到手价=向上取整(活动价-津贴-优惠券)
3. 按商品ID列分组计算价格最小值和方差
4. 根据价格方差对结果分类
5. 每个表根据一口价升序排序
6. 没有起:只有一个商品ID或者有多个商品ID, 价格相同。有起:有多个商品ID, 价格不同。
""")
st.balloons()
else:
st.warning('请先上传表格,再点击计算按钮!')
if menu == '价格检测':
c = st.columns(2)
with c[0]:
file1 = st.file_uploader('原始价格Excel')
with c[1]:
file2 = st.file_uploader('现在价格Excel')
b = st.button('计算')
table = st.columns(3)
if b:
if file1 is not None and file2 is not None:
df1 = pd.read_excel(file1)
df2 = pd.read_excel(file2)
df1.drop_duplicates(inplace=True)
df2.drop_duplicates(inplace=True)
# 连接
df = df1.merge(df2, how='outer')
df['价差'] = df['现在价格'] - df['原始价格']
ids = df['外部ID'].value_counts() # id列联表
# 多商品ID
m_ids = ids[ids > 1]
if not m_ids.empty:
# 筛选多个外部ID
m_df = df[df['外部ID'].isin(m_ids.index)]
# 筛选价差大于100
m_df = m_df[m_df['价差'] > 100]
# 分组价差均值
agg = m_df.groupby('外部ID')['价差'].agg(['mean']).sort_values('mean', ascending=False).reset_index()
# 合并
m_df = agg.merge(m_df).rename(columns={'mean': '价差均值'})
# 列排序
m_df = m_df[['外部ID', '原始价格', '现在价格', '价差', '价差均值']].reset_index(drop=True)
with table[0]:
st.markdown('### **表1:多商品ID**')
st.write(m_df)
else:
m_df = pd.DataFrame()
st.info('无多商品ID情形')
# 单商品ID
s_ids = ids[ids == 1]
if not s_ids.empty:
s_df = df[df['外部ID'].isin(s_ids.index)]
# 筛选价差大于100
s_df = s_df[s_df['价差'] > 100]
# 排序
s_df = s_df.sort_values(by='价差', ascending=False).reset_index(drop=True)
with table[1]:
st.markdown('### **表2:单商品ID**')
st.write(s_df)
else:
s_df = pd.DataFrame()
st.info('无单商品ID情形')
# 未上架 (外部ID原始价格无,现在价格有)
no_df = df[df['原始价格'].isna() & ~df['现在价格'].isna()]
if not no_df.empty:
no_df = no_df.sort_values(by='现在价格').reset_index(drop=True)
with table[2]:
st.markdown('### **表3:未上架**')
st.write(no_df)
else:
st.info('无未上架情形')
st.markdown(download_excel([m_df, s_df, no_df], ['多商品ID', '单商品ID', '未上架'],
filename=f'{pd.Timestamp.now().date()}价格检测'), unsafe_allow_html=True)
st.balloons()
else:
st.warning('请先上传表格,再点击计算按钮!')