# Draw a pie chart of the age groups (ring-shaped: wedge width 0.4).
plt.figure(figsize=(8, 6), dpi=100)
plt.pie(
    res.values,
    labels=['middle', 'young', 'old'],
    autopct='%.2f%%',
    pctdistance=0.8,
    counterclock=False,
    wedgeprops={'width': 0.4},
)
plt.title('fenbu')
# plt.savefig('./age fenbu.png')
# Two ring charts comparing rows with vip == 1 against rows with vip == 0:
# left = number of distinct 'dtime' values, right = sum of the 'je' column.
# NOTE(review): 'dtime' presumably identifies an order and 'je' the amount
# spent -- confirm against the dataset definition.
fig, axs = plt.subplots(1, 2, figsize=(12, 7), dpi=100)

vip_rows = data_merge1['vip'] == 1
normal_rows = data_merge1['vip'] == 0

order_counts = [
    len(data_merge1.loc[vip_rows, 'dtime'].unique()),
    len(data_merge1.loc[normal_rows, 'dtime'].unique()),
]
axs[0].pie(
    order_counts,
    labels=['vip', 'normal'],
    wedgeprops={'width': 0.4},
    counterclock=False,
    autopct='%.2f%%',
    pctdistance=0.8,
)
axs[0].set_title('total dingdan%')

spend_totals = [
    data_merge1.loc[vip_rows, 'je'].sum(),
    data_merge1.loc[normal_rows, 'je'].sum(),
]
axs[1].pie(
    spend_totals,
    labels=['vip', 'normal'],
    wedgeprops={'width': 0.4},
    counterclock=False,
    autopct='%.2f%%',
    pctdistance=0.8,
)
axs[1].set_title('total je%')
# 以季度和天为单位的消费时间偏好
# Store the members' consumption records as a separate dataset: keep only
# rows without missing values, drop the '会员' column, and renumber the rows
# from 0.
# NOTE(review): presumably non-member rows are the ones with missing values
# -- confirm against how df1 is built.
df_vip = df1.dropna().drop(columns=['会员']).reset_index(drop=True)
df_vip.info()
# Convert the '消费产生的时间' column to datetime values.
df_vip['消费产生的时间'] = pd.to_datetime(df_vip['消费产生的时间'])

# Add four derived columns, in this order: year, month, quarter, day of month.
purchase_time = df_vip['消费产生的时间'].dt
for column, part in (
    ('年份', 'year'),
    ('月份', 'month'),
    ('季度', 'quarter'),
    ('天', 'day'),
):
    df_vip[column] = getattr(purchase_time, part)
df_vip.head()
# Working assumption: between 2015 and 2018 consumer preferences do not change
# much over time (so averages are meaningful); preference is measured by the
# number of orders at each point in time.
# NOTE(review): orders() is defined elsewhere; it is used here as returning
# (x values, order counts) as two lists -- the meaning of its third argument
# is not visible from this section.
quarters_list, quarters_order = orders(df_vip, '季度', 3)
days_list, days_order = orders(df_vip, '天', 36)
time_list = [quarters_list, days_list]
order_list = [quarters_order, days_order]
# Position of the first maximum in each order-count list.
maxindex_list = [counts.index(max(counts)) for counts in order_list]

fig, axs = plt.subplots(1, 2, figsize=(18, 7), dpi=100)
# One randomly chosen, non-repeating colour per subplot.
colors = np.random.choice(['r', 'g', 'b', 'orange', 'y'], replace=False, size=len(axs))
titles = ['季度的均值消费偏好', '天数的均值消费偏好']
labels = ['季度', '天数']

panels = zip(axs, time_list, order_list, maxindex_list, colors, titles, labels)
for ax, xs, ys, peak, color, title, label in panels:
    ax.plot(xs, ys, linestyle='-.', c=color, marker='o', alpha=0.85)
    # Dashed vertical line at the x value with the highest order count.
    ax.axvline(x=xs[peak], linestyle='--', c='k', alpha=0.8)
    ax.set_title(title)
    ax.set_xlabel(label)
    ax.set_ylabel('均值消费订单数')
    print(f'{title}最优的时间为: {xs[peak]}\t 对应的均值消费订单数为: {ys[peak]}')
plt.savefig('./季度和天数的均值消费偏好情况.png')
# 不同年份之间的季度或天数的消费订单差异
# Helper that plots how order counts differ between years, per quarter or day.
def plot_qd(df, label_y, label_m, nrow, ncol):
    """Plot per-year order counts against each time field in ``label_m``.

    One subplot is drawn per entry of ``label_m``. Each subplot contains one
    line per year, showing the number of distinct '消费产生的时间' values for
    every value of that time field. The figure is saved to the current
    working directory as './2015-2018年会员的<fields>消费订单差异.png'.

    Args:
        df: DataFrame with the columns ``label_y``, every column named in
            ``label_m``, and '消费产生的时间'.
        label_y: Name of the year column.
        label_m: List of column names to plot on the x axis, one per subplot.
        nrow: Number of subplot rows.
        ncol: Number of subplot columns.

    Raises:
        IndexError: If ``label_m`` has more entries than ``nrow * ncol``.
    """
    # The last year must be dropped; only the earlier years are analysed
    # (2015-2017 in the original data).
    # NOTE(review): presumably because the final year is incomplete -- confirm.
    y_list = np.sort(df[label_y].unique().tolist())[:-1]

    palette = ['r', 'g', 'b', 'orange', 'y', 'k', 'c', 'm']
    # Draw without repetition while the palette is large enough; only fall
    # back to repeated colours when there are more years than colours.
    colors = np.random.choice(
        palette, replace=len(y_list) > len(palette), size=len(y_list)
    )
    markers = ['o', '^', 'v']

    # squeeze=False always yields a 2-D array of axes, so flattening works for
    # 1x1, 1xN, Nx1 and NxM grids alike. No separate plt.figure() call is
    # made: plt.subplots() already creates the figure.
    fig, axs = plt.subplots(nrow, ncol, figsize=(16, 7), dpi=100, squeeze=False)
    axes = axs.ravel()

    for k, field in enumerate(label_m):
        ax = axes[k]
        m_list = np.sort(df[field].unique().tolist())
        for i, year in enumerate(y_list):
            year_rows = df.loc[df[label_y] == year]
            order_m = [
                len(year_rows.loc[year_rows[field] == value, '消费产生的时间'].unique())
                for value in m_list
            ]
            ax.plot(
                m_list,
                order_m,
                linestyle='-.',
                c=colors[i],
                alpha=0.8,
                # Cycle through the markers so more than three years still plot.
                marker=markers[i % len(markers)],
                label=year,
                markersize=4,
            )
        ax.set_xlabel(f'{field}')
        ax.set_ylabel('消费订单数')
        ax.set_title(f'2015-2018年会员的{field}消费订单差异')
        ax.legend()

    fig.savefig(f'./2015-2018年会员的{"和".join(label_m)}消费订单差异.png')
# Compare order counts between years by quarter ('季度') and by day ('天'),
# laid out on a 1x2 grid of subplots.
plot_qd(df_vip, '年份', ['季度', '天'], 1, 2)
# 不同年份之间的月份消费订单差异
# Helper that plots how monthly orders and spend differ between years.
def plot_ym(df, label_y, label_m):
    """Plot per-year monthly order counts and spend totals side by side.

    The left subplot shows, for each year, the number of distinct
    '消费产生的时间' values per month; the right subplot shows the sum of
    '消费金额' per month. The figure is saved to the current working
    directory as './2015-2018年会员的消费订单和金额差异.png'.

    Args:
        df: DataFrame with the columns ``label_y``, ``label_m``,
            '消费产生的时间' and '消费金额'.
        label_y: Name of the year column.
        label_m: Name of the month column.
    """
    # The last year must be dropped; only the earlier years are analysed
    # (2015-2017 in the original data).
    # NOTE(review): presumably because the final year is incomplete -- confirm.
    y_list = np.sort(df[label_y].unique().tolist())[:-1]
    m_list = np.sort(df[label_m].unique().tolist())

    palette = ['r', 'g', 'b', 'orange', 'y']
    # Draw without repetition while the palette is large enough; only fall
    # back to repeated colours when there are more years than colours.
    colors = np.random.choice(
        palette, replace=len(y_list) > len(palette), size=len(y_list)
    )
    markers = ['o', '^', 'v']

    fig, axs = plt.subplots(1, 2, figsize=(18, 8), dpi=100)
    order_ax, money_ax = axs

    for i, year in enumerate(y_list):
        year_rows = df.loc[df[label_y] == year]
        order_m = []
        money_m = []
        for month in m_list:
            month_rows = year_rows.loc[year_rows[label_m] == month]
            order_m.append(len(month_rows['消费产生的时间'].unique()))
            money_m.append(month_rows['消费金额'].sum())

        line_style = {
            'linestyle': '-.',
            'c': colors[i],
            'alpha': 0.8,
            # Cycle through the markers so more than three years still plot.
            'marker': markers[i % len(markers)],
            'label': year,
        }
        order_ax.plot(m_list, order_m, **line_style)
        money_ax.plot(m_list, money_m, **line_style)

    order_ax.set_xlabel('月份')
    order_ax.set_ylabel('消费订单数')
    order_ax.set_title('2015-2018年会员的消费订单差异')
    money_ax.set_xlabel('月份')
    money_ax.set_ylabel('消费金额总数')
    money_ax.set_title('2015-2018年会员的消费金额差异')
    order_ax.legend()
    money_ax.legend()

    fig.savefig('./2015-2018年会员的消费订单和金额差异.png')
# Call the function: year column '年份', month column '月份'.
plot_ym(df_vip, label_y='年份', label_m='月份')
# Next, analyse how the number of orders differs by hour of the day.
purchase_hour = df_vip['消费产生的时间'].dt.hour
df_vip['时间'] = purchase_hour
# NOTE(review): orders() is defined elsewhere; it is used here as returning
# (x values, order counts) -- the meaning of the third argument is not
# visible from this section.
x_list, order_nums = orders(df_vip, '时间', 1)
# Plot order counts per hour and mark the hour with the highest count.
maxindex = order_nums.index(max(order_nums))
peak_hour = x_list[maxindex]

plt.figure(figsize=(8, 6), dpi=100)
plt.plot(x_list, order_nums, linestyle='-.', marker='o', c='m', alpha=0.8)
# Dashed vertical line at the first hour that reaches the maximum.
plt.axvline(x=peak_hour, linestyle='--', c='r', alpha=0.6)
plt.xlabel('小时')
plt.ylabel('消费订单')
plt.title('2015-2018年各段小时的销售订单数')
plt.savefig('./2015-2018年各段小时的销售订单数.png')
# Two ring charts comparing rows with vip == 1 against rows with vip == 0 in
# data23: left = number of distinct 'dtime' values, right = sum of 'je'.
# NOTE(review): 'dtime' presumably identifies an order and 'je' the amount
# spent -- confirm against the dataset definition.
fig, axs = plt.subplots(1, 2, figsize=(12, 7), dpi=100)

vip_part = data23.loc[data23['vip'] == 1]
normal_part = data23.loc[data23['vip'] == 0]

panels = [
    (
        'total dingdan%',
        [len(vip_part['dtime'].unique()), len(normal_part['dtime'].unique())],
    ),
    (
        'total je%',
        [vip_part['je'].sum(), normal_part['je'].sum()],
    ),
]
for ax, (title, shares) in zip(axs, panels):
    ax.pie(
        shares,
        labels=['vip', 'normal'],
        wedgeprops={'width': 0.4},
        counterclock=False,
        autopct='%.2f%%',
        pctdistance=0.8,
    )
    ax.set_title(title)