文章目录
- 一、分析目标
- 二、数据说明
- 三、实现步骤
- 1.建立用户指标体系
- 2.建立分阶段用户标签
- 2.1 按用户行为统计每日的不同阶段用户规模
- 2.2 按周为单位分析最近两周的不同阶段用户的变化
- 2.3 对比分析新用户群体与激活用户群体的人群画像、行为数据特点
- 3、用户复购分析
- 3.1 计算2月1日至4月30日用户的复购情况
- 3.2 对比复购用户和非复购用户的用户画像、行为特点
- 3.3 结合最后一周的分阶段用户标签,分析近期复购用户的用户激活、用户流失的情况
- 4、报告
一、分析目标
1.搭建日常运营指标体系;
2.分析现有用户构成,统计每日用户构成情况;
3.分析用户复购情况,为后续复购运营做指导。
二、数据说明
User_info 用户信息表:
user_action 用户行为表:
三、实现步骤
首先需要搭建运营指标体系,让运营人员能够看到用户规模以及规模的变动状态;然后我们需要按照用户阶段(新用户、激活用户、睡眠用户、流失用户)将用户区分出来,统计每天不同阶段的用户数量,让运营能够了解不同阶段用户规模,设计更加贴切的用户运营方案。
建表:
create table user_info
(user_id string,
age_between string,
sex int,
user_level int,
reg_time date
)
row format delimited fields terminated by '\t'
tblproperties("skip.header.line.count"="1"); load data local inpath '/home/hadoop/datas/HomeWork/user_info_sample.txt' into table
user_info;ALTER TABLE user_info SET SERDEPROPERTIES ('serialization.encoding'='GBK');create table User_action
(user_id string,
sku_id string,
action_time Timestamp,
model_id int,
type int,
cate int,
brand int
)
row format delimited fields terminated by '\t'
tblproperties("skip.header.line.count"="1"); --转载时跳过第一行
--装载数据
load data local inpath '/home/hadoop/datas/HomeWork/user_action_sample.txt' into
table user_action;
1.建立用户指标体系
我们需要根据用户活跃指标来逐步建立用户指标体系:
- 每日活跃用户数:DAU,即每日活跃用户的排重统计量;
selectdate(action_time) action_time,count(distinct user_id) DAU
fromuser_action
group bydate(action_time);
- 每日新注册用户数:DNU,即每日新注册用户的排重统计量;
selectreg_time,count(distinct user_id)
from user_info
group by reg_time;
- 每日消费转化率:即每日所有用户的有下单行为的用户数与每日活跃用户数的百分比;
selectdate(action_time) action_time,concat(round(count(distinct if(type=4,user_id,Null))/count(distinct user_id)*100,2),"%") as per_con_rate
from user_action
group by date(action_time)
order by date(action_time);
- 每日新用户消费转化率:即每日新注册用户中,有下单行为的用户数与每日新注册用户数的百分比;
selectreg_time,concat(round(count(distinct if(type=4,user_info.user_id,Null))/count(distinct user_info.user_id)*100,2),"%") as new_per_con_rate
from user_info
left join user_action on user_info.reg_time=date(user_action.action_time)
and user_info.user_id=user_action.user_id
group by reg_time
order by reg_time;
分析近期各个指标的变动特点:
select action_time,concat(round((DAU - DAU_1)/DAU_1 *100,2),"%") as DAU_DOD,concat(round((DNU - DNU_1)/DNU_1 *100,2),"%") as DNU_DOD,concat(round((per_con_rate - per_con_rate_1) / per_con_rate_1 *100,2),"%") as per_con_rate_DOD,concat(round((new_per_con_rate - new_per_con_rate_1) / new_per_con_rate_1 *100,2),"%") as new_per_con_rate_DOD,concat(round((DAU - DAU_7)/DAU_7 *100,2),"%") as DAU_WOW,concat(round((DNU - DNU_7)/DNU_7 *100,2),"%") as DNU_WOW,concat(round((per_con_rate - per_con_rate_7) / per_con_rate_7 *100,2),"%") as per_con_rate_WOW,concat(round((new_per_con_rate - new_per_con_rate_7) / new_per_con_rate_7 *100,2),"%") as new_per_con_rate_WOW
from(select *,lag(DAU,1) over(order by action_time) as DAU_1,lag(DNU,1) over(order by action_time) as DNU_1,lag(per_con_rate,1) over(order by action_time) as per_con_rate_1,lag(new_per_con_rate,1) over(order by action_time) as new_per_con_rate_1,lag(DAU,7) over(order by action_time) as DAU_7,lag(DNU,7) over(order by action_time) as DNU_7,lag(per_con_rate,7) over(order by action_time) as per_con_rate_7,lag(new_per_con_rate,7) over(order by action_time) as new_per_con_rate_7from(select date(action_time) action_time,count(distinct user_id) DAU,concat(round(count(distinct if(type=4,user_id,Null))/count(distinct user_id)*100,2),"%") as per_con_ratefrom user_actiongroup by date(action_time) ) as table_aleft join(select reg_time,count(distinct user_info.user_id) as DNU,concat(round(count(distinct if(type=4,user_info.user_id,Null))/count(distinct user_info.user_id)*100,2),"%") as new_per_con_ratefrom user_infoleft join user_action on user_info.user_id=user_action.user_idand reg_time=date(action_time)group by reg_time) as table_b on table_a.action_time=table_b.reg_time) as table_c
order by action_time;
2.建立分阶段用户标签
- 将注册当天的用户作为新用户
riqi=reg_time
- 将注册未满一周的用户作为未激活用户(为了避免新用户成为跳出用户)
(max_time-reg_time<7 or max_time is Null) and riqi-reg_time<14
- 注册后的第8天,用户进入激活期,激活期为注册后的第8到第14天,在激活期回访的用户,则成为激活用户。(为了让未激活用户成为激活用户)
max_time-reg_time>7 and riqi-max_time<7
- 若用户成为激活用户后,有一周没有活跃;或从注册后在激活期(第8到第14天)未激活且第14天之后也未活跃的用户作为睡眠用户。(主要做唤醒)
(max_time-reg_time>7 and riqi-max-time>7 and riqi-max_time<=21)
or(riqi-reg_time>14 and riqi-reg_time<=28 and (max_time-reg_time<7 or max_time is Null))
- 将成为睡眠用户后,有两周没有活跃的用户作为流失用户。(虽然同为唤醒范围,但是流失用户的唤醒难度要大于睡眠用户,在预算较少的唤醒活动中不予考虑。)
(max_time-reg_time>7 and riqi-max_time>21) or
(riqi-reg_time>28 and (max_time-reg_time<7 or max_time is Null))
之所以我们需要统计不同阶段的用户,是因为我们仅从DAU、DNU或MAU数据都很难去观察我们用户池的用户构成如何,用户是沉淀下来了逐渐变多?还是在逐渐减少?这些是需要我们通过将不同阶段用户细拆之后才能统计出来的。
2.1 按用户行为统计每日的不同阶段用户规模
当新增用户、未激活用户、激活用户增加,且睡眠用户、流失用户减少,那我们的用户规模就在呈上升趋势;反之,则呈下降趋势。分析近期我们用户规模的变动趋势,判断我们的用户池的健康程度。
-- 准备统计日期数据(每日都要统计,故将用户行为表中的时间单独拉出作为统计时间)
create table riqi as
select date(action_time) riqi from user_action group by date(action_time);-- 准备最后一次用户行为时间(由于统计日统计的是上一次的行为时间,故约束统计日期上一次行为时间作为最后一次行为时间。riqi表是单独拉出的连续时间,并非用户最后一次行为时间,riqi表时间与用户行为时间无关)
create table base_info as
select riqi,user_id,reg_time,(select max(action_time) max_time from user_action where date(action_time)<riqi.riqi and user_id=user_info.user_id) as max_time
from riqi,user_info;select riqi,user_type,count(*)
from(select *,case when riqi=reg_time then "new_user"when ( day(max_time-reg_time)<7 or max_time is Null) and day(riqi-reg_time)<14 then "inaction_user"when day(max_time-reg_time)>7 and day(riqi-max_time)<7 then "action_user"when (day(max_time-reg_time)>7 and day(riqi-max_time)>7 and day(riqi-max_time)<=21) or (day(riqi-reg_time)>14 and day(riqi-reg_time)<=28 and (day(max_time-reg_time)<7 or max_time is Null)) then "sleep_user"when (day(max_time-reg_time)>7 and day(riqi-max_time)>21) or (day(riqi-reg_time)>28 and (day(max_time-reg_time)<7 or max_time is Null)) then "lose_user"end as user_typefrom base_info
) as a
group by riqi,user_type;
2.2 按周为单位分析最近两周的不同阶段用户的变化
用户量变化、人均浏览数变化、人均点击数变化、人均加购物车数变化、人均下单数变化、购买转化率变化。
select if(date(action_time)>"2016-04-08","week_1","week_2") as week,count(distinct user_id) DAU_week,round(count(if(type=1,user_id,Null))/count(distinct user_id),2) avg_user1,round(count(if(type=6,user_id,Null))/count(distinct user_id),2) avg_user6,round(count(if(type=2,user_id,Null))/count(distinct user_id),2) avg_user2,round(count(if(type=4,user_id,Null))/count(distinct user_id),2) avg_user4,concat(round(count(distinct if(type=4,user_id,Null))/count(distinct user_id)*100,2),"%") buy_rate
from(
select * from user_action where date(action_time)>"2016-04-01"
) as a
group by date(action_time)>"2016-04-08";
2.3 对比分析新用户群体与激活用户群体的人群画像、行为数据特点
简述近期产品引入的新用户群体是否健康。
-- 新用户群体
select age_between,sex,count(distinct a.user_id) count_user,round(count(if(type=1,a.user_id,Null))/count(distinct a.user_id),2) avg_user1,round(count(if(type=6,a.user_id,Null))/count(distinct a.user_id),2) avg_user6,round(count(if(type=2,a.user_id,Null))/count(distinct a.user_id),2) avg_user2,round(count(if(type=4,a.user_id,Null))/count(distinct a.user_id),2) avg_user4,concat(round(count(distinct if(type=4,a.user_id,Null))/count(distinct a.user_id)*100,2),"%") buy_rate
from
(select * from user_info where reg_time>"2016-04-08" and reg_time<="2016-04-15") as a
left join
(select * from user_action where date(action_time)>"2016-04-08") as b
on a.user_id=b.user_id
group by age_between,sex
grouping sets(age_between,sex);-- 激活用户群体
select age_between,sex,count(distinct a.user_id) count_user,round(count(if(type=1,a.user_id,Null))/count(distinct a.user_id),2) avg_user1,round(count(if(type=6,a.user_id,Null))/count(distinct a.user_id),2) avg_user6,round(count(if(type=2,a.user_id,Null))/count(distinct a.user_id),2) avg_user2,round(count(if(type=4,a.user_id,Null))/count(distinct a.user_id),2) avg_user4,concat(round(count(distinct if(type=4,a.user_id,Null))/count(distinct a.user_id)*100,2),"%") buy_rate
from
(select * from user_info where reg_time<="2016-04-08") as a
inner join
(select * from user_action where date(action_time)>"2016-04-08") as b
on a.user_id=b.user_id
group by age_between,sex
grouping sets(age_between,sex);
3、用户复购分析
对于用户运营来说,由于获客成本居高不下,提升用户复购是将用户价值最大化的关键。
那到底什么时候该关注复购,凯文·希尔斯特罗姆在《精益数据分析》一书中给了参考:
-
90天内重复购买率达到1%~15%;说明你处于用户获取模式;把更多的精力和资源投入到新用户获取和转化;
-
90天内重复购买率达到15~30%;说明你处于混合模式;平衡用在新用户转化和老用户留存、复购上的精力和资源;
-
90天内重复购买率达到30%以上;说明你处于忠诚度模式;把更多的精力和资源投入到用户复购上;
3.1 计算2月1日至4月30日用户的复购情况
- 用户复购率 = 复购用户数 / 活跃用户数;
- 复购用户数 :一段时间内购买次数达两次及以上的用户数
经过统计,用户的复购率为8.87%,说明产品处于用户获取模式。
select
count(if(buy_count>=2,user_id,Null)) re_buy,
count(distinct user_id) AU,
concat(round(count(if(buy_count>=2,user_id,Null))/count(distinct user_id)*100,2),"%") as re_buy_rate
from
(select user_id,count(if(type=4,user_id,Null)) buy_count
from user_action
where date(action_time)>="2016-02-01"
group by user_id) as a;
3.2 对比复购用户和非复购用户的用户画像、行为特点
结合用户信息表和用户购买行为表,可以看出复购用户的特点:复购用户26-35岁的男性居多。
-- 复购用户
select age_between,sex,
count(if(buy_count>=2,a.user_id,Null)) re_buy,
count(distinct a.user_id) AU,
concat(round(count(if(buy_count>=2,a.user_id,Null))/count(distinct a.user_id)*100,2),"%") as re_buy_rate
from
(select user_id,count(if(type=4,user_id,Null)) buy_count from user_action
group by user_id) as a
left join
(select user_id,age_between,sex from user_info) as b
on a.user_id=b.user_id
group by age_between,sex
grouping sets(age_between,sex);-- 非复购用户
select age_between,sex,
count(if(buy_count<2,a.user_id,Null)) not_re_buy,
count(distinct a.user_id) AU,
concat(round(count(if(buy_count<2,a.user_id,Null))/count(distinct a.user_id)*100,2),"%") as not_re_buy_rate
from
(select user_id,count(if(type=4,user_id,Null)) buy_count from user_action
group by user_id) as a
left join
(select user_id,age_between,sex from user_info) as b
on a.user_id=b.user_id
group by age_between,sex
grouping sets(age_between,sex);
3.3 结合最后一周的分阶段用户标签,分析近期复购用户的用户激活、用户流失的情况
select user_type,count(distinct user_id) user_num
from(
select distinct a.user_id as user_id,case when reg_time="2016-04-15" then "new_user"when (day(max_time-reg_time)<=7 or max_time is Null)and reg_time>"2016-04-01" then "inaction_user"when day(max_time-reg_time)>7 and max_time>"2016-04-08" then "action_user"when (day(max_time-reg_time)>7 and max_time<"2016-04-08" and max_time>="2016-03-25") or (reg_time<"2016-04-01" and reg_time>="2016-03-18" and (day(max_time-reg_time)<7 or max_time is Null)) then "sleep_user"when (day(max_time-reg_time)>7 and max_time<"2016-03-25") or (reg_time<"2016-03-18" and (day(max_time-reg_time)<7 or max_time is Null)) then "lose_user"end as user_type
from
(select a1.user_id user_id,a1.action_time action_time,max(a1.action_time)over(partition by a1.user_id) max_time
from(select user_id,date(action_time) action_time,count(user_id)over(partition by user_id) buy_countfrom user_action where type=4) as a1 where buy_count>=2)as a
left join
(select user_id,reg_time from user_info) as b
on a.user_id=b.user_id
) as c
group by user_type;
4、报告
https://gitee.com/Ariel999/data-analysis-case/blob/master/Hive%E5%AD%A6%E4%B9%A0/%E7%94%A8%E6%88%B7%E8%BF%90%E8%90%A5%E5%89%8D%E6%9C%9F%E5%88%86%E6%9E%90%E6%8A%A5%E5%91%8A-%E5%BC%A0%E8%8D%A3%E8%8D%A3.pptx