-- 1) weekly retention을 구하는 쿼리를 바닥부터 스스로 작성해보세요.
-- event_date | event_first_date | diff_date | user_pseudo_id
-- diff_date | count(user_pseudo_id)
with base as (
select
user_pseudo_id,
DATE_TRUNC(MIN(event_date) OVER (PARTITION BY user_pseudo_id), WEEK(MONDAY)) as event_first_week,
DATE_TRUNC(event_date,WEEK(monday)) as event_week,
DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') as event_time
from advanced.app_logs
)
, diff_date_tbl as (
select user_pseudo_id,
event_first_week,
event_week,
DATE_DIFF(event_week, event_first_week, WEEK) as diff_date
from base
order by diff_date
)
, user_counts as (
select event_first_week,
diff_date,
count(distinct user_pseudo_id) as user_count
from diff_date_tbl
group by all
order by diff_date
)
select * ,
ROUND(SAFE_DIVIDE(user_count, first_user_count), 2) as retention_rate
from (
select * ,
FIRST_VALUE(user_count) OVER (PARTITION BY event_first_week ORDER BY diff_date) as first_user_count
from user_counts
)
📌 분류기준 정하기 : 유저들의 평균주기 (마지막 접속일 - 이전 접속일의 평균일)는? -> 약 34일 -> 결론 : 유저 분류시 한달을 기준으로 분류할것.
데이터 기간 : 예를 들어, 2023-01-20일자 데이터까지 존재해, 2023-12-31까지 최근 월의 이전월까지만 살펴보기
📌 유저 세분화 기준
with base as (
select
user_pseudo_id,
MIN(event_date) OVER (PARTITION BY user_pseudo_id) as event_first_day,
event_date,
DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') as event_time
from advanced.app_logs
)
, period as (
select user_pseudo_id,
event_date,
LAG(event_date) OVER (PARTITION BY user_pseudo_id ORDER BY event_date) as before_event_date
from base
GROUP BY ALL
ORDER BY user_pseudo_id
)
-- 평균주기 구하기
SELECT AVG(diff_day) as avg_diff_period
FROM (
select *,
DATE_DIFF(event_date, before_event_date, DAY ) as diff_day
from period
)
-- 접속 주기 분포도 확인
SELECT diff_day,
COUNT(distinct user_pseudo_id) as user_count
FROM (
select *,
DATE_DIFF(event_date, before_event_date, DAY ) as diff_day
from period
)
GROUP BY ALL
ORDER BY diff_day
- 앞서 구한 평균주기에 따라 기간 : monthly로 분류
- 유저 세분화 기준
1) new 유저 = (최근월 - 1)월 = 첫접속일자(event_first_month)
2) current 유저 = (최근월 -1)월 , (최근월 -2)월 모두 활동 o
3) dormant 유저 = (최근월 - 1)월 , (최근월 -2)월 모두 활동 x
4) resurrected 유저 = (최근월 -2)월 활동 x, (최근월 -1)월 에는 활동 o
-- 최근 월 : MAX(DATE_TRUNC(event_date, MONTH))
-- 최근 월 - 1 이전월 : DATE_SUB(MAX(DATE_TRUNC(event_date, MONTH)), INTERVAL 1 MONTH)
-- 최근 월 - 2 이전월 : DATE_SUB(MAX(DATE_TRUNC(event_date, MONTH)), INTERVAL 2 MONTH)
with base as (
select user_pseudo_id,
event_date,
DATE_TRUNC(event_date, MONTH) as event_month
from advanced.app_logs
order by 1
)
, min_max_month as (
SELECT *,
MIN(event_month) over (partition by user_pseudo_id order by event_month) as first_month,
MAX(DATE_TRUNC(event_date, MONTH)) OVER () as lastest_month
from base
)
, filter_month as (
SELECT user_pseudo_id,
first_month,
lastest_month,
COUNT(CASE WHEN event_month = DATE_SUB(lastest_month, INTERVAL 1 MONTH) THEN 1 END) AS previous_1_lastest_month,
COUNT(CASE WHEN event_month = DATE_SUB(lastest_month, INTERVAL 2 MONTH) THEN 1 END) AS previous_2_lastest_month
FROM min_max_month
group by ALL
)
, user_classification as (
SELECT *,
CASE WHEN first_month = DATE_SUB(lastest_month, INTERVAL 1 MONTH ) THEN 'New'
WHEN previous_1_lastest_month > 0 and previous_2_lastest_month >= 0 THEN 'Current'
WHEN previous_1_lastest_month = 0 and previous_2_lastest_month = 0 THEN 'Dormant'
WHEN previous_1_lastest_month = 0 and previous_2_lastest_month > 0 THEN 'Resurrected'
ELSE 'Others' END AS user_category
FROM filter_month
)
select user_category,
count(distinct user_pseudo_id)
from user_classification
group by all