1️⃣ 리텐션 연습문제

-- 1) weekly retention을 구하는 쿼리를 바닥부터 스스로 작성해보세요. 

-- event_date | event_first_date | diff_date  | user_pseudo_id 
-- diff_date | count(user_pseudo_id) 

with base as (
  select 
  user_pseudo_id,
  DATE_TRUNC(MIN(event_date) OVER (PARTITION BY user_pseudo_id), WEEK(MONDAY)) as event_first_week,   
  DATE_TRUNC(event_date,WEEK(monday)) as event_week, 
  DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul')  as event_time
  from advanced.app_logs
) 

, diff_date_tbl as (
  select user_pseudo_id, 
  event_first_week, 
  event_week,
  DATE_DIFF(event_week, event_first_week, WEEK) as diff_date 
  from base 
  order by diff_date

) 
, user_counts as (
  select event_first_week, 
  diff_date, 
  count(distinct user_pseudo_id) as user_count
  from diff_date_tbl
  group by all 
  order by diff_date
) 

select * , 
ROUND(SAFE_DIVIDE(user_count, first_user_count), 2)  as retention_rate
from (
  select * , 
  FIRST_VALUE(user_count) OVER (PARTITION BY event_first_week ORDER BY diff_date) as first_user_count
  from user_counts 
) 

image.png

2️⃣  retain user를 new + current + resurrected + dormant user로 나누는 쿼리를 작성해보세요.

📌 분류기준 정하기 : 유저들의 평균주기 (마지막 접속일 - 이전 접속일의 평균일)는? -> 약 34일 -> 결론 : 유저 분류시 한달을 기준으로 분류할것.

데이터 기간 : 예를 들어, 2023-01-20일자 데이터까지 존재해, 2023-12-31까지 최근 월의 이전월까지만 살펴보기

📌 유저 세분화 기준

  1. new 유저 = (최근월 - 1)월 = 첫접속일자(event_first_month)
  2. current 유저 = (최근월 -1)월 , (최근월 -2)월 모두 활동 o
  3. dormant 유저 = (최근월 - 1)월 , (최근월 -2)월 모두 활동 x
  4. resurrected 유저 = (최근월 -2)월 활동 x, (최근월 -1)월 에는 활동 o

with base as (
  select 
  user_pseudo_id,
  MIN(event_date) OVER (PARTITION BY user_pseudo_id) as event_first_day,   
  event_date, 
  DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul')  as event_time
  from advanced.app_logs
) 

, period as (
  select user_pseudo_id, 
  event_date, 
  LAG(event_date) OVER (PARTITION BY user_pseudo_id ORDER BY event_date) as before_event_date
  from base 
  GROUP BY ALL 
  ORDER BY user_pseudo_id
) 

-- 평균주기 구하기 
SELECT AVG(diff_day) as avg_diff_period
FROM (
select *, 
DATE_DIFF(event_date, before_event_date, DAY ) as diff_day 
from period
) 

-- 접속 주기 분포도 확인 

SELECT diff_day, 
COUNT(distinct user_pseudo_id) as user_count
FROM (
  select *, 
  DATE_DIFF(event_date, before_event_date, DAY ) as diff_day 
  from period
)
GROUP BY ALL 
ORDER BY diff_day 

image.png

image.png

image.png


- 앞서 구한 평균주기에 따라 기간 : monthly로 분류 

- 유저 세분화 기준 
1) new 유저 =  (최근월 - 1)월 = 첫접속일자(event_first_month) 
2) current 유저 =  (최근월 -1)월 , (최근월 -2)월 모두 활동 o 
3) dormant 유저 =  (최근월 - 1)월 , (최근월 -2)월 모두 활동 x
4) resurrected  유저 = (최근월 -2)월 활동 x, (최근월 -1)월 에는 활동 o 

-- 최근 월 : MAX(DATE_TRUNC(event_date, MONTH)) 
-- 최근 월 - 1 이전월 : DATE_SUB(MAX(DATE_TRUNC(event_date, MONTH)), INTERVAL 1 MONTH)
-- 최근 월 - 2 이전월 : DATE_SUB(MAX(DATE_TRUNC(event_date, MONTH)), INTERVAL 2 MONTH)

with base as (
select user_pseudo_id, 
event_date, 
DATE_TRUNC(event_date, MONTH) as event_month
from advanced.app_logs
order by 1 
) 

, min_max_month as (
  SELECT *, 
  MIN(event_month) over (partition by user_pseudo_id order by event_month) as first_month,
  MAX(DATE_TRUNC(event_date, MONTH)) OVER () as lastest_month
  from base 
) 

, filter_month  as (
  SELECT user_pseudo_id, 
  first_month, 
  lastest_month, 
  COUNT(CASE WHEN event_month = DATE_SUB(lastest_month, INTERVAL 1 MONTH) THEN 1 END) AS previous_1_lastest_month,
  COUNT(CASE WHEN event_month = DATE_SUB(lastest_month, INTERVAL 2 MONTH) THEN 1  END)  AS previous_2_lastest_month  
  FROM min_max_month 
  group by ALL 
) 

, user_classification as (
  SELECT *, 
  CASE WHEN first_month = DATE_SUB(lastest_month, INTERVAL 1 MONTH ) THEN 'New'
       WHEN previous_1_lastest_month > 0 and previous_2_lastest_month >= 0 THEN 'Current'
       WHEN previous_1_lastest_month = 0 and previous_2_lastest_month = 0 THEN 'Dormant'
       WHEN previous_1_lastest_month = 0 and previous_2_lastest_month > 0 THEN 'Resurrected'
       ELSE 'Others' END AS user_category
  FROM filter_month 
) 

select user_category, 
count(distinct user_pseudo_id)
from user_classification 
group by all 

image.png

image.png

image.png