with corrected_gd_loc_map as (
select
loc_nm,
case
when loc_nm like '将%' then -1
when loc_nm like '北%' or loc_type like '写%' then 1
when loc_type like '地%' then -1
end as loc_num
from gd_loc_map
),
tag_mon AS (
SELECT
r.user_id,
DATE_FORMAT(r.start_time, "%Y%m") AS mon
FROM hello_bike_riding_rcd r
JOIN corrected_gd_loc_map s ON r.start_loc = s.loc_nm
JOIN corrected_gd_loc_map e ON r.end_loc = e.loc_nm
WHERE YEAR(r.start_time) BETWEEN 2020 AND 2024
AND s.loc_num + e.loc_num = 0
GROUP BY 1, 2
HAVING COUNT(DISTINCT date(start_time)) >= 5
),
user_active_tag AS (
SELECT DISTINCT
user_id,1 AS active_tag
FROM (
SELECT
user_id, mon,
PERIOD_ADD(mon, -RANK() OVER (PARTITION BY user_id ORDER BY mon)) AS g
FROM tag_mon
) grouped_data
GROUP BY user_id, g
HAVING COUNT(1) >= 3
),
all_users AS (
SELECT DISTINCT
user_id
FROM hello_bike_riding_rcd
)
SELECT
user_id,
coalesce(u.active_tag, 0) AS active_tag
FROM all_users a
LEFT JOIN user_active_tag u using(user_id);
with corrected_gd_loc_map as (
select
loc_nm,
case
when loc_nm like '将%' or loc_type like '地%' then -1
when loc_nm like '北%' or loc_type like '写%' then 1
end as loc_num
from gd_loc_map
),
tag_mon AS (
SELECT
r.user_id,
DATE_FORMAT(r.start_time, "%Y%m") AS mon
FROM hello_bike_riding_rcd r
JOIN corrected_gd_loc_map s ON r.start_loc = s.loc_nm
JOIN corrected_gd_loc_map e ON r.end_loc = e.loc_nm
WHERE YEAR(r.start_time) BETWEEN 2020 AND 2024
AND s.loc_num + e.loc_num = 0
GROUP BY 1, 2
HAVING COUNT(DISTINCT date(start_time)) >= 5
),
user_active_tag AS (
SELECT DISTINCT
user_id,1 AS active_tag
FROM (
SELECT
user_id, mon,
PERIOD_ADD(mon, -RANK() OVER (PARTITION BY user_id ORDER BY mon)) AS g
FROM tag_mon
) grouped_data
GROUP BY user_id, g
HAVING COUNT(1) >= 3
),
all_users AS (
SELECT DISTINCT
user_id
FROM hello_bike_riding_rcd
)
SELECT
user_id,
coalesce(u.active_tag, 0) AS active_tag
FROM all_users a
LEFT JOIN user_active_tag u using(user_id);
with corrected_gd_loc_map as (
select
loc_nm,
case
when loc_nm = '北京机床研究所' then '写字楼'
when loc_nm = '将台西' then '地铁站'
else loc_type
end as loc_type
from gd_loc_map
),
base_data AS (
SELECT
r.user_id,
DATE(r.start_time) AS ride_date,
DATE_FORMAT(r.start_time, "%Y%m") AS ride_month
FROM hello_bike_riding_rcd r
JOIN corrected_gd_loc_map s ON r.start_loc = s.loc_nm
JOIN corrected_gd_loc_map e ON r.end_loc = e.loc_nm
WHERE YEAR(r.start_time) BETWEEN 2020 AND 2024
AND (
(s.loc_type = '地铁站' AND e.loc_type = '写字楼')
OR (s.loc_type = '写字楼' AND e.loc_type = '地铁站')
)
),
monthly_commute_days AS (
SELECT
user_id, ride_month
FROM base_data
GROUP BY user_id, ride_month
HAVING COUNT(DISTINCT ride_date) >= 5
),
grouped_data AS (
SELECT
user_id, ride_month,
PERIOD_ADD(ride_month, -RANK() OVER (PARTITION BY user_id ORDER BY ride_month)) AS g
FROM monthly_commute_days
),
user_active_tag AS (
SELECT
DISTINCT user_id,1 AS active_tag
FROM grouped_data
GROUP BY user_id, g
HAVING COUNT(1) >= 3
),
all_users AS (
SELECT DISTINCT
user_id
FROM hello_bike_riding_rcd
)
SELECT
user_id,
ifnull(u.active_tag, 0) AS active_tag
FROM all_users a
LEFT JOIN user_active_tag u using(user_id);
WITH user_login AS (
SELECT DISTINCT
usr_id,
DATE(login_time) AS login_date
FROM user_login_log
),
daily_users as (
SELECT
usr_id, login_date,
lag(login_date) over(partition by usr_id order by login_date) last_login_date
from user_login
),
user_categories AS (
SELECT
login_date,
usr_id,
CASE
WHEN last_login_date IS NULL THEN 1
WHEN DATEDIFF(login_date, last_login_date) BETWEEN 1 AND 3 THEN 2
ELSE 4
END AS user_type
FROM daily_users
WHERE YEAR(login_date) = 2024
),
user_stats AS (
SELECT
login_date,
sum(user_type&1) new,
sum((user_type&2) >> 1) retained,
sum((user_type&4) >> 2) returning,
count(1) as total_count
FROM user_categories
GROUP BY login_date
),
cohort_summary AS (
SELECT
login_date,
CONCAT(
ROUND(new * 100 / total_count, 2), ', ',
ROUND(retained * 100 / total_count, 2), ', ',
ROUND(returning * 100 / total_count, 2)
) AS pct
FROM user_stats
)
SELECT login_date, pct FROM cohort_summary
ORDER BY login_date;
WITH user_login AS (
SELECT DISTINCT
usr_id,
DATE(login_time) AS login_date
FROM user_login_log
),
daily_users as (
SELECT
usr_id, login_date,
lag(login_date) over(partition by usr_id order by login_date) last_login_date
from user_login
),
user_categories AS (
SELECT
login_date,
usr_id,
CASE
WHEN last_login_date IS NULL THEN 1
WHEN DATEDIFF(login_date, last_login_date) BETWEEN 1 AND 3 THEN 2
ELSE 4
END AS user_type
FROM daily_users
WHERE YEAR(login_date) = 2024
),
user_stats AS (
SELECT
login_date,
sum(user_type&1) new,
sum(user_type&2) retained,
sum(user_type&4) returning,
count(1) as total_count
FROM user_categories
GROUP BY login_date
),
cohort_summary AS (
SELECT
login_date,
CONCAT(
ROUND(new * 100 / total_count, 2), ', ',
ROUND(retained * 100 / total_count, 2), ', ',
ROUND(returning * 100 / total_count, 2)
) AS pct
FROM user_stats
)
SELECT login_date, pct FROM cohort_summary
ORDER BY login_date;
with corrected_gd_loc_map as (
select
loc_nm,
case
when loc_nm = '北京机床研究所' then '写字楼'
when loc_nm = '将台西' then '地铁站'
else loc_type
end as loc_type
from gd_loc_map
),
base_data AS (
SELECT
r.user_id,
DATE(r.start_time) AS ride_date,
DATE_FORMAT(r.start_time, "%Y%m") AS ride_month
FROM hello_bike_riding_rcd r
JOIN corrected_gd_loc_map s ON r.start_loc = s.loc_nm
JOIN corrected_gd_loc_map e ON r.end_loc = e.loc_nm
WHERE YEAR(r.start_time) BETWEEN 2020 AND 2024
AND (
(s.loc_type = '地铁站' AND e.loc_type = '写字楼')
OR (s.loc_type = '写字楼' AND e.loc_type = '地铁站')
)
),
monthly_commute_days AS (
SELECT
user_id, ride_month
FROM base_data
GROUP BY user_id, ride_month
HAVING COUNT(DISTINCT ride_date) >= 5
),
grouped_data AS (
SELECT
user_id, ride_month,
PERIOD_ADD(ride_month, -RANK() OVER (PARTITION BY user_id ORDER BY ride_month)) AS g
FROM monthly_commute_days
),
user_active_tag AS (
SELECT
DISTINCT user_id,1 AS active_tag
FROM grouped_data
GROUP BY user_id, g
HAVING COUNT(1) >= 3
),
all_users AS (
SELECT DISTINCT
user_id,
0 AS active_tag
FROM hello_bike_riding_rcd
)
SELECT
user_id,
COALESCE(u.active_tag, a.active_tag) AS active_tag
FROM all_users a
LEFT JOIN user_active_tag u using(user_id);
with corrected_gd_loc_map as (
select
loc_nm,
case
when loc_nm = '北京机床研究所' then '写字楼'
when loc_nm = '将台西' then '地铁站'
else loc_type
end as loc_type
from gd_loc_map
),
base_data AS (
SELECT
r.user_id,
DATE(r.start_time) AS ride_date,
DATE_FORMAT(r.start_time, "%Y%m") AS ride_month
FROM hello_bike_riding_rcd r
JOIN corrected_gd_loc_map s ON r.start_loc = s.loc_nm
JOIN corrected_gd_loc_map e ON r.end_loc = e.loc_nm
WHERE (
(s.loc_type = '地铁站' AND e.loc_type = '写字楼')
OR (s.loc_type = '写字楼' AND e.loc_type = '地铁站')
)
),
monthly_commute_days AS (
SELECT
user_id, ride_month,
COUNT(DISTINCT ride_date) AS commute_days
FROM base_data
GROUP BY user_id, ride_month
HAVING commute_days >= 5
),
grouped_data AS (
SELECT
user_id, ride_month,
PERIOD_ADD(ride_month, -RANK() OVER (PARTITION BY user_id ORDER BY ride_month)) AS g
FROM monthly_commute_days
),
user_active_tag AS (
SELECT
DISTINCT user_id,1 AS active_tag
FROM grouped_data
GROUP BY user_id, g
HAVING COUNT(1) >= 3
),
all_users AS (
SELECT DISTINCT
user_id,
0 AS active_tag
FROM hello_bike_riding_rcd
)
SELECT
user_id,
COALESCE(u.active_tag, a.active_tag) AS active_tag
FROM all_users a
LEFT JOIN user_active_tag u using(user_id);
with corrected_gd_loc_map as (
select
loc_nm,
case
when loc_nm = '北京机床研究所' then 1
when loc_nm = '将台西' then 0
when loc_type = '写字楼' then 1
when loc_type = '地铁站' then 0
end as loc_type
from gd_loc_map
),
base_data AS (
SELECT
r.user_id,
DATE(r.start_time) AS ride_date,
DATE_FORMAT(r.start_time, "%Y%m") AS ride_month
FROM hello_bike_riding_rcd r
JOIN corrected_gd_loc_map s ON r.start_loc = s.loc_nm
JOIN corrected_gd_loc_map e ON r.end_loc = e.loc_nm
WHERE YEAR(r.start_time) BETWEEN 2020 AND 2024
and s.loc_type ^ e.loc_type = 1
),
monthly_commute_days AS (
SELECT
user_id, ride_month,
COUNT(DISTINCT ride_date) AS commute_days
FROM base_data
GROUP BY user_id, ride_month
HAVING commute_days >= 5
),
grouped_data AS (
SELECT
user_id, ride_month,
PERIOD_ADD(ride_month, -RANK() OVER (PARTITION BY user_id ORDER BY ride_month)) AS g
FROM monthly_commute_days
),
user_active_tag AS (
SELECT
DISTINCT user_id,1 AS active_tag
FROM grouped_data
GROUP BY user_id, g
HAVING COUNT(1) >= 3
),
all_users AS (
SELECT DISTINCT
user_id,
0 AS active_tag
FROM hello_bike_riding_rcd
)
SELECT
user_id,
COALESCE(u.active_tag, a.active_tag) AS active_tag
FROM all_users a
LEFT JOIN user_active_tag u using(user_id);
with corrected_gd_loc_map as (
select
loc_nm,
case
when loc_nm = '北京机床研究所' then '写字楼'
when loc_nm = '将台西' then '地铁站'
else loc_type
end as loc_type
from gd_loc_map
),
base_data AS (
SELECT
r.user_id,
DATE(r.start_time) AS ride_date,
DATE_FORMAT(r.start_time, "%Y%m") AS ride_month
FROM hello_bike_riding_rcd r
JOIN corrected_gd_loc_map s ON r.start_loc = s.loc_nm
JOIN corrected_gd_loc_map e ON r.end_loc = e.loc_nm
WHERE YEAR(r.start_time) BETWEEN 2020 AND 2024
AND (
(s.loc_type = '地铁站' AND e.loc_type = '写字楼')
OR (s.loc_type = '写字楼' AND e.loc_type = '地铁站')
)
),
monthly_commute_days AS (
SELECT
user_id, ride_month,
COUNT(DISTINCT ride_date) AS commute_days
FROM base_data
GROUP BY user_id, ride_month
HAVING commute_days >= 5
),
grouped_data AS (
SELECT
user_id, ride_month,
PERIOD_ADD(ride_month, -RANK() OVER (PARTITION BY user_id ORDER BY ride_month)) AS g
FROM monthly_commute_days
),
user_active_tag AS (
SELECT
DISTINCT user_id,1 AS active_tag
FROM grouped_data
GROUP BY user_id, g
HAVING COUNT(1) >= 3
),
all_users AS (
SELECT DISTINCT
user_id,
0 AS active_tag
FROM hello_bike_riding_rcd
)
SELECT
user_id,
COALESCE(u.active_tag, a.active_tag) AS active_tag
FROM all_users a
LEFT JOIN user_active_tag u using(user_id);
with corrected_gd_loc_map as (
select
loc_nm,
case
when loc_nm = '北京机床研究所' then '写字楼'
when loc_nm = '将台西' then '地铁站'
else loc_type
end as loc_type
from gd_loc_map
),
base_data AS (
SELECT
r.user_id,
DATE(r.start_time) AS ride_date,
DATE_FORMAT(r.start_time, "%Y%m") AS ride_month
FROM hello_bike_riding_rcd r
JOIN corrected_gd_loc_map s ON r.start_loc = s.loc_nm
JOIN corrected_gd_loc_map e ON r.end_loc = e.loc_nm
WHERE YEAR(r.start_time) BETWEEN 2020 AND 2024
AND (
(s.loc_type = '地铁站' AND e.loc_type = '写字楼')
OR (s.loc_type = '写字楼' AND e.loc_type = '地铁站')
)
),
monthly_commute_days AS (
SELECT
user_id, ride_month,
COUNT(DISTINCT ride_date) AS commute_days
FROM base_data
GROUP BY user_id, ride_month
HAVING commute_days >= 5
),
grouped_data AS (
SELECT
user_id, ride_month,
PERIOD_ADD(ride_month, -RANK() OVER (PARTITION BY user_id ORDER BY ride_month)) AS g
FROM monthly_commute_days
),
user_active_tag AS (
SELECT
DISTINCT user_id,1 AS active_tag
FROM grouped_data
GROUP BY user_id, g
HAVING COUNT(1) >= 3
),
all_users AS (
SELECT DISTINCT
user_id,
0 AS active_tag
FROM hello_bike_riding_rcd
)
SELECT
user_id,
COALESCE(u.active_tag, a.active_tag) AS active_tag
FROM all_users a
LEFT JOIN user_active_tag u using(user_id);
WITH user_login AS (
SELECT DISTINCT
usr_id,
DATE(login_time) AS login_date
FROM user_login_log
),
daily_users as (
SELECT
usr_id, login_date,
lag(login_date) over(partition by usr_id order by login_date) last_login_date
from user_login
),
user_categories AS (
SELECT
login_date,
usr_id,
CASE
WHEN last_login_date IS NULL THEN '1'
WHEN DATEDIFF(login_date, last_login_date) BETWEEN 1 AND 3 THEN '2'
ELSE '3'
END AS user_type
FROM daily_users
WHERE YEAR(login_date) = 2024
),
user_stats AS (
SELECT
login_date,
sum(if(user_type="1",1,0)) new,
sum(if(user_type="2",1,0)) retained,
sum(if(user_type="3",1,0)) returning,
count(1) as total_count
FROM user_categories
GROUP BY login_date
),
formatted_stats AS (
SELECT
login_date,
CONCAT(
ROUND(new * 100 / total_count, 2), ', ',
ROUND(retained * 100 / total_count, 2), ', ',
ROUND(returning * 100 / total_count, 2)
) AS pct
FROM user_stats
)
SELECT login_date, pct FROM formatted_stats
ORDER BY login_date;
WITH data AS (
SELECT DISTINCT
usr_id,
DATE(login_time) AS login_date
FROM user_login_log
),
daily_users as (
SELECT
usr_id, login_date,
lag(login_date) over(partition by usr_id order by login_date) last_login_date
from data
),
user_categories AS (
SELECT
login_date,
usr_id,
CASE
WHEN last_login_date IS NULL THEN '1'
WHEN DATEDIFF(login_date, last_login_date) BETWEEN 1 AND 3 THEN '2'
ELSE '3'
END AS user_type
FROM daily_users
WHERE YEAR(login_date) = 2024
),
user_stats AS (
SELECT
login_date,
sum(if(user_type="1",1,0)) new,
sum(if(user_type="2",1,0)) retained,
sum(if(user_type="3",1,0)) returning,
count(1) as total_count
FROM user_categories
GROUP BY login_date
),
formatted_stats AS (
SELECT
login_date,
CONCAT(
ROUND(new * 100 / total_count, 2), ', ',
ROUND(retained * 100 / total_count, 2), ', ',
ROUND(returning * 100 / total_count, 2)
) AS pct
FROM user_stats
)
SELECT login_date, pct FROM formatted_stats
ORDER BY login_date;
WITH data AS (
SELECT DISTINCT
usr_id,
DATE(login_time) AS login_date
FROM user_login_log
),
daily_users as (
SELECT
usr_id, login_date,
lag(login_date) over(partition by usr_id order by login_date) last_login_date
from data
),
user_categories AS (
SELECT
login_date,
usr_id,
CASE
WHEN last_login_date IS NULL THEN '1'
WHEN DATEDIFF(login_date, last_login_date) BETWEEN 1 AND 3 THEN '2'
ELSE '3'
END AS user_type
FROM daily_users
WHERE YEAR(login_date) = 2024
),
user_stats AS (
SELECT
login_date,
sum(if(user_type="1",1,0)) new,
sum(if(user_type="2",1,0)) retained,
sum(if(user_type="3",1,0)) returning,
sum(1) as total_count
FROM user_categories
GROUP BY login_date
),
formatted_stats AS (
SELECT
login_date,
CONCAT(
ROUND(new * 100.0 / total_count, 2), ', ',
ROUND(retained * 100.0 / total_count, 2), ', ',
ROUND(returning * 100.0 / total_count, 2)
) AS pct
FROM user_stats
)
SELECT login_date, pct FROM formatted_stats
ORDER BY login_date;
WITH data AS (
SELECT DISTINCT
usr_id,
DATE(login_time) AS login_date
FROM user_login_log
),
daily_users as (
SELECT
usr_id, login_date,
lag(login_date) over(partition by usr_id order by login_date) last_login_date
from data
),
user_categories AS (
SELECT
login_date,
usr_id,
CASE
WHEN last_login_date IS NULL THEN '1'
WHEN DATEDIFF(login_date, last_login_date) BETWEEN 1 AND 3 THEN '2'
ELSE '3'
END AS user_type
FROM daily_users
WHERE YEAR(login_date) = 2024
),
user_stats AS (
SELECT
login_date,
sum(if(user_type="1",1,0)) new,
sum(if(user_type="2",1,0)) retained,
sum(if(user_type="3",1,0)) returning,
sum(1) as total_count
FROM user_categories
GROUP BY login_date
),
formatted_stats AS (
SELECT
login_date,
CONCAT(
ROUND(new * 100.0 / total_count, 2), ', ',
ROUND(retained * 100.0 / total_count, 2), ', ',
ROUND(returning * 100.0 / total_count, 2)
) AS pct
FROM user_stats
)
SELECT login_date, pct
FROM formatted_stats
ORDER BY login_date;
SELECT
CONCAT(card1, card2) AS card12,
IF(
EXISTS (
SELECT 1
FROM hand_probabilities
WHERE hand = CONCAT(SUBSTRING(card1, 1, 1), SUBSTRING(card2, 1, 1))
OR hand = CONCAT(SUBSTRING(card2, 1, 1), SUBSTRING(card1, 1, 1))
),
1,
0
) AS if_garrett
FROM hand_permutations;
SELECT DISTINCT
mch_nm AS merchant_name,
CASE
WHEN mch_nm LIKE '%拼多多%' OR mch_nm LIKE '%寻梦信息科技%' THEN '拼多多'
WHEN mch_nm LIKE '%京东%' THEN '京东'
WHEN mch_nm LIKE '%淘宝%' THEN '淘系'
WHEN mch_nm LIKE '%抖音%' THEN '抖音'
WHEN mch_nm LIKE '%小红书%' THEN '小红书'
ELSE '其他'
END AS platform
FROM ccb_trx_rcd;
SELECT
mch_nm AS merchant_name,
CASE
WHEN mch_nm LIKE '%拼多多%' OR mch_nm LIKE '%寻梦信息%' THEN '拼多多'
WHEN mch_nm LIKE '%京东%' THEN '京东'
WHEN mch_nm LIKE '%淘宝%' OR mch_nm LIKE '%天猫%' OR mch_nm LIKE '%淘%' THEN '淘系'
WHEN mch_nm LIKE '%抖音%' THEN '抖音'
WHEN mch_nm LIKE '%小红书%' THEN '小红书'
ELSE '其他'
END AS platform
FROM ccb_trx_rcd;
SELECT DISTINCT
mch_nm AS merchant_name,
CASE
WHEN mch_nm LIKE '%拼多多%' OR mch_nm LIKE '%寻梦信息科技%' THEN '拼多多'
WHEN mch_nm LIKE '%京东%' THEN '京东'
WHEN mch_nm LIKE '%淘宝%' OR mch_nm LIKE '%天猫%' OR mch_nm LIKE '%淘%' THEN '淘系'
WHEN mch_nm LIKE '%抖音%' THEN '抖音'
WHEN mch_nm LIKE '%小红书%' THEN '小红书'
ELSE '其他'
END AS platform
FROM ccb_trx_rcd;
with corrected_gd_loc_map as (
select
loc_nm,
case
when loc_nm = '北京机床研究所' then '写字楼'
when loc_nm = '将台西' then '地铁站'
else loc_type
end as loc_type
from gd_loc_map
),
base_data AS (
SELECT
r.user_id,
DATE(r.start_time) AS ride_date,
DATE_FORMAT(r.start_time, "%Y%m") AS ride_month
FROM hello_bike_riding_rcd r
JOIN corrected_gd_loc_map s ON r.start_loc = s.loc_nm
JOIN corrected_gd_loc_map e ON r.end_loc = e.loc_nm
WHERE YEAR(r.start_time) BETWEEN 2020 AND 2024
AND (
(s.loc_type = '地铁站' AND e.loc_type = '写字楼')
OR (s.loc_type = '写字楼' AND e.loc_type = '地铁站')
)
),
monthly_commute_days AS (
SELECT
user_id, ride_month,
COUNT(DISTINCT ride_date) AS commute_days
FROM base_data
GROUP BY user_id, ride_month
HAVING commute_days >= 5
),
grouped_data AS (
SELECT
user_id, ride_month,
PERIOD_ADD(ride_month, -RANK() OVER (PARTITION BY user_id ORDER BY ride_month)) AS g
FROM monthly_commute_days
),
user_active_tag AS (
SELECT
DISTINCT user_id,1 AS active_tag
FROM grouped_data
GROUP BY user_id, g
HAVING COUNT(1) >= 3
),
all_users AS (
SELECT DISTINCT
user_id,
0 AS active_tag
FROM hello_bike_riding_rcd
)
SELECT
user_id,
COALESCE(u.active_tag, a.active_tag) AS active_tag
FROM all_users a
LEFT JOIN user_active_tag u using(user_id);
with corrected_gd_loc_map as (
select
loc_nm,
case
when loc_nm = '北京机床研究所' then '写字楼'
when loc_nm = '将台西' then '地铁站'
else loc_type
end as loc_type
from gd_loc_map
),
base_data AS (
SELECT
r.user_id,
DATE(r.start_time) AS ride_date,
DATE_FORMAT(r.start_time, "%Y%m") AS ride_month
FROM hello_bike_riding_rcd r
JOIN corrected_gd_loc_map s ON r.start_loc = s.loc_nm
JOIN corrected_gd_loc_map e ON r.end_loc = e.loc_nm
WHERE YEAR(r.start_time) BETWEEN 2020 AND 2024
AND (
(s.loc_type = '地铁站' AND e.loc_type = '写字楼')
OR (s.loc_type = '写字楼' AND e.loc_type = '地铁站')
)
),
monthly_commute_days AS (
SELECT
user_id, ride_month,
COUNT(DISTINCT ride_date) AS commute_days
FROM base_data
GROUP BY user_id, ride_month
HAVING commute_days >= 5
),
grouped_data AS (
SELECT
user_id, ride_month,
PERIOD_ADD(ride_month, -RANK() OVER (PARTITION BY user_id ORDER BY ride_month)) AS g
FROM monthly_commute_days
),
user_active_tag AS (
SELECT
DISTINCT user_id,1 AS active_tag
FROM grouped_data
GROUP BY user_id, g
HAVING COUNT(1) >= 3
),
all_users AS (
SELECT DISTINCT
user_id,
0 AS active_tag
FROM hello_bike_riding_rcd
)
SELECT
a.user_id,
COALESCE(u.active_tag, a.active_tag) AS active_tag
FROM all_users a
LEFT JOIN user_active_tag u ON a.user_id = u.user_id;