WITH
commute_order AS (
SELECT
r.user_id,
r.id AS order_id,
r.start_time,
DATE_FORMAT(r.start_time, 'yyyy-MM-dd') AS ride_date,
DATE_FORMAT(r.start_time, 'yyyy-MM') AS ride_month,
CASE
WHEN s_loc.loc_type = '地铁站' AND e_loc.loc_type = '写字楼' THEN '上班程'
WHEN s_loc.loc_type = '写字楼' AND e_loc.loc_type = '地铁站' THEN '下班程'
ELSE '非通勤'
END AS commute_type
FROM hello_bike_riding_rcd r
LEFT JOIN gd_loc_map s_loc
ON r.start_loc = s_loc.loc_nm
LEFT JOIN gd_loc_map e_loc
ON r.end_loc = e_loc.loc_nm
WHERE r.start_time BETWEEN '2020-01-01 00:00:00' AND '2024-12-31 23:59:59'
),
filtered_commute_order AS (
SELECT *
FROM commute_order
WHERE commute_type != '非通勤'
),
month_qualified_user AS (
SELECT
user_id,
ride_month,
COUNT(DISTINCT ride_date) AS valid_commute_days
FROM filtered_commute_order
GROUP BY user_id, ride_month
HAVING valid_commute_days >= 5
),
consecutive_qualified_user AS (
SELECT
DISTINCT user_id
FROM (
SELECT
user_id,
ride_month,
YEAR(STR_TO_DATE(ride_month, '%Y-%m')) * 12 + MONTH(STR_TO_DATE(ride_month, '%Y-%m')) AS month_num,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY ride_month) AS rn
FROM month_qualified_user
) t
GROUP BY user_id, (month_num - rn)
HAVING COUNT(*) >= 3
),
all_user AS (
SELECT DISTINCT user_id
FROM hello_bike_riding_rcd
)
SELECT
a.user_id,
CASE WHEN c.user_id IS NOT NULL THEN 1 ELSE 0 END AS active_tag
FROM all_user a
LEFT JOIN consecutive_qualified_user c
ON a.user_id = c.user_id
ORDER BY a.user_id;