with cleared_gd_loc_map as
(select * from
gd_loc_map
where loc_nm not in ('北京机床研究所','将台西')
union all
select '北京机床研究所' ,'写字楼'
union all
select '将台西','地铁站')
,CommuteRecords AS (
SELECT
user_id,
DATE_FORMAT(start_time, '%Y-%m-01') AS month_start,
COUNT(DISTINCT DATE(start_time)) AS commute_days
FROM hello_bike_riding_rcd rcd
JOIN cleared_gd_loc_map start_loc ON rcd.start_loc = start_loc.loc_nm
JOIN cleared_gd_loc_map end_loc ON rcd.end_loc = end_loc.loc_nm
WHERE start_loc.loc_type = '地铁站' AND end_loc.loc_type = '写字楼'
or end_loc.loc_type = '地铁站' AND start_loc.loc_type = '写字楼'
GROUP BY user_id, DATE_FORMAT(start_time, '%Y-%m-01')
),
ConsecutiveMonths AS (
SELECT
user_id,
month_start,
LEAD(month_start, 1) OVER (PARTITION BY user_id ORDER BY month_start) AS next_month_start,
LEAD(month_start, 2) OVER (PARTITION BY user_id ORDER BY month_start) AS next_next_month_start
FROM CommuteRecords
WHERE commute_days >= 5
),
ActiveUsers AS (
SELECT user_id
,case when (TIMESTAMPDIFF(MONTH, month_start, next_month_start) = 1 AND
TIMESTAMPDIFF(MONTH, next_month_start, next_next_month_start) = 1) then 1 else 0 end as active_tag
FROM ConsecutiveMonths
)
SELECT
user_id,max(active_tag) as active_tag
FROM ActiveUsers
group by user_id