select
user_id,
case when sum(tag)over(partition by user_id)>=1 then 1
else0
end as active_tag
from(
Select user_id,ye,
CASE WHEN
commute_days >= 5 AND prev_month_commute_days >= 5 AND prev_two_months_commute_days >= 5
THEN 1
ELSE 0
END AS tag
FROM (
SELECT
user_id,
ye,
commute_days,
LAG(commute_days, 1) OVER (PARTITION BY user_id ORDER BY ye) AS prev_month_commute_days,
LAG(commute_days, 2) OVER (PARTITION BY user_id ORDER BY ye) AS prev_two_months_commute_days
FROM
( SELECT
user_id,
ye,
COUNT(DISTINCT start_date) AS commute_days
FROM
( SELECT
r.user_id,
DATE_FORMAT(r.start_time, '%Y-%m') AS ye,
r.start_loc,
r.end_loc,
m1.loc_type AS start_type,
m2.loc_type AS end_type,
DATE(r.start_time) AS start_date
FROM
hello_bike_riding_rcd r
JOIN gd_loc_map m1 ON r.start_loc = m1.loc_nm AND m1.loc_type IN ('地铁站', '写字楼')
JOIN gd_loc_map m2 ON r.end_loc = m2.loc_nm AND m2.loc_type IN ('地铁站', '写字楼')
WHERE
r.start_time BETWEEN '2020-01-01' AND '2024-12-31'
AND ((m1.loc_type = '地铁站' AND m2.loc_type = '写字楼') OR (m1.loc_type = '写字楼' AND m2.loc_type = '地铁站'))) CommuteData
GROUP BY
user_id,
ye) MonthlyCommute
) AS Subquery )
as A ;
select
distinct user_id,
case when sum(tag)over(partition by user_id)>=1 then 1
else0
end as active_tag
from(
Select user_id,ye,
CASE WHEN
commute_days >= 5 AND prev_month_commute_days >= 5 AND prev_two_months_commute_days >= 5
THEN 1
ELSE 0
END AS tag
FROM (
SELECT
user_id,
ye,
commute_days,
LAG(commute_days, 1) OVER (PARTITION BY user_id ORDER BY ye) AS prev_month_commute_days,
LAG(commute_days, 2) OVER (PARTITION BY user_id ORDER BY ye) AS prev_two_months_commute_days
FROM
( SELECT
user_id,
ye,
COUNT(DISTINCT start_date) AS commute_days
FROM
( SELECT
r.user_id,
DATE_FORMAT(r.start_time, '%Y-%m') AS ye,
r.start_loc,
r.end_loc,
m1.loc_type AS start_type,
m2.loc_type AS end_type,
DATE(r.start_time) AS start_date
FROM
hello_bike_riding_rcd r
JOIN gd_loc_map m1 ON r.start_loc = m1.loc_nm AND m1.loc_type IN ('地铁站', '写字楼')
JOIN gd_loc_map m2 ON r.end_loc = m2.loc_nm AND m2.loc_type IN ('地铁站', '写字楼')
WHERE
r.start_time BETWEEN '2020-01-01' AND '2024-12-31'
AND ((m1.loc_type = '地铁站' AND m2.loc_type = '写字楼') OR (m1.loc_type = '写字楼' AND m2.loc_type = '地铁站'))) CommuteData
GROUP BY
user_id,
ye) MonthlyCommute
) AS Subquery )
as A ;
select
user_id,
case when
sum(tag)over(partition by user_id)>=1 then 1
else0
end as active_tag
from(
Select user_id,ye,
CASE WHEN
commute_days >= 5 AND prev_month_commute_days >= 5 AND prev_two_months_commute_days >= 5
THEN 1
ELSE0
END AS tag
FROM (
SELECT
user_id,
ye,
commute_days,
LAG(commute_days, 1) OVER (PARTITION BY user_id ORDER BY ye) AS prev_month_commute_days,
LAG(commute_days, 2) OVER (PARTITION BY user_id ORDER BY ye) AS prev_two_months_commute_days
FROM
( SELECT
user_id,
ye,
COUNT(DISTINCT start_date) AS commute_days
FROM
( SELECT
r.user_id,
DATE_FORMAT(r.start_time, '%Y-%m') AS ye,
r.start_loc,
r.end_loc,
m1.loc_type AS start_type,
m2.loc_type AS end_type,
DATE(r.start_time) AS start_date
FROM
hello_bike_riding_rcd r
JOIN gd_loc_map m1 ON r.start_loc = m1.loc_nm AND m1.loc_type IN ('地铁站', '写字楼')
JOIN gd_loc_map m2 ON r.end_loc = m2.loc_nm AND m2.loc_type IN ('地铁站', '写字楼')
WHERE
r.start_time BETWEEN '2020-01-01' AND '2024-12-31'
AND ((m1.loc_type = '地铁站' AND m2.loc_type = '写字楼') OR (m1.loc_type = '写字楼' AND m2.loc_type = '地铁站'))) CommuteData
GROUP BY
user_id,
ye) MonthlyCommute
) AS Subquery )
as A ;
select
distinct user_id,
case when
sum(tag)over(partition by user_id)>=1 then 1
else0
end as active_tag
from(
Select user_id,ye,
CASE WHEN
commute_days >= 5 AND prev_month_commute_days >= 5 AND prev_two_months_commute_days >= 5
THEN 1
ELSE0
END AS tag
FROM (
SELECT
user_id,
ye,
commute_days,
LAG(commute_days, 1) OVER (PARTITION BY user_id ORDER BY ye) AS prev_month_commute_days,
LAG(commute_days, 2) OVER (PARTITION BY user_id ORDER BY ye) AS prev_two_months_commute_days
FROM
( SELECT
user_id,
ye,
COUNT(DISTINCT start_date) AS commute_days
FROM
( SELECT
r.user_id,
DATE_FORMAT(r.start_time, '%Y-%m') AS ye,
r.start_loc,
r.end_loc,
m1.loc_type AS start_type,
m2.loc_type AS end_type,
DATE(r.start_time) AS start_date
FROM
hello_bike_riding_rcd r
JOIN gd_loc_map m1 ON r.start_loc = m1.loc_nm AND m1.loc_type IN ('地铁站', '写字楼')
JOIN gd_loc_map m2 ON r.end_loc = m2.loc_nm AND m2.loc_type IN ('地铁站', '写字楼')
WHERE
r.start_time BETWEEN '2020-01-01' AND '2024-12-31'
AND ((m1.loc_type = '地铁站' AND m2.loc_type = '写字楼') OR (m1.loc_type = '写字楼' AND m2.loc_type = '地铁站'))) CommuteData
GROUP BY
user_id,
ye) MonthlyCommute
) AS Subquery )
as A ;
select user_id,
case when
sum(tag)over(partition by user_id)>=1 then 1
else0
end as active_tag
from(
Select user_id,ye,
CASE WHEN
commute_days >= 5 AND prev_month_commute_days >= 5 AND prev_two_months_commute_days >= 5
THEN 1
ELSE0
END AS tag
FROM (
SELECT
user_id,
ye,
commute_days,
LAG(commute_days, 1) OVER (PARTITION BY user_id ORDER BY ye) AS prev_month_commute_days,
LAG(commute_days, 2) OVER (PARTITION BY user_id ORDER BY ye) AS prev_two_months_commute_days
FROM
( SELECT
user_id,
ye,
COUNT(DISTINCT start_date) AS commute_days
FROM
( SELECT
r.user_id,
DATE_FORMAT(r.start_time, '%Y-%m') AS ye,
r.start_loc,
r.end_loc,
m1.loc_type AS start_type,
m2.loc_type AS end_type,
DATE(r.start_time) AS start_date
FROM
hello_bike_riding_rcd r
JOIN gd_loc_map m1 ON r.start_loc = m1.loc_nm AND m1.loc_type IN ('地铁站', '写字楼')
JOIN gd_loc_map m2 ON r.end_loc = m2.loc_nm AND m2.loc_type IN ('地铁站', '写字楼')
WHERE
r.start_time BETWEEN '2020-01-01' AND '2024-12-31'
AND ((m1.loc_type = '地铁站' AND m2.loc_type = '写字楼') OR (m1.loc_type = '写字楼' AND m2.loc_type = '地铁站'))) CommuteData
GROUP BY
user_id,
ye) MonthlyCommute
) AS Subquery )
as A ;
SELECT
distinct user_id,
CASE WHEN
commute_days >= 5 AND prev_month_commute_days >= 5 AND prev_two_months_commute_days >= 5
THEN 1
ELSE 0
END AS active_tag
FROM (
SELECT
user_id,
ye,
commute_days,
LAG(commute_days, 1) OVER (PARTITION BY user_id ORDER BY ye) AS prev_month_commute_days,
LAG(commute_days, 2) OVER (PARTITION BY user_id ORDER BY ye) AS prev_two_months_commute_days
FROM
( SELECT
user_id,
ye,
COUNT(DISTINCT start_date) AS commute_days
FROM
( SELECT
r.user_id,
DATE_FORMAT(r.start_time, '%Y-%m') AS ye,
r.start_loc,
r.end_loc,
m1.loc_type AS start_type,
m2.loc_type AS end_type,
DATE(r.start_time) AS start_date
FROM
hello_bike_riding_rcd r
JOIN gd_loc_map m1 ON r.start_loc = m1.loc_nm AND m1.loc_type IN ('地铁站', '写字楼')
JOIN gd_loc_map m2 ON r.end_loc = m2.loc_nm AND m2.loc_type IN ('地铁站', '写字楼')
WHERE
r.start_time BETWEEN '2020-01-01' AND '2024-12-31'
AND ((m1.loc_type = '地铁站' AND m2.loc_type = '写字楼') OR (m1.loc_type = '写字楼' AND m2.loc_type = '地铁站'))) CommuteData
GROUP BY
user_id,
ye) MonthlyCommute
) AS Subquery;
SELECT
user_id,
CASE WHEN
commute_days >= 5 AND prev_month_commute_days >= 5 AND prev_two_months_commute_days >= 5
THEN 1
ELSE 0
END AS active_tag
FROM (
SELECT
user_id,
ye,
commute_days,
LAG(commute_days, 1) OVER (PARTITION BY user_id ORDER BY ye) AS prev_month_commute_days,
LAG(commute_days, 2) OVER (PARTITION BY user_id ORDER BY ye) AS prev_two_months_commute_days
FROM
( SELECT
user_id,
ye,
COUNT(DISTINCT start_date) AS commute_days
FROM
( SELECT
r.user_id,
DATE_FORMAT(r.start_time, '%Y-%m') AS ye,
r.start_loc,
r.end_loc,
m1.loc_type AS start_type,
m2.loc_type AS end_type,
DATE(r.start_time) AS start_date
FROM
hello_bike_riding_rcd r
JOIN gd_loc_map m1 ON r.start_loc = m1.loc_nm AND m1.loc_type IN ('地铁站', '写字楼')
JOIN gd_loc_map m2 ON r.end_loc = m2.loc_nm AND m2.loc_type IN ('地铁站', '写字楼')
WHERE
r.start_time BETWEEN '2020-01-01' AND '2024-12-31'
AND ((m1.loc_type = '地铁站' AND m2.loc_type = '写字楼') OR (m1.loc_type = '写字楼' AND m2.loc_type = '地铁站'))) CommuteData
GROUP BY
user_id,
ye) MonthlyCommute
) AS Subquery;