with t1 as(
select
cust_uid,
count(*) as transaction_count,
count(distinct trx_dt) as active_days_count
from
mt_trx_rcd_f
group by
cust_uid
),
t2 as(
select
cust_uid,
transaction_count,
CAST(rank() over(order by transaction_count desc) as signed) as transaction_rank,
active_days_count,
CAST(rank() over(order by active_days_count desc) as signed) as active_days_rank
from
t1)
select
cust_uid,
transaction_count,
transaction_rank,
active_days_count,
active_days_rank,
abs(transaction_rank - active_days_rank) as rank_difference
from
t2
order by
rank_difference DESC
WITH user_login_days AS (
SELECT
usr_id,
DATE(login_time) AS login_date
FROM
user_login_log
WHERE
login_time >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
GROUP BY
usr_id, DATE(login_time)
),
ranked_logins AS (
SELECT
usr_id,
login_date,
ROW_NUMBER() OVER (PARTITION BY usr_id ORDER BY login_date) AS row_num
FROM
user_login_days
),
grouped_logins AS (
SELECT
usr_id,
login_date,
login_date - INTERVAL row_num DAY AS grp
FROM
ranked_logins
),
consecutive_logins AS (
SELECT
usr_id,
MIN(login_date) AS start_date,
MAX(login_date) AS end_date,
COUNT(*) AS consecutive_days
FROM
grouped_logins
GROUP BY
usr_id, grp
HAVING
COUNT(*) > 2
)
SELECT
usr_id,
start_date,
end_date,
consecutive_days
FROM
consecutive_logins
ORDER BY
usr_id ASC,
start_date ;
WITH user_login_days AS (
SELECT
usr_id,
DATE(login_time) AS login_date
FROM
user_login_log
WHERE
login_time >= DATE_SUB(CURDATE(), INTERVAL 180 DAY)
),
distinct_login_days AS (
SELECT
usr_id,
COUNT(DISTINCT login_date) AS login_days
FROM
user_login_days
GROUP BY
usr_id
)
SELECT
SUM(CASE WHEN login_days BETWEEN 1 AND 5 THEN 1 ELSE 0 END) AS days_1_to_5,
SUM(CASE WHEN login_days BETWEEN 6 AND 10 THEN 1 ELSE 0 END) AS days_6_to_10,
SUM(CASE WHEN login_days BETWEEN 11 AND 20 THEN 1 ELSE 0 END) AS days_11_to_20,
SUM(CASE WHEN login_days > 20 THEN 1 ELSE 0 END) AS days_over_20
FROM
distinct_login_days;
SELECT
COUNT(DISTINCT CASE
WHEN TIME(login_time) BETWEEN '07:30:00' AND '09:30:00'
OR TIME(login_time) BETWEEN '18:30:00' AND '20:30:00' THEN usr_id
END) AS commute,
COUNT(DISTINCT CASE
WHEN TIME(login_time) BETWEEN '11:30:00' AND '14:00:00' THEN usr_id
END) AS lunch_break,
COUNT(DISTINCT CASE
WHEN TIME(login_time) BETWEEN '22:30:00' AND '23:59:59' THEN usr_id
WHEN TIME(login_time) BETWEEN '00:00:00' AND '01:00:00' THEN usr_id
END) AS bedtime
FROM
user_login_log
WHERE
login_time >= DATE_FORMAT(DATE_SUB(CURDATE(), INTERVAL 1 MONTH), '%Y-%m-01')
AND login_time < DATE_FORMAT(CURDATE(), '%Y-%m-01');
WITH monthly_unique_logins AS (
SELECT
usr_id,
DATE_FORMAT(login_time, '%Y-%m-01') AS login_month
FROM
user_login_log
WHERE
login_time >= '2024-01-01' AND login_time < '2025-01-01'
GROUP BY
usr_id,
DATE_FORMAT(login_time, '%Y-%m-01')
),
new_users AS (
SELECT
usr_id,
MIN(login_month) AS first_login_month
FROM
monthly_unique_logins
GROUP BY
usr_id
),
next_month_logins AS (
SELECT
nu.usr_id,
nu.first_login_month AS current_month,
mul.login_month AS next_month
FROM
new_users nu
LEFT JOIN
monthly_unique_logins mul
ON
nu.usr_id = mul.usr_id AND
mul.login_month = DATE_ADD(nu.first_login_month, INTERVAL 1 MONTH)
)
SELECT
current_month,
ROUND(COUNT(DISTINCT CASE WHEN next_month IS NOT NULL THEN usr_id END) * 100.0 / COUNT(DISTINCT usr_id), 2) AS t_plus_1_month_retention_rate
FROM
next_month_logins
WHERE
current_month >= '2024-01-01' AND current_month < '2025-01-01'
GROUP BY
current_month
ORDER BY
current_month;
WITH monthly_unique_logins AS (
SELECT
usr_id,
DATE_FORMAT(login_time, '%Y-%m-01') AS login_month
FROM
user_login_log
WHERE
login_time >= '2024-01-01' AND login_time < '2025-01-01'
GROUP BY
usr_id,
DATE_FORMAT(login_time, '%Y-%m-01')
),
next_month_logins AS (
SELECT
mul1.usr_id,
mul1.login_month AS current_month,
mul2.login_month AS next_month
FROM
monthly_unique_logins mul1
LEFT JOIN
monthly_unique_logins mul2
ON
mul1.usr_id = mul2.usr_id AND
mul2.login_month = DATE_ADD(mul1.login_month, INTERVAL 1 MONTH)
)
SELECT
current_month,
ROUND(COUNT(DISTINCT CASE WHEN next_month IS NOT NULL THEN usr_id END) * 100.0 / COUNT(DISTINCT usr_id), 2) AS t_plus_1_month_retention_rate
FROM
next_month_logins
WHERE
current_month >= '2024-01-01' AND current_month < '2024-12-01'
GROUP BY
current_month
ORDER BY
current_month;
WITH daily_unique_logins AS (
SELECT
usr_id,
DATE(login_time) AS login_date
FROM
user_login_log
WHERE
login_time >= DATE_SUB(CURDATE(), INTERVAL 90 DAY)
GROUP BY
usr_id,
DATE(login_time)
),
retention_days AS (
SELECT
dul1.usr_id,
dul1.login_date AS first_login_date,
dul2.login_date AS next_day_login_date,
DATEDIFF(dul2.login_date, dul1.login_date) AS days_diff
FROM
daily_unique_logins dul1
LEFT JOIN
daily_unique_logins dul2
ON
dul1.usr_id = dul2.usr_id AND
dul2.login_date BETWEEN dul1.login_date + INTERVAL 1 DAY AND dul1.login_date + INTERVAL 14 DAY
)
SELECT
first_login_date,
ROUND(COUNT(DISTINCT CASE WHEN days_diff BETWEEN 1 AND 3 THEN usr_id END) * 100.0 / COUNT(DISTINCT usr_id), 2) AS t_plus_3_retention_rate,
ROUND(COUNT(DISTINCT CASE WHEN days_diff BETWEEN 1 AND 7 THEN usr_id END) * 100.0 / COUNT(DISTINCT usr_id), 2) AS t_plus_7_retention_rate,
ROUND(COUNT(DISTINCT CASE WHEN days_diff BETWEEN 1 AND 14 THEN usr_id END) * 100.0 / COUNT(DISTINCT usr_id), 2) AS t_plus_14_retention_rate
FROM
retention_days
WHERE
first_login_date >= DATE_SUB(CURDATE(), INTERVAL 90 DAY)
GROUP BY
first_login_date
ORDER BY
first_login_date;
WITH daily_unique_logins AS (
SELECT
usr_id,
DATE(login_time) AS login_date
FROM
user_login_log
WHERE
login_time >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
GROUP BY
usr_id,
DATE(login_time)
),
retention_days AS (
SELECT
dul1.usr_id,
dul1.login_date AS first_login_date,
dul2.login_date AS next_day_login_date,
DATEDIFF(dul2.login_date, dul1.login_date) AS days_diff
FROM
daily_unique_logins dul1
LEFT JOIN
daily_unique_logins dul2
ON
dul1.usr_id = dul2.usr_id AND
dul2.login_date BETWEEN dul1.login_date + INTERVAL 1 DAY AND dul1.login_date + INTERVAL 14 DAY
)
SELECT
first_login_date,
ROUND(COUNT(DISTINCT CASE WHEN days_diff = 1 THEN usr_id END) * 100.0 / COUNT(DISTINCT usr_id), 2) AS t_plus_1_retention_rate,
ROUND(COUNT(DISTINCT CASE WHEN days_diff = 3 THEN usr_id END) * 100.0 / COUNT(DISTINCT usr_id), 2) AS t_plus_3_retention_rate,
ROUND(COUNT(DISTINCT CASE WHEN days_diff = 7 THEN usr_id END) * 100.0 / COUNT(DISTINCT usr_id), 2) AS t_plus_7_retention_rate,
ROUND(COUNT(DISTINCT CASE WHEN days_diff = 14 THEN usr_id END) * 100.0 / COUNT(DISTINCT usr_id), 2) AS t_plus_14_retention_rate
FROM
retention_days
WHERE
first_login_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
GROUP BY
first_login_date
ORDER BY
first_login_date;
with data1 as (
select distinct
usr_id,
date(login_time) as login_date
from
user_login_log
where
datediff(current_date, date(login_time)) <= 30
),
data2 as (
select
T.usr_id,
T.login_date as T_date,
T_1.login_date as T_1_date
from
data1 as T
left join
data1 as T_1
on
T.usr_id = T_1.usr_id
and datediff(T.login_date, T_1.login_date) = -1
)
select
T_date as first_login_date,
concat(round(avg(T_1_date is not null)*100, 2), '%') as T1_retention_rate
from
data2
group by
T_date
order by
T_date;
with active_users_before_august as (
select
usr_id,
date_format(login_time, '%Y-%m') as month
from
user_login_log
where
login_time < '2024-08-01'
group by
usr_id, date_format(login_time, '%Y-%m')
having
count(*) >= 10
),
active_users_after_august as (
select
usr_id,
date_format(login_time, '%Y-%m') as month
from
user_login_log
where
login_time >= '2024-08-01'
group by
usr_id, date_format(login_time, '%Y-%m')
having
count(*) >= 10
)
select
count(distinct au1.usr_id) as inactive_user_count
from
active_users_before_august au1
where
not exists (
select 1
from active_users_after_august au2
where au2.usr_id = au1.usr_id
);
select
goods_id
,sum(order_gmv) as total_gmv
from
order_info
where
left(order_time,10) = '2024-09-10'
group by
goods_id
order by
total_gmv desc
limit
10;
SELECT
gd.gd_id,
gd.gd_nm,
COUNT(fav.fav_trq) AS fav_count
FROM
xhs_fav_rcd fav
JOIN
gd_inf gd ON fav.mch_id = gd.gd_id
GROUP BY
gd.gd_id, gd.gd_nm
ORDER BY
fav_count DESC
LIMIT 1;
with t1 as(
select
a.start_loc,
a.end_loc,
count(*) as trip_count
from
didi_sht_rcd a
join
loc_nm_ctg bstart on bstart.loc_nm = a.start_loc
join
loc_nm_ctg bend on bend.loc_nm = a.end_loc
where
bstart.loc_ctg = '酒店' and bend.loc_ctg = '餐饮'
group by
1,2
)
select
start_loc,end_loc,trip_count
from
t1
order by
trip_count desc
limit
1;
with t1 as(
select
a.start_loc,
a.end_loc,
count(*) as trip_count
from
didi_sht_rcd a
join
loc_nm_ctg bstart on bstart.loc_ctg = a.start_loc
join
loc_nm_ctg bend on bend.loc_ctg = a.end_loc
where
bstart.loc_ctg = '酒店' and bend.loc_ctg = '餐饮'
group by
1,2
)
select
start_loc,end_loc,trip_count
from
t1
order by
trip_count desc
limit
1;
SELECT r.*
FROM didi_sht_rcd r
JOIN loc_nm_ctg l_start ON r.start_loc = l_start.loc_nm
JOIN loc_nm_ctg l_end ON r.end_loc = l_end.loc_nm
WHERE l_start.loc_ctg = '住宅'
AND l_end.loc_ctg = '写字楼'
ORDER BY r.start_tm ASC;
SELECT r.*
FROM didi_sht_rcd r
JOIN loc_nm_ctg l_start ON r.start_loc = l_start.loc_nm
JOIN loc_nm_ctg l_end ON r.end_loc = l_end.loc_nm
WHERE l_start.loc_ctg = '餐饮'
AND l_end.loc_ctg = '餐饮'
ORDER BY r.start_tm ASC;
WITH ConvertedTime AS (
SELECT
order_id,
cust_uid,
DATE_ADD(call_time, INTERVAL -3 HOUR) AS local_call_time,
grab_time,
cancel_time,
finish_time
FROM
didi_order_rcd
),
CallDates AS (
SELECT
order_id,
cust_uid,
local_call_time,
DATE(local_call_time) AS call_date
FROM
ConvertedTime
),
NextDayCalls AS (
SELECT
c1.order_id AS order_id_1,
c1.call_date AS call_date_1,
c2.order_id AS order_id_2,
c2.call_date AS call_date_2
FROM
CallDates c1
JOIN
CallDates c2
ON
c2.call_date = DATE_ADD(c1.call_date, INTERVAL 1 DAY)
AND
c1.cust_uid = c2.cust_uid
AND
c1.order_id = c2.order_id
),
NextDayCallCount AS (
SELECT
COUNT(DISTINCT order_id_1) AS next_day_call_count
FROM
NextDayCalls
),
TotalOrderCount AS (
SELECT
COUNT(order_id) AS total_order_count
FROM
didi_order_rcd
where finish_time = '1970-01-01 00:00:00'
)
SELECT
ncc.next_day_call_count,
toc.total_order_count,
CONCAT(FORMAT((ncc.next_day_call_count * 1.0 / toc.total_order_count) * 100, 2), '%') AS next_day_call_ratio
FROM
NextDayCallCount ncc,
TotalOrderCount toc;
WITH ConvertedOrders AS (
SELECT
order_id,
cust_uid,
call_time AS utc_call_time,
DATE_ADD(call_time, INTERVAL -3 HOUR) AS br_call_time,
DATE(DATE_ADD(call_time, INTERVAL -3 HOUR)) AS br_call_date,
finish_time AS utc_finish_time,
CASE WHEN finish_time = '1970-01-01 00:00:00' THEN 1 ELSE 0 END AS is_unfinished
FROM didi_order_rcd
),
UnfinishedOrders AS (
SELECT
order_id,
cust_uid,
br_call_date
FROM ConvertedOrders
WHERE is_unfinished = 1
),
UserNextDayCalls AS (
SELECT
uo.order_id AS unfinished_order_id,
uo.cust_uid,
uo.br_call_date AS unfinished_date,
CASE
WHEN MAX(CASE WHEN co.br_call_date = DATE_ADD(uo.br_call_date, INTERVAL 1 DAY) THEN 1 ELSE 0 END) = 1
THEN 1
ELSE 0
END AS has_next_day_call
FROM UnfinishedOrders uo
LEFT JOIN ConvertedOrders co
ON uo.cust_uid = co.cust_uid
AND co.br_call_date >= uo.br_call_date
GROUP BY
uo.order_id, uo.cust_uid, uo.br_call_date
)
SELECT
COUNT(DISTINCT unfinished_order_id) AS total_unfinished_orders,
SUM(has_next_day_call) AS next_day_call_orders,
CONCAT(
FORMAT(
(SUM(has_next_day_call) * 1.0 / COUNT(DISTINCT unfinished_order_id)) * 100,
2
),
'%'
) AS next_day_call_ratio
FROM UserNextDayCalls;