with login_before as (
select
distinct usr_id
from
user_login_log
where
date(login_time) <= '2024-10-01'
),
login_after as (
select
distinct usr_id
from
user_login_log
where
date(login_time) >= '2024-10-02'
)
select
count(b.usr_id) - count(a.usr_id) as inactive_user_count
from
login_before b
left join
login_after a on a.usr_id = b.usr_id
;
为什么不对呢?
这样写为什么结果和参考答案不一样呢?
select
distinct u1.mch_nm
from
cmb_usr_trx_rcd u1
inner join
cmb_usr_trx_rcd u2 on u1.mch_nm = u1.mch_nm
where
year(u1.trx_time) = 2024
and
year(u2.trx_time) = 2024
and
u1.usr_id = 5201314520
and
u2.usr_id = 5211314521
order by
u1.mch_nm desc;
我搞错了,两家分别统计的结果相加应该=2,不是>=2
having
count(distinct case when u1.mch_nm = '庄家界(千灯店)' then u1.cust_uid else null end) + count(distinct case when u1.mch_nm = '黄记烘培宫廷桃酥王' then u1.cust_uid else null end) = 2
我觉得我这种写法比较简洁。
在过滤cust_id_1的时候要满足用户两家店都去过,也就是两家店都能在mch_nm里找到,那么就应该分别对两家进行count,因为如果只是用 mch_nm in ('庄家界(千灯店)', '黄记烘培宫廷桃酥王') 来判断,那么只吃过其中一家(超过两次)的用户就会被计入。
select
u.cust_uid,
u1.cust_uid as cust_uid_1
from
mt_trx_rcd1 u
left join
mt_trx_rcd1 u1 on u.cust_uid <> u1.cust_uid
where
u.cust_uid = 'MT10000'
group by
u.cust_uid,
cust_uid_1
having
count(distinct case when u1.mch_nm = '庄家界(千灯店)' then u1.cust_uid else null end) + count(distinct case when u1.mch_nm = '黄记烘培宫廷桃酥王' then u1.cust_uid else null end) >= 2;
with groupings as (
select
so.album_id,
so.song_id,
so.song_name,
count(distinct r.user_id) as user_count,
count(r.user_id) as play_count
from
singer_info si
inner join
song_info so on so.origin_singer_id = si.singer_id
inner join
album_info a on so.album_id = a.album_id
inner join
listen_rcd r on r.song_id = so.song_id
where
si.singer_name = 'Michael Jackson'
group by
so.album_id,
so.song_id,
so.song_name
),
top_album as (
select
album_id,
sum(user_count) as album_user_count
from
groupings
group by
album_id
order by
album_user_count desc
limit 1
)
select
g.song_name,
sum(g.play_count) as total_play_count,
sum(g.user_count) as total_user_count
from
top_album t
inner join
groupings g on t.album_id = g.album_id
group by
g.song_name
order by
total_play_count desc
limit 3;
我在第一个子查询的group by里没有添加video_id,因为我觉得可以直接统计每个类别的观看用户数和观看至视频结束的用户数,但是这样计算并没有得到输出示例里的结果,p行的数据是对的,l行AI_with_hint no_AI_with_hint数据不对。为什么会这样?
with group_counting as (
select
inf.screen_type,
inf.if_AI_talking,
inf.if_hint,
count(distinct case when timestampdiff(second, log.start_time, log.end_time) >= inf.duration then log.uid else null end) as finish_count,
count(distinct log.uid) as total_count
from
ks_video_inf inf
inner join
ks_video_wat_log log on log.video_id = inf.video_id
group by
inf.screen_type,
inf.if_AI_talking,
inf.if_hint
)
select
screen_type,
round(100 * max(case when if_AI_talking = 1 and if_hint = 1 then finish_count else 0 end) / nullif(max(case when if_AI_talking = 1 and if_hint = 1 then total_count end), 0), 2) as AI_with_hint,
round(100 * max(case when if_AI_talking = 1 and if_hint = 0 then finish_count else 0 end) / nullif(max(case when if_AI_talking = 1 and if_hint = 0 then total_count end), 0), 2) as AI_no_hint,
round(100 * max(case when if_AI_talking = 0 and if_hint = 1 then finish_count else 0 end) / nullif(max(case when if_AI_talking = 0 and if_hint = 1 then total_count end), 0), 2) as no_AI_with_hint,
round(100 * max(case when if_AI_talking = 0 and if_hint = 0 then finish_count else 0 end) / nullif(max(case when if_AI_talking = 0 and if_hint = 0 then total_count end), 0), 2) as no_AI_no_hint
from
group_counting
group by
screen_type;
再理解下这种写法:SELECT
i.screen_type,
ROUND(COALESCE(
SUM((TIMESTAMPDIFF(SECOND, l.start_time, l.end_time) >= i.duration) * (i.if_AI_talking = 1 AND i.if_hint = 1))
* 100.0
/ NULLIF(SUM(i.if_AI_talking = 1 AND i.if_hint = 1), 0),
0
), 2) AS AI_with_hint,
ROUND(COALESCE(
SUM((TIMESTAMPDIFF(SECOND, l.start_time, l.end_time) >= i.duration) * (i.if_AI_talking = 1 AND i.if_hint = 0))
* 100.0
/ NULLIF(SUM(i.if_AI_talking = 1 AND i.if_hint = 0), 0),
0
), 2) AS AI_no_hint,
ROUND(COALESCE(
SUM((TIMESTAMPDIFF(SECOND, l.start_time, l.end_time) >= i.duration) * (i.if_AI_talking = 0 AND i.if_hint = 1))
* 100.0
/ NULLIF(SUM(i.if_AI_talking = 0 AND i.if_hint = 1), 0),
0
), 2) AS no_AI_with_hint,
ROUND(COALESCE(
SUM((TIMESTAMPDIFF(SECOND, l.start_time, l.end_time) >= i.duration) * (i.if_AI_talking = 0 AND i.if_hint = 0))
* 100.0
/ NULLIF(SUM(i.if_AI_talking = 0 AND i.if_hint = 0), 0),
0
), 2) AS no_AI_no_hint
FROM
ks_video_inf i
JOIN
ks_video_wat_log l ON i.video_id = l.video_id
GROUP BY
i.screen_type;。这个是最直接的,没有弯弯绕绕的多次计算。你写的相当于多了一次去重。把分子分母都搞小了。
使用两表联合,条件是商品id相等和用户ID相等,但是在where语句里过滤收藏时间早于购买时间却产生了奇怪的结果:收藏用户和购买用户数量相等了。代码如下:
with purchase_cnt as (
select
f.mch_id,
count(distinct f.cust_uid) as fav_user_cnt,
count(distinct p.cust_uid) as pcs_user_cnt
from
xhs_fav_rcd f
left join
xhs_pchs_rcd p on p.mch_id = f.mch_id and p.cust_uid = f.cust_uid
where
p.pchs_tm > f.fav_tm
group by
f.mch_id
)
select
mch_id,
fav_user_cnt,
pcs_user_cnt,
round(100 * cast(pcs_user_cnt as float) / fav_user_cnt, 2) as convert_rate
from
purchase_cnt
group by
mch_id,
fav_user_cnt,
pcs_user_cnt
order by
convert_rate desc;
后来把 p.pchs_tm > f.fav_tm 写在了on里面 on p.mch_id = f.mch_id and p.cust_uid = f.cust_uid and p.pchs_tm > f.fav_tm,结果就都对了。为什么会这样?
起点是餐饮、终点是餐饮两个子查询,同时在两个集合中的地点:id相同,且时间相同
with start_catering as (
select
r.cust_uid,
r.start_loc,
r.end_loc,
r.start_tm,
r.car_cls
from
didi_sht_rcd r
inner join
loc_nm_ctg l on l.loc_nm = r.start_loc
where
l.loc_ctg = '餐饮'
),
end_catering as (
select
r.cust_uid,
r.start_loc,
r.end_loc,
r.start_tm,
r.car_cls
from
didi_sht_rcd r
inner join
loc_nm_ctg l on l.loc_nm = r.end_loc
where
l.loc_ctg = '餐饮'
)
select
s.*
from
start_catering s
inner join
end_catering e on s.cust_uid = e.cust_uid and s.start_tm = e.start_tm
order by
s.start_tm asc
;
select
t.name,
count(distinct s.student_id) as total_stu,
count(distinct (case when sc.score < 60 then s.student_id else null end)) as unpass_stu,
count(distinct case when sc.score < 60 then s.student_id else null end) / count(distinct s.student_id) as up_rate
from students s
inner join
teachers t on t.class_code like concat('%', s.class_code, '%')
inner join
scores sc on s.student_id = sc.student_id
where
t.subject = '化学'
group by
t.name
;
这样写有问题吗?未及格学生人数和正确结果有1-3的误差,不知为什么。
with score_ranges as (
select
st.class_code,
count(*) as class_total,
sum(case when sc.score >= 110 then 1 else 0 end) as excellent,
sum(case when sc.score between 90 and 109 then 1 else 0 end) as good,
sum(case when sc.score between 60 and 89 then 1 else 0 end) as pass,
sum(case when sc.score < 60 then 1 else 0 end) as fail
from
students st
inner join
scores sc on sc.student_id = st.student_id
where
exam_date = '2024-06-30' and sc.subject = '数学'
group by
st.class_code
)
select
class_code,
class_total,
concat(excellent, ' ,', round(100 * excellent / class_total, 2), '%') as excellent,
concat(good, ' ,', round(100 * good / class_total, 2), '%') as good,
concat(pass, ' ,', round(100 * pass / class_total, 2), '%') as pass,
concat(fail, ' ,', round(100 * fail / class_total, 2), '%') as fail
from score_ranges
group by class_code
order by class_code;
with total_levels as (
select
student_id,
(case when score >= 110 then 1 else 0 end) as excellent,
(case when score between 90 and 109 then 1 else 0 end) as good,
(case when score between 60 and 89 then 1 else 0 end) as pass,
(case when score < 60 then 1 else 0 end) as fail
from scores
where exam_date = '2024-06-30' and subject = '数学'
)
select
s.class_code,
sum(excellent) as excellent,
sum(good) as good,
sum(pass) as pass,
sum(fail) as fail
from
total_levels as t
inner join
students as s on s.student_id = t.student_id
group by
s.class_code
order by
s.class_code;
a=0,b>0时,c是否应该>0才能经过第二象限?
a>0时,是否不需要其他条件曲线都能经过第二象限?
select *
from numbers_for_fun
where
(
a = 0 and
(
(b > 0 and c > 0)
or
(b < 0)
or
(b = 0 and c < 0)
)
)
or a > 0
or (
a < 0 and
(
(b > 0 and c > 0)
or
(b < 0 and c > b*b/4/a)
)
);
这题考的就是inner join和left join的使用场景。你运行这段代码试试,SELECT
s.singer_id,
s.singer_name,
a.album_id,
a.album_name,
COUNT(l.id) AS play_count
FROM
singer_info s
JOIN
album_info a ON s.singer_id = a.singer_id
inner JOIN
song_info sg ON a.album_id = sg.album_id
inner JOIN
listen_rcd l ON sg.song_id = l.song_id
GROUP BY
s.singer_id, s.singer_name, a.album_id, a.album_name。
sum(
case
when chinese >= 110 then 1
when math >= 110 then 1
when english >= 110 then 1
else 0
end
) >= 2
这种判断不能按预想的执行,因为case when 短路了,第一个条件执行完后面的就不判断了,所以sum永远不能得到2
select
year(list_date) as year,
count(1) as number
from
stock_info
where
year(list_date) in (2020, 2021, 2022, 2023, 2024)
and
name rlike '医|药|生物'
group by
year
order by
year;
select
year(list_date) as year,
count(1) as number
from
stock_info
where
year(list_date) in (2020, 2021, 2022, 2023, 2024)
and
name rlike '.*(医|药|生物).*'
group by
year
order by
year;
select
year(list_date) as year,
count(1) as number
from
stock_info
where
year(list_date) in (2021, 2022, 2023, 2024)
and
name rlike '[医|药|生物]'
group by
year
order by
year;
select
industry,
avg(length(name)) as avg_length,
max(length(name)) as max_length,
min(length(name)) as min_length
from
stock_info
where
industry in ('通信设备', '生物制药', '旅游景点', '医疗保健', '食品')
group by
industry
order by
industry;
select
industry,
avg(char_length(name)) as avg_length,
max(char_length(name)) as max_length,
min(char_length(name)) as min_length
from
stock_info
where
industry in ('通信设备', '生物制药', '旅游景点', '医疗保健', '食品')
group by
industry
order by
industry;
with area_grouping as (
select
area,
count(1) as total_companies,
sum(case when name like '中%' or name like '%中国%' then 1 else 0 end) as china_named_companies
from
stock_info
group by
area
)
select
*,
round(china_named_companies / total_companies, 3) as proportion
from
area_grouping
order by
proportion desc
limit 5;
SELECT
area,
COUNT(*) AS total_companies,
SUM(CASE WHEN name LIKE '%中国%' OR name LIKE '中%' THEN 1 ELSE 0 END) AS chinese_named_companies,
ROUND(SUM(CASE WHEN name LIKE '%中国%' OR name LIKE '中%' THEN 1 ELSE 0 END) / COUNT(*),3) AS proportion
FROM stock_info
GROUP BY area
ORDER BY 4 desc
limit 5;
with area_grouping as (
select
area,
sum(case when name like '中%' or name like '%中国%' then 1 else 0 end) as china_named_companies,
count(1) as total_companies
from
stock_info
group by
area
)
select
*,
round(china_named_companies / total_companies, 3) as proportion
from
area_grouping
order by
proportion desc
limit 5;
with industry_rank as (
select
*,
row_number() over(partition by industry order by list_date asc) as rnk
from
stock_info
where
name like '中国%' or name like '%中%'
)
select
ts_code,
symbol,
name,
area,
industry,
list_date
from
industry_rank
where
rnk = 1
order by
industry;
with generator25 as (
select @xi:=@xi+1 as xc from
(select 1 union all select 2 union all select 3 union all select 4 union all select 5) xc1,
(select 1 union all select 2 union all select 3 union all select 4 union all select 5) xc2,
(select @xi:=0) xc0
),
max_tag_number as (
select
max(char_length(tag) - char_length(replace(tag, ' ', '')) + 1) as max_tag
from
ks_video_inf
),
limit_xc as (
select
xc
from
generator25 i, max_tag_number n
where
xc <= n.max_tag
),
split_tags as (
select
v.video_id,
trim(substring_index(substring_index(v.tag, ' ', i.xc), ' ', -1)) as tag
from
ks_video_inf as v
inner join
limit_xc as i on i.xc <= char_length(v.tag) - char_length(replace(v.tag, ' ', '')) + 1
)
select
t.tag,
count(1) as total_plays
from
split_tags t
inner join
ks_video_wat_log l on l.video_id = t.video_id
where
l.start_time >= date_sub(current_date, interval 1 month)
group by
t.tag
order by
total_plays desc,
t.tag
limit 1;
with generator25 as (
select @xi:=@xi+1 as xc from
(select 1 union all select 2 union all select 3 union all select 4 union all select 5) xc1,
(select 1 union all select 2 union all select 3 union all select 4 union all select 5) xc2,
(select @xi:=0) xc0
),
max_tag_number as (
select
max(char_length(tag) - char_length(replace(tag, ' ', '')) + 1) as max_tag
from
ks_video_inf
),
limit_xc as (
select
xc
from
generator25 i, max_tag_number n
where
xc <= n.max_tag
),
split_tags as (
select
v.video_id,
trim(substring_index(substring_index(v.tag, ' ', i.xc), ' ', -1)) as tag
from
ks_video_inf as v
inner join
limit_xc as i on i.xc <= char_length(v.tag) - char_length(replace(v.tag, ' ', '')) + 1
)
select
t.tag,
count(1) as total_plays
from
split_tags t
inner join
ks_video_wat_log l on l.video_id = t.video_id
where
l.start_time >= date_sub(current_date, interval 1 month)
group by
t.tag
order by
total_plays desc
limit 2;
with generator25 as (
select @xi:=@xi+1 as xc from
(select 1 union all select 2 union all select 3 union all select 4 union all select 5) xc1,
(select 1 union all select 2 union all select 3 union all select 4 union all select 5) xc2,
(select @xi:=0) xc0
),
max_tag_number as (
select
max(char_length(tag) - char_length(replace(tag, ' ', '')) + 1) as max_tag
from
ks_video_inf
),
limit_xc as (
select
xc
from
generator25 i, max_tag_number n
where
xc <= n.max_tag
),
split_tags as (
select
v.video_id,
trim(substring_index(substring_index(v.tag, ' ', i.xc), ' ', -1)) as tag
from
ks_video_inf as v
inner join
limit_xc as i on i.xc <= char_length(v.tag) - char_length(replace(v.tag, ' ', '')) + 1
)
select
t.tag,
count( l.uid) as total_plays
from
split_tags t
inner join
ks_video_wat_log l on l.video_id = t.video_id
where
l.start_time >= date_sub(current_date, interval 1 month)
group by
t.tag
order by
total_plays desc
limit 1;
with generator25 as (
select @xi:=@xi+1 as xc from
(select 1 union all select 2 union all select 3 union all select 4 union all select 5) xc1,
(select 1 union all select 2 union all select 3 union all select 4 union all select 5) xc2,
(select @xi:=0) xc0
),
max_tag_number as (
select
max(char_length(tag) - char_length(replace(tag, ' ', '')) + 1) as max_tag
from
ks_video_inf
),
limit_xc as (
select
xc
from
generator25 i, max_tag_number n
where
xc <= n.max_tag
),
split_tags as (
select
v.video_id,
trim(substring_index(substring_index(v.tag, ' ', i.xc), ' ', -1)) as tag
from
ks_video_inf as v
inner join
limit_xc as i on i.xc <= char_length(v.tag) - char_length(replace(v.tag, ' ', '')) + 1
)
select
t.tag,
count(distinct l.uid) as total_plays
from
split_tags t
inner join
ks_video_wat_log l on l.video_id = t.video_id
where
l.start_time >= date_sub(current_date, interval 1 month)
group by
t.tag
order by
total_plays desc
limit 1;
select
year(list_date) as Y,
count(distinct ts_code) as cnt
from stock_info
where
ts_code like '%BJ'
and
year(list_date) >= 2020 and year(list_date) <= 2024
group by
Y
order by
Y;
select
year(list_date) as Y,
count(distinct ts_code) as cnt
from stock_info
where
area = '北京'
and
year(list_date) >= 2020 and year(list_date) <= 2024
group by
Y
order by
Y;
select
year(list_date) as Y,
count(ts_code) as cnt
from stock_info
where
area = '北京'
and
year(list_date) >= 2020 and year(list_date) <= 2024
group by
Y
order by
Y;
select
count(case when right(card1, 1) = right(card2, 1)
and (left(card1, 1) in ('J', 'K', 'Q', 'A'))
and (left(card2, 1) in ('J', 'K', 'Q', 'A'))
then id else null end) / 2 as cnt,
count(1) / 2 as ttl_cnt,
count(case when right(card1, 1) = right(card2, 1)
and (left(card1, 1) in ('J', 'K', 'Q', 'A'))
and (left(card2, 1) in ('J', 'K', 'Q', 'A'))
then id else null end) / count(1) as p
from hand_permutations
;