输出的是和输出示例是一样的呀,问题在哪?
select
year(dt) as Y
, format(avg(REGEXP_SUBSTR(if(city = 'beijing', tmp_h, null), '-?[0-9]+')), 2)as beijing
, format(avg(REGEXP_SUBSTR(if(city = 'shanghai', tmp_h, null), '-?[0-9]+')), 2)as shanghai
, format(avg(REGEXP_SUBSTR(if(city = 'shenzhen', tmp_h, null), '-?[0-9]+')), 2)as shenzhen
, format(avg(REGEXP_SUBSTR(if(city = 'guangzhou', tmp_h, null), '-?[0-9]+')), 2)as guangzhou
from weather_rcd_china
where
year(dt) between 2011 and 2022
and city in ('beijing','shanghai','shenzhen','guangzhou')
group by 1
order by 1
记录一下第一次算峰值,想了很久,不知道还有没有其他的简单方法
with data1 as(
select usr_id, live_id, enter_time as time1, 1 as flag1
from ks_live_t1
union all
select usr_id, live_id, leave_time as time1, -1 as flag1
from ks_live_t1
),
data2 as(
select
live_id
, sum(flag1) over(partition by live_id order by time1 ) as online_users
from data1
)
select t1.live_id,t2.live_nm, max(online_users) as max_online_users
from data2 t1 left join ks_live_t2 t2 on t1.live_id = t2.live_id
group by 1, 2
order by 3 desc
如果活跃的定义就是登录的话,那么只需要判断最后一次登录时间是否 <=2024-10-01 不就可以吗
select count(1) as inactive_user_count
from(
select usr_id, max(login_time)
from user_login_log
group by 1
having max(login_time) <= '2024-10-01'
) tmp
select
year(dt) as Y
, cast(avg(REGEXP_SUBSTR(if(city = 'beijing', tmp_h, null), '-?[0-9]+'))as decimal(4, 2))as beijing
, cast(avg(REGEXP_SUBSTR(if(city = 'shanghai', tmp_h, null), '-?[0-9]+'))as decimal(4, 2))as shanghai
, cast(avg(REGEXP_SUBSTR(if(city = 'shenzhen', tmp_h, null), '-?[0-9]+'))as decimal(4, 2))as shenzhen
, cast(avg(REGEXP_SUBSTR(if(city = 'guangzhou', tmp_h, null), '-?[0-9]+'))as decimal(4, 2))as guangzhou
from weather_rcd_china
where
year(dt) between 2011 and 2022
and city in ('beijing','shanghai','shenzhen','guangzhou')
group by 1
order by 1
select
year(dt) as `year`
,cast(avg(REGEXP_SUBSTR(tmp_h, '-?[0-9]+')) as decimal(4, 2))as avg_tmp_h
,CASE
WHEN ABS(AVG(CAST(REGEXP_SUBSTR(tmp_h, '-?[0-9]+') AS DECIMAL)) -
LAG(AVG(CAST(REGEXP_SUBSTR(tmp_h, '-?[0-9]+') AS DECIMAL)), 1) OVER (ORDER BY year(dt))) >= 1
THEN 'Yes'
ELSE 'No'
END as significant_change
from weather_rcd_china
where
year(dt) between 2011 and 2022
and city = 'shenzhen'
group by 1
order by 1
WITH yearly_avg AS (
SELECT
city,
YEAR(dt) AS year,
AVG(REPLACE(tmp_h, '℃', '') + 0) AS avg_high_temp
FROM weather_rcd_china
WHERE city = 'shenzhen'
AND dt BETWEEN '2011-01-01' AND '2022-12-31'
GROUP BY city, YEAR(dt)
),
yearly_avg_with_lag AS (
SELECT
city,
year,
avg_high_temp,
LAG(avg_high_temp) OVER (ORDER BY year) AS prev_year_avg_temp
FROM yearly_avg
),
temp_changes AS (
SELECT
year,
avg_high_temp,
prev_year_avg_temp,
(avg_high_temp - COALESCE(prev_year_avg_temp, 0)) AS temp_change
FROM yearly_avg_with_lag
)
SELECT
year,
cast(avg_high_temp as decimal(10,2)) as avg_tmp_h,
CASE
WHEN ABS(avg_high_temp - COALESCE(prev_year_avg_temp, 0)) >=1 THEN 'Yes'
ELSE 'No'
END AS significant_change
FROM temp_changes
WHERE year BETWEEN 2011 AND 2022
ORDER BY year;
select
year(dt) as `year`
, format(avg(REGEXP_SUBSTR(tmp_h, '-?[0-9]+')), 2)as avg_tmp_h
,CASE
WHEN ABS(AVG(CAST(REGEXP_SUBSTR(tmp_h, '-?[0-9]+') AS DECIMAL)) -
LAG(AVG(CAST(REGEXP_SUBSTR(tmp_h, '-?[0-9]+') AS DECIMAL)), 1) OVER (ORDER BY year(dt))) >= 1
THEN 'Yes'
ELSE 'No'
END as significant_change
from weather_rcd_china
where
year(dt) between 2011 and 2022
and city = 'shenzhen'
group by 1
order by 1
select
year(dt) as Y
, format(avg(REGEXP_SUBSTR(if(city = 'beijing', tmp_h, null), '-?[0-9]+')), 2)as beijing
, format(avg(REGEXP_SUBSTR(if(city = 'shanghai', tmp_h, null), '-?[0-9]+')), 2)as shanghai
, format(avg(REGEXP_SUBSTR(if(city = 'shenzhen', tmp_h, null), '-?[0-9]+')), 2)as shenzhen
from weather_rcd_china
where
year(dt) between 2011 and 2022
and city in ('beijing','shanghai','shenzhen','guangzhou')
group by 1
order by 1
select
year(dt) as Y
, format(avg(REGEXP_SUBSTR(if(city = 'beijing', tmp_h, null), '-?[0-9]+')), 2)as beijing
, format(avg(REGEXP_SUBSTR(if(city = 'shanghai', tmp_h, null), '-?[0-9]+')), 2)as shanghai
, format(avg(REGEXP_SUBSTR(if(city = 'shenzhen', tmp_h, null), '-?[0-9]+')), 2)as shenzhen
, format(avg(REGEXP_SUBSTR(if(city = 'guangzhou', tmp_h, null), '-?[0-9]+')), 2)as '广州'
from weather_rcd_china
where
year(dt) between 2011 and 2022
and city in ('beijing','shanghai','shenzhen','guangzhou')
group by 1
order by 1
select
year(dt) as Y
, format(avg(REGEXP_SUBSTR(if(city = 'beijing', tmp_h, null), '-?[0-9]+')), 2)as beijing
, format(avg(REGEXP_SUBSTR(if(city = 'shanghai', tmp_h, null), '-?[0-9]+')), 2)as shanghai
, format(avg(REGEXP_SUBSTR(if(city = 'shenzhen', tmp_h, null), '-?[0-9]+')), 2)as shenzhen
, format(avg(REGEXP_SUBSTR(if(city = 'guangzhou', tmp_h, null), '-?[0-9]+')), 2)as guangzhou
from weather_rcd_china
where
year(dt) between 2011 and 2022
and city in ('beijing','shanghai','shenzhen','guangzhou')
group by 1
order by 1
select
city
, sum(case when con like '%雪%' then 1 else 0 end) as snowy_days
from weather_rcd_china
where month(dt) in (12, 1, 2)
group by 1
order by 2 desc
select
city
, sum(case when con like '%多云%' then 1 else 0 end) as cloudy_days
, concat(format((sum(case when con like '%多云%' then 1 else 0 end) / count(1)) * 100, 2), '%') as P
from weather_rcd_china
where year(dt) = '2021'
group by 1
order by 3 desc
SELECT
HOUR(local_call_time) AS local_hour,
COUNT(1) AS cnt
FROM
(
SELECT
order_id,
cust_uid,
DATE_ADD(call_time, INTERVAL -4 HOUR) AS local_call_time,
grab_time,
cancel_time,
finish_time
FROM
didi_order_rcd
) t
GROUP BY
HOUR(local_call_time)
ORDER BY
cnt DESC
select hour(local_time) as local_time, count(1) as cnt
from (
selectCONVERT_TZ(call_time, '+00:00', '-04:00') AS local_time
from didi_order_rcd
) t1
group by 1
order by 2 desc
select hour(local_time) as local_time, count(1) as cnt
from (
selectCONVERT_TZ(call_time, '+00:00', '-04:00') AS local_time
from didi_order_rcd
) t1
group by 1
order by 2 desc, 1
select
count(1) as total_orders
, count(grab_time >= call_time or null ) as answered_orders
, concat(round((count(grab_time >= call_time or null ) / count(1))*100, 2), '%') as answer_rate
from didi_order_rcd
where
substring(call_time,1,10) = '2021-05-03'
WITH UserActivity AS (
SELECT
t1.usr_id,
t1.live_id,
t1.enter_time AS event_time,
1 AS act
FROM
ks_live_t1 t1
UNION ALL
SELECT
t1.usr_id,
t1.live_id,
t1.leave_time AS event_time,
-1 AS act
FROM
ks_live_t1 t1
),
CumulativeOnline AS (
SELECT
live_id,
event_time,
SUM(act) OVER (PARTITION BY live_id ORDER BY event_time) AS online_users
FROM
UserActivity
),
PeakOnline AS (
SELECT
live_id,
event_time,
online_users,
MAX(online_users) OVER (PARTITION BY live_id) AS max_online_users
FROM
CumulativeOnline
),
FirstLastPeak AS (
SELECT
live_id,
min(event_time) AS first_peak_time,
max(event_time) AS last_peak_time,
max_online_users
FROM
PeakOnline
WHERE
online_users = max_online_users
GROUP BY
live_id, max_online_users
)
SELECT
flp.live_id,
t2.live_nm,
flp.max_online_users,
flp.first_peak_time,
flp.last_peak_time
FROM
FirstLastPeak flp
JOIN
ks_live_t2 t2
ON
flp.live_id = t2.live_id
ORDER BY
flp.max_online_users DESC;
with data1 as(
select usr_id, live_id, enter_time as time1, 1 as flag1
from ks_live_t1
union all
select usr_id, live_id, leave_time as time1, -1 as flag1
from ks_live_t1
),
data2 as(
select
live_id
, sum(flag1) over(partition by live_id order by time1 ) as online_users
from data1
)
select t1.live_id,t2.live_nm, max(online_users) as max_online_users
from data2 t1 left join ks_live_t2 t2 on t1.live_id = t2.live_id
group by 1, 2
order by 3 desc
with data1 as(
select usr_id, live_id, enter_time as time1, 1 as flag1
from ks_live_t1
union all
select usr_id, live_id, leave_time as time1, -1 as flag1
from ks_live_t1
),
data2 as(
select
live_id
, time1
, sum(flag1) over(partition by live_id order by time1 ) as online_users
from data1
),
data3 as(
select live_id, max(online_users) as max_online_users
from data2
group by 1
)
select
t1.live_id
, t2.live_nm
, t3.max_online_users as max_online_users
, min(t1.time1) as first_peak_time
, max(t1.time1) as last_peak_time
from data2 t1 left join ks_live_t2 t2 on t1.live_id = t2.live_id left join data3 t3 on t1.live_id = t3.live_id
where t1.online_users = t3.max_online_users
group by 1, 2, 3
order by 3 desc
with data1 as(
select usr_id, live_id, enter_time as time1, 1 as flag1
from ks_live_t1
union all
select usr_id, live_id, leave_time as time1, -1 as flag1
from ks_live_t1
),
data2 as(
select
live_id
, sum(flag1) over(partition by live_id order by time1 ) as online_users
from data1
)
select t1.live_id,t2.live_nm, max(online_users) as max_online_users
from data2 t1 left join ks_live_t2 t2 on t1.live_id = t2.live_id
group by 1,2
order by 3 desc
select t1.live_id, t2.live_nm, online_users
from (
select
live_id
, count(distinct usr_id) as online_users
from ks_live_t1
where enter_time <= '2021-09-12 23:48:38' and leave_time >= '2021-09-12 23:48:38'
group by 1
) t1 left join ks_live_t2 t2 on t1.live_id = t2.live_id
order by 3 desc,1
select t1.live_id, t2.live_nm, online_users
from (
select
live_id
, count(distinct usr_id) as online_users
from ks_live_t1
where enter_time <= '2021-09-12 23:48:38' and leave_time >= '2021-09-12 23:48:38'
group by 1
) t1 left join ks_live_t2 t2 on t1.live_id = t2.live_id
order by 3 desc,1 desc