Hive-SQL

每天有多少用户首次流失

select t4.aa,count(*)
from
(
select  t3.role_id role_id,min(t3.date1) aa
from
(
select t2.role_id role_id,t2.date1 date1,t2.date2 date2
from
(
select t1.role_id role_id,t1.date date1,lag(t1.date,1) over (partition by role_id) date2
from
(
select *,row_number() over (partition by role_id order by ta.date asc) as rn
from 
(select distinct role_id,split(time," ")[0] date
from g37.logoutrole
where date between 20160902 and 20161219) ta ) t1) t2
where int(datediff(t2.date2,t2.date1)) > 7 ) t3
group by t3.role_id
) t4
group by t4.aa

分位数

from(
select t1.date a,percentile(t1.a, array(0,0.25,0.5,0.75,1)) over (partition by t1.date) b
from
  (select date, role_id,sum(cast(int(fn.json(source, "$.num")) as bigint)) a
   from g37.currency_xunzhang
   where date between 20161108 and 20161115
     and int(fn.json(source,"$.code_op"))= 1
   group by date, role_id) t1
) t2

每天和每个月的去重登录id量
SELECT
month(time),
date,
COUNT(DISTINCT role_id) AS uv,
GROUPING__ID
FROM g17_loginrole_utf8_log
where date >= 20161001
GROUP BY month(time), date
GROUPING SETS (month(time), date)

正则提取

select regexp_extract(source, "level=(\\d+)", 1),count(regexp_extract(source, "level=(\\d+)", 1))
from xxx
where date between 20160902 and 20160925
and key rlike "xxx"
group by regexp_extract(source, "level=(\\d+)", 1)

转换时间格式

select sum(t1.pj), sum(t1.hp)
from
(
select fn.change_date_format(time, "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd HH"),regexp_extract(source, "game_server=(\\d+)", 1), regexp_extract(source, "game_uid=(\\d+)", 1), count(pingjia) as pj, sum(pingjia) as hp
from xxx
where date between 20160825 and 20160926
group by 
fn.change_date_format(time, "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd HH"),regexp_extract(source, "game_server=(\\d+)", 1), regexp_extract(source, "game_uid=(\\d+)",1)
  ) as t1
where t1.pj - t1.hp >= 10

连接字符串

select role_id,split(max(concat(time,"|",get_json_object(source,"$.left_count"))) ,"|")[1]
from xxx as aa
where date between 20160925 and 20160927
group by role_id

最值

select role_id,max(get_json_object(source,"$.left_count")) 
from xxx 
where get_json_object(source,"$.item_name") rlike "aaa" 
and date between 20160925 and 20160926 
group by role_id

时间排序,取最后一条

select t3.server, t3.role_id, t3.cou, t2.cou
from
(
select server,
       role_id,
       max(int(get_json_object(source,"$.left_count"))) as cou
from xxx
where date between 20160915 and 20160926
  and get_json_object(source,"$.item_name") rlike "aaa"
group by server,
         role_id
         ) as t3
join

(
select t1.server, t1.role_id, int(get_json_object(t1.source,"$.left_count")) as cou from 
(
select * , ROW_NUMBER() over (partition by server, role_id ORDER BY time desc) as rn
from xxx
where date between 20160925 and 20160927
and get_json_object(source,"$.item_name") rlike "aaa"
  ) as t1 where t1.rn = 1  ) as t2
on t3.server = t2.server and t3.role_id = t2.role_id

where t3.cou = t2.cou

rlike正则(或)

select player
from xxx
where player rlike "重叠|叠加|冲突|触发"
and date between 20161001 and 20161008

创建外部表

create external table table_name (
  name string,
  urs string,
  ccid int,
  uid int,
  hn int,
  role_id int
  )
  row format delimited fields terminated by "\t"
  stored as textfile
  location '/path/file/'

文件引用为表

insert overwrite directory 'hdfs:///path/path/sjsj'
select a.role_id,time,fn.json(source,"$.role_level") 
from xxx as a where date >=20160401 and date <=20160430
and fn.json(source,"$.reason") ="player-upgrade"
and fn.json(source,"$.type") ="lv"
and fn.json(source,"$.role_level")  in ('20','60')
and a.role_id in
(select server from xxx)
===
and a.role_id in
(select server from xxx)

流失天数

select t1.server,t1.account_id,t1.liushi,t1.cost,t2.role_id,t2.udid
from
(select server,account_id,(20161013-max(date)) liushi,max(int(fn.json(source, "$.total_cost"))) cost
from loginrole
where date between 20161006 and 20161013
 group by server,account_id
) t1
join
(
 select fn.json(source, "$.account_id") account_id,fn.json(source, "$.role_id") role_id,fn.json(source, "$.udid") udid,fn.json(source, "$.server") server
from createrole
where date between 20161006 and 20161012) t2
on t1.server = t2.server
where t1.account_id = t2.account_id

实际登录天数

select t4.server,t4.account_id,count(cc)
from
 (
 select server,account_id,split(time, " ")[0] cc
 from loginrole
 where date between 20161006 and 20161015
 group by server,account_id,split(time, " ")[0]
  ) t4
  group by t4.server,t4.account_id

key是数组

select role_id,count(*)
from xxx
where fn.to_array(fn.json(source, "$.cost[0]"))[0] in ("490001", "490003")
and date between 20160902 and 20160916
group by role_id

SUBSTRING ( expression, start, length )

select distinct aa.account_id
from xxx aa
where date between 20161006 and 20161017
and int(aa.server)<=20000
and int(fn.json(source,"$.level"))>5
and concat(aa.account_id,"|",concat(substring(aa.date,1,4),"-",substring(aa.date,5,2),"-",substring(aa.date,7,2)))
in (
select concat(account_id,"|",date_add(concat(substring(date,1,4),"-",substring(date,5,2),"-",substring(date,7,2)),3)) host_iid_seven
from xxx
where date between 20161006 and 20161017
and int(server)<=20000
)
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 156,069评论 4 358
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 66,212评论 1 287
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 105,912评论 0 237
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 43,424评论 0 202
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 51,741评论 3 285
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 40,194评论 1 206
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 31,553评论 2 307
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 30,289评论 0 194
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 33,923评论 1 237
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 30,251评论 2 240
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 31,775评论 1 255
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 28,144评论 2 249
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 32,698评论 3 228
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 25,936评论 0 8
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 26,658评论 0 192
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 35,214评论 2 267
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 35,159评论 2 258

推荐阅读更多精彩内容