连接 Clickhouse
1./clickhouse-client -h 9.218.16.160 --port 9000 -u 'yyy' --password 'zzzzz'
查询库表信息
1use databases;
2select * from table;
3show create table;
查询数据库数据量大小
1##按照数据量倒序
2SELECT database, formatReadableSize(on_disk) FROM (SELECT database, sum(bytes_on_disk) AS on_disk FROM system.parts GROUP BY database) ORDER BY on_disk DESC;
3
4SELECT database, formatReadableSize(sum(bytes_on_disk)) AS on_disk FROM system.parts GROUP BY database ORDER BY on_disk DESC;
查询表数据量大小
1select database, table, sum(rows) as "总行数", formatReadableSize(sum(data_uncompressed_bytes)) as "原始大小", formatReadableSize(sum(data_compressed_bytes)) as "压缩大小", 2*CEILING(sum(data_compressed_bytes)/(10*1024*1024*1024*1024)/0.7)*3+3 as "计算单元数", 2*CEILING(sum(data_compressed_bytes)/(10*1024*1024*1024*1024)/0.7)*2+3 as "存储单元数", round(sum(data_compressed_bytes) / sum(data_uncompressed_bytes) * 100, 0) "压缩率" from system.parts group by database,table order by sum(data_compressed_bytes) desc limit 100;
2
3
4select database, table, sum(rows) as "总行数", formatReadableSize(sum(data_uncompressed_bytes)) as "原始大小", formatReadableSize(sum(data_compressed_bytes)) as "压缩大小", round(sum(data_compressed_bytes) / sum(data_uncompressed_bytes) * 100, 0) "压缩率" from system.parts group by database,table order by sum(data_compressed_bytes) desc limit 100;
设置数据显示格式
1SELECT * FROM TABLE FORMAT Vertical
查询某范围时间的数据
1## 查询近几小时
2SELECT stactics_date FROM table where subtractHours(stactics_date, 7) > toDate(now());
3
4## 查询今天
5SELECT stactics_date FROM table where toDate(stactics_date) = today();
6
7## 查询昨天
8SELECT stactics_date FROM table where toDate(stactics_date) = yesterday();
9
10##查询近几天
11SELECT stactics_date FROM table where subtractDays(stactics_date, 7) > toDate(now());
12
13## 查询本周
14SELECT stactics_date FROM table where stactics_date BETWEEN subtractDays(stactics_date, toDayOfWeek(stactics_date)) and subtractDays(stactics_date, toDayOfWeek(tactics_date) + 7);
15
16##查询本月
17SELECT stactics_date FROM table where toMonth(tactics_date) = toMonth(now());
18
19##按照自定义时间段(小时维度)
20SELECT stactics_date FROM table where stactics_date BETWEEN yesterday() and subtractHours(stactics_date, 1);
21
22##按照自定义时间段(字符时间格式加减天数)
23SELECT stactics_date FROM table where stactics_date BETWEEN
24DATE_SUB( str_to_date({startTime},'YYYY-MM-DD HH24:MI:SS'), INTERVAL 1 DAY )
25AND DATE_SUB( str_to_date({endTime},'YYYY-MM-DD HH24:MI:SS'), INTERVAL 1 DAY );
26
27## 字符串日期自定义时间
28SELECT COUNT(*) from table where parseDateTimeBestEffort(import_date) BETWEEN parseDateTimeBestEffort('20220831') AND parseDateTimeBestEffort('20230217');
29
30## 每天时间段内
31SELECT stactics_date FROM table where toYYYYMMDDhhmmss(tactics_date)%1000000 BETWEEN '123000' and '160000';
32
33## 按小时分组
34SELECT
35 stactics_name,
36 stactics_id,
37 concat(toString(toHour(stactics_date)), '-', toString(toHour(stactics_date) + 1), '点') stacticsDate,count(*) stacticsCount
38FROM table GROUP BY stactics_name, stactics_id,toHour(stactics_date);
39
40## 按天分组
41SELECT stactics_name,stactics_id,toDate(stactics_date) stacticsDate,count(*) stacticsCount
42FROM table GROUP BY stactics_name,stactics_id,toDate(stactics_date);
添加表字段
1ALTER TABLE t_all_behavior_data_test ON CLUSTER 'default_cluster' ADD COLUMN is_valid_click Int8 AFTER act_type;
2
3ALTER TABLE t_all_behavior_data_test ON CLUSTER 'default_cluster'
4MODIFY COLUMN is_valid_click Int8
5COMMENT '是否有效点击';
导出数据
1clickhouse-client -h 9.138.131.21 --port 9000 -u 'xxxx' --password 'xxxx' --database='data_cube_ad' --query="select imp_date,user_id,action_time,action_type,context_id,action_item_object_id,action_item_object_type,title,url from data_cube_ad.t_all_behavior_data_view where imp_date=20241119 and context_id_cn='微信公众号' and action_type_cn='阅读' and action_item_object_type_cn='文章' and title!='' and url='' limit 5 FORMAT CSV" > article.csv
导入数据
1clickhouse-client -h 127.0.0.1 --database='db' --query='insert into db.test FORMAT CSV' < ./test.csv
删除数据
1# 1、首先查询出分区列表
2SELECT partition AS partition FROM system.parts WHERE table = 'middle_ad_expo_ka_user_daily_local' AND partition >= '20210101' AND partition < '20241225' order by partition desc;
3
4# 2、使用文本编辑器处理,最终格式如下:
5DROP PARTITION '20240113',DROP PARTITION '20240112',DROP PARTITION '20240111',DROP PARTITION '20240110',DROP PARTITION '20240109',DROP PARTITION '20240108';
6
7# 3、执行删除语句
8ALTER TABLE middle_ad_expo_ka_user_daily_local DROP PARTITION '20240113',DROP PARTITION '20240112',DROP PARTITION '20240111',DROP PARTITION '20240110',DROP PARTITION '20240109',DROP PARTITION '20240108';