Skip to content

本章主要介绍 Doris 建表,分区表以及 resource,catalog 的创建以及管理。

Doris 分区表

建表语句:

sql
CREATE TABLE IF NOT EXISTS video_gps(
    `vehicle_id` varchar COMMENT "车辆ID",
	`terminal_code` varchar COMMENT "车辆编码",
    `clientid` int COMMENT "所属客户ID",
    `location_mode` INT COMMENT "定位模式",
    `terminal_id` varchar COMMENT "设备id",
	`gpsdate` DATE COMMENT "卫星定位日期:yyyy-MM-dd",
    `gpstime` DATETIME COMMENT "卫星定位时间",
    `recvtime` DATETIME COMMENT "服务器时间",
    `serverid` INT COMMENT "服务器id",
    `speed` double COMMENT "速度",
    `av` INT COMMENT "有效性",
    `alt` double COMMENT "高度",
	`dir` INT COMMENT "方向",
	`satellite` int  comment "卫星个数",
	`lng` double  comment "原始经度",
	`lat` double  comment "原始纬度",
	`glng_xz` double comment "谷歌经度修正值",
	`glat_xz` double comment "谷歌纬度修正值",
	`blng_xz` double comment "百度经度修正值",
	`blat_xz` double comment "百度纬度修正值",
	`electricity` int comment "电量",
	`duelectricity` double comment "电量v",
	`gsm` int comment "gsm数量",
	`lbs` int comment "基站数",
	`state` int comment "状态",
	`today_distance` double comment "当天里程",
	`distance` double  comment "里程",
	`last_run_time` datetime comment "最后一次位置",
	`alarm` int  comment "报警",
	`alarmsoft` int  comment "软件报警",
	`alarm1078` int  comment "1078报警",
	 param varchar,
	 wifi int,
	 markids varchar,
	 accuracy int,
	 electnum int,
	 temp1 double, 
	 temp2 double, 
	 temp3 double, 
	 temp4 double, 
	 humidity1 double, 
	 humidity2 double, 
	 humidity3 double, 
	 humidity4 double,
	 oil  double, 
	 oil2  double, 
	 oil3  double, 
	 oil4  double, 
	 vol1  double, 
	 vol2  double, 
	 vol3  double, 
	 vol4  double, 
	 sumoil  double,
	 sumother  double, 
	 roadlevel varchar,
	 maxspeed double,
	 logger_speed double,
	 alarmcustid varchar,
	 posinfo varchar,
	 identityno varchar,
	 weight double,
	 weight2 double,
	 zhongliang double,
	 zhongliang2 double,
	 alarm2 int,
	 alarmsoft2 int,
	 dayacctime bigint,
	 totalacctime bigint
)
engine=OLAP
DUPLICATE KEY(vehicle_id, terminal_code,clientid)
PARTITION BY RANGE(gpsdate) ()
DISTRIBUTED BY HASH(`vehicle_id`) BUCKETS 5
PROPERTIES (
  "replication_num" = "1",
  "bloom_filter_columns"="vehicle_id,terminal_code,clientid,location_mode",
  "dynamic_partition.enable" = "true",
  "dynamic_partition.time_unit" = "DAY",
  "dynamic_partition.history_partition_num" = "1",
  "dynamic_partition.start" = "-4096",
  "dynamic_partition.end" = "3",
  "dynamic_partition.prefix" = "p",
  "dynamic_partition.buckets" = "5"
);

调整分区:

sql
ALTER TABLE video_gps SET (
    "dynamic_partition.enable" = "true",
    "dynamic_partition.time_unit" = "DAY",
    "dynamic_partition.history_partition_num" = "1",
    "dynamic_partition.start" = "-4096",
    "dynamic_partition.end" = "3",
    "dynamic_partition.prefix" = "p",
    "dynamic_partition.buckets" = "5"
);

调整BloomFilter索引:

sql
ALTER TABLE exlive3.video_gps SET ("bloom_filter_columns" = "vehicle_id,terminal_code,clientid,location_mode");

上述开启自动分区,会从日期中读取 gpsdate 字段,进行自动分区并落入数据中。

分区字段解释:

  • dynamic_partition.start

动态分区的起始偏移,为负数。根据 time_unit 属性的不同,以当天(星期/月)为基准,分区范围在此偏移之前的分区将会被删除。如果不填写,则默认为 -2147483648,即不删除历史分区。 start 为保留过去天数的日期,过去后会删除数据,请注意。

  • dynamic_partition.end(必选参数)

动态分区的结束偏移,为正数。根据 time_unit 属性的不同,以当天(星期/月)为基准,提前创建未来对应范围的分区。

  • dynamic_partition.history_partition_num

当 create_history_partition 为 true 时,该参数用于指定创建历史分区数量。默认值为 -1, 即未设置。

sql
CREATE TABLE IF NOT EXISTS user_lab(
    `device_id` LARGEINT NOT NULL COMMENT "设备id",
    `date` DATE NOT NULL COMMENT "数据灌入日期时间",
    `city` VARCHAR(20) COMMENT "用户所在城市",
    `age` SMALLINT COMMENT "用户年龄",
    `sex` TINYINT COMMENT "用户性别",
    `last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "用户最后一次访问时间",
    `cost` BIGINT SUM DEFAULT "0" COMMENT "用户总消费",
    `max_dwell_time` INT MAX DEFAULT "0" COMMENT "用户最大停留时间",
    `min_dwell_time` INT MIN DEFAULT "99999" COMMENT "用户最小停留时间"
)
engine=OLAP
DUPLICATE KEY(device_id)
PARTITION BY RANGE(k1) ()
(
FROM ("2013-01-01") TO ("2023-01-01") INTERVAL 1 DAY
)
DISTRIBUTED BY HASH(`device_id`) BUCKETS 4
PROPERTIES (
    "replication_num" = "1",
    "replication_allocation" = "tag.location.default: 1",
	"dynamic_partition.enable" = "true",
    "dynamic_partition.time_unit" = "DAY",
    "dynamic_partition.start" = "-7",
    "dynamic_partition.end" = "3",
    "dynamic_partition.prefix" = "p",
    "dynamic_partition.buckets" = "8"
);

基本查询

sql
-- 查看 resources 资源
show resources;
-- 查看总的命名空间
show catalogs;
-- 查看指定表的分区
SHOW PARTITIONS FROM video_gps;
-- 查看某表的建表SQL
show create table db.table;
-- 查看当前的导入任务
show load;
-- 查看表索引
SHOW INDEX FROM db.table;
- 查看表的统计信息,如:更新、数据量、存储量
SHOW TABLE STATS db.table;

如果需要手动分区,需要将分区关闭自动:

sql
ALTER TABLE video_gps SET (
    "dynamic_partition.enable" = "false",
    "dynamic_partition.buckets" = "8"
);

然后手动添加一个分区(请注意,开闭区间:包含start用“[”,结束区间用“)”,我在这里出错还一直没看出来):

sql
ALTER TABLE video_gps 
ADD PARTITION p20250701
VALUES [("2025-07-01"), ("2025-07-02"));

删除分区:

sql
ALTER TABLE video_gps DROP PARTITION p20250701;

Doris Resource

创建 mysql 库的 resource 或 catalog,需采用 mysql-jdbc 驱动;

创建 jdbc_drivers 并将 mysql-connector-j-8.0.32.jar 放入。

shell
ln -s jdbc_drivers fe/jdbc_drivers
ln -s jdbc_drivers be/jdbc_drivers

并将 fe 和 be conf/fe.conf conf/be.conf 下的 jdbc_drivers_dir 修改

shell
jdbc_drivers_dir = ${DORIS_HOME}/jdbc_drivers

重启 doris。

创建一个 mysql 的 resource

sql
CREATE EXTERNAL RESOURCE mysql_video_jdbc_resource
properties (
  "type"="jdbc",
  "user"="root",
  "password"="root",
  "jdbc_url"="jdbc:mysql://172.18.0.32:3566/video?characterEncoding=utf8&useSSL=false&useCursorFetch=true",
  "driver_url"="file:///home/exlive/software/apache-doris/driver/mysql-connector-j-8.0.32.jar",
  "driver_class"="com.mysql.cj.jdbc.Driver"
);

CREATE EXTERNAL TABLE video_adas_client_score_2024 ( id int COMMENT '自增id', clientId int, vhcCount int, type int, reportDate datetime, score double, mileage double, highRiskCount int, midRiskCount int, lowRiskCount int, momHighRiskCount int, momMidRiskCount int, momLowRiskCount int, alarmCounts varchar, dayAlarmCounts varchar, nightAlarmCounts varchar, highSpeedAlarmCounts varchar, lowSpeedAlarmCounts varchar, table_id varchar, report varchar ) ENGINE=JDBC PROPERTIES ( "resource" = "mysql_video_jdbc_resource", "table" = "video_adas_client_score_2024", "table_type"="mysql" );

Doris Catalog

创建 mysql 库的 resource 或 catalog,需采用 mysql-jdbc 驱动;

创建 jdbc_drivers 并将 mysql-connector-j-8.0.32.jar 放入。

shell
ln -s jdbc_drivers fe/jdbc_drivers
ln -s jdbc_drivers be/jdbc_drivers

并将 fe 和 be conf/fe.conf conf/be.conf 下的 jdbc_drivers_dir 修改

shell
jdbc_drivers_dir = ${DORIS_HOME}/jdbc_drivers

重启 doris。

sql
CREATE CATALOG video PROPERTIES (
    "type"="jdbc",
    "user"="root",
    "password"="root",
    "jdbc_url" = "jdbc:mysql://172.18.0.32:3566/video?characterEncoding=utf8&useSSL=false&useCursorFetch=true",
    "driver_url" = "file:///home/exlive/software/apache-doris/driver/mysql-connector-j-8.0.32.jar",
    "driver_class" = "com.mysql.cj.jdbc.Driver",
	"metadata_refresh_interval_sec"="20"
);

-- 如果数据库驱动已经在 classpath 中,则
CREATE CATALOG video PROPERTIES (
    "type"="jdbc",
    "user"="root",
    "password"="FC3#rtPu20@tn8749m",
    "jdbc_url" = "jdbc:mysql://172.25.224.205:3566/video?characterEncoding=utf8&useSSL=false&useCursorFetch=true",
    "driver_class" = "com.mysql.cj.jdbc.Driver",
    "driver_url" = "file:///home/exlive3/doris/be/jdbc_drivers/mysql-connector-j-8.0.32.jar",
	"metadata_refresh_interval_sec"="60"
);
sql
-- 批量导入
INSERT INTO tbl1 VALUES ("qweasdzxcqweasdzxc"), ("a");

-- insert select 方式导入
INSERT INTO tbl2 WITH LABEL label1 
SELECT * FROM tbl3;

-- 从文件批量导入
LOAD DATA
INFILE '/path/test_data.csv'
INTO TABLE test_db.MySQL_Load_tbl
COLUMNS TERMINATED BY ','
LINES TERMINATED BY '\n'
IGNORE 1 LINES

实时 Update 导致版本大量增加

参数调优配置

sql
-- 调整合并策略
ALTER TABLE tbl SET ("cumulative_compaction_min_deltas" = "5");
ALTER TABLE tbl SET ("cumulative_compaction_max_deltas" = "50");

-- 限制最大版本数
ALTER TABLE tbl SET ("max_tablet_version_num" = "500");

监控与维护

sql
-- 查看版本分布
SHOW TABLET FROM tbl WHERE VersionCount > 50;

-- 检查合并状态
SHOW PROC "/compactions";

-- 手动触发合并
ADMIN COMPACT TABLE tbl;

-- 手动合并
ADMIN COMPACT TABLE exlive3.video_gps PARTITION p20250814 WHERE type = 'CUMULATIVE';