Skip to content

Commit eb7502a

Browse files
committed
优化文档db2side impalaside sqlserverside
1 parent 83571b4 commit eb7502a

File tree

3 files changed

+344
-116
lines changed

3 files changed

+344
-116
lines changed

docs/plugin/db2Side.md

Lines changed: 107 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1-
21
## 1.格式:
3-
```
2+
3+
通过建表语句中的`PERIOD FOR SYSTEM_TIME`将表标识为维表,其中`PRIMARY KEY(keyInfo)`中的keyInfo表示用来和源表进行关联的字段,
4+
维表JOIN的条件必须与`keyInfo`字段一致。
5+
6+
```sql
47
CREATE TABLE tableName(
58
colName colType,
69
...
@@ -20,41 +23,48 @@
2023
);
2124
```
2225

23-
# 2.支持版本
26+
## 2.支持版本
27+
2428
db2 9.X
25-
29+
2630
## 3.表结构定义
27-
28-
|参数名称|含义|
29-
|----|---|
30-
| tableName | db2表名称|
31-
| colName | 列名称|
32-
| colType | 列类型 [colType支持的类型](docs/colType.md)|
33-
| PERIOD FOR SYSTEM_TIME | 关键字表明该定义的表为维表信息|
34-
| PRIMARY KEY(keyInfo) | 维表主键定义;多个列之间用逗号隔开|
35-
31+
32+
[维表参数信息](docs/plugin/sideParams.md)
33+
34+
db2独有的参数
35+
36+
| 参数名称 | 含义 | 是否必填 | 默认值 |
37+
| -------- | ------------------ | -------- | ------ |
38+
| type | 维表类型, db2 || |
39+
| url | 连接数据库 jdbcUrl || |
40+
| userName | 连接用户名 || |
41+
| password | 连接密码 || |
42+
3643
## 4.样例
3744

38-
|参数名称|含义|是否必填|默认值|
39-
|----|---|---|----|
40-
| type | 表明维表的类型 db2 |||
41-
| url | 连接mysql数据库 jdbcUrl |||
42-
| userName | db2连接用户名 |||
43-
| password | db2连接密码|||
44-
| tableName | db2表名称|||
45-
| cache | 维表缓存策略(NONE/LRU)||NONE|
46-
| partitionedJoin | 是否在維表join之前先根据 設定的key 做一次keyby操作(可以減少维表的数据缓存量)||false|
47-
48-
----------
49-
> 缓存策略
50-
* NONE: 不做内存缓存
51-
* LRU:
52-
* cacheSize: 缓存的条目数量
53-
* cacheTTLMs:缓存的过期时间(ms)
54-
55-
56-
## 5.样例
45+
### ALL全量维表定义
46+
47+
```sql
48+
-- 定义全量维表
49+
CREATE TABLE sideTable(
50+
id INT,
51+
name VARCHAR,
52+
PRIMARY KEY(id) ,
53+
PERIOD FOR SYSTEM_TIME
54+
)WITH(
55+
type ='db2',
56+
url ='jdbc:db2://172.16.8.104:50000/test?charset=utf8',
57+
userName ='dtstack',
58+
password ='abc123',
59+
tableName ='all_test_db2',
60+
cache ='ALL',
61+
cacheTTLMs ='60000',
62+
parallelism ='1'
63+
);
5764
```
65+
### LRU异步维表定义
66+
67+
```sql
5868
create table sideTable(
5969
channel varchar,
6070
xccount int,
@@ -65,15 +75,78 @@ create table sideTable(
6575
url='jdbc:db2://172.16.8.104:50000/test?charset=utf8',
6676
userName='dtstack',
6777
password='abc123',
68-
tableName='sidetest',
78+
tableName='lru_test_db2',
6979
cache ='LRU',
7080
cacheSize ='10000',
7181
cacheTTLMs ='60000',
7282
parallelism ='1',
7383
partitionedJoin='false'
7484
);
85+
```
7586

87+
### Db2异步维表关联
7688

77-
```
89+
```sql
90+
CREATE TABLE MyTable(
91+
id int,
92+
name varchar
93+
)WITH(
94+
type ='kafka11',
95+
bootstrapServers ='172.16.8.107:9092',
96+
zookeeperQuorum ='172.16.8.107:2181/kafka',
97+
offsetReset ='latest',
98+
topic ='cannan_yctest01',
99+
timezone='Asia/Shanghai',
100+
enableKeyPartitions ='false',
101+
topicIsPattern ='false',
102+
parallelism ='1'
103+
);
78104

105+
CREATE TABLE MyResult(
106+
id INT,
107+
name VARCHAR
108+
)WITH(
109+
type='db2',
110+
url='jdbc:db2://172.16.8.104:50000/test?charset=utf8',
111+
userName='dtstack',
112+
password='abc123',
113+
tableName ='test_db2_zf',
114+
updateMode ='append',
115+
parallelism ='1',
116+
batchSize ='100',
117+
batchWaitInterval ='1000'
118+
);
119+
120+
CREATE TABLE sideTable(
121+
id INT,
122+
name VARCHAR,
123+
PRIMARY KEY(id) ,
124+
PERIOD FOR SYSTEM_TIME
125+
)WITH(
126+
type='db2',
127+
url='jdbc:db2://172.16.8.104:50000/test?charset=utf8',
128+
userName='dtstack',
129+
password='abc123',
130+
tableName ='test_db2_10',
131+
partitionedJoin ='false',
132+
cache ='LRU',
133+
cacheSize ='10000',
134+
cacheTTLMs ='60000',
135+
asyncPoolSize ='3',
136+
parallelism ='1'
137+
);
138+
139+
insert
140+
into
141+
MyResult
142+
select
143+
m.id,
144+
s.name
145+
from
146+
MyTable m
147+
join
148+
sideTable s
149+
on m.id=s.id;
150+
151+
```
79152

docs/plugin/impalaSide.md

Lines changed: 126 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
21
## 1.格式:
3-
```
2+
3+
```sql
44
CREATE TABLE tableName(
55
colName colType,
66
...
@@ -20,51 +20,65 @@
2020
);
2121
```
2222

23-
# 2.支持版本
23+
## 2.支持版本
24+
2425
2.10.0-cdh5.13.0
25-
26+
2627
## 3.表结构定义
27-
28-
|参数名称|含义|
29-
|----|---|
30-
| tableName | 注册到flink的表名称|
31-
| colName | 列名称|
32-
| colType | 列类型 [colType支持的类型](docs/colType.md)|
33-
| PERIOD FOR SYSTEM_TIME | 关键字表明该定义的表为维表信息|
34-
| PRIMARY KEY(keyInfo) | 维表主键定义;多个列之间用逗号隔开|
35-
36-
## 4.参数
37-
38-
|参数名称|含义|是否必填|默认值|
39-
|----|---|---|----|
40-
| type | 表明维表的类型[impala] |||
41-
| url | 连接postgresql数据库 jdbcUrl |||
42-
| userName | postgresql连接用户名 |||
43-
| password | postgresql连接密码|||
44-
| tableName | postgresql表名称|||
45-
| authMech | 身份验证机制 (0, 1, 2, 3), 暂不支持kerberos ||0|
46-
| principal | kerberos用于登录的principal(authMech=1时独有) |authMech=1为必填|
47-
| keyTabFilePath | keytab文件的路径(authMech=1时独有) |authMech=1为必填 ||
48-
| krb5FilePath | krb5.conf文件路径(authMech=1时独有) |authMech=1为必填||
49-
| krbServiceName | Impala服务器的Kerberos principal名称(authMech=1时独有) |authMech=1为必填||
50-
| krbRealm | Kerberos的域名(authMech=1时独有) || HADOOP.COM |
51-
| enablePartition | 是否支持分区||false|
52-
| partitionfields | 分区字段名|否,enablePartition='true'时为必填||
53-
| partitionFieldTypes | 分区字段类型 |否,enablePartition='true'时为必填||
54-
| partitionValues | 分区值|否,enablePartition='true'时为必填||
55-
| cache | 维表缓存策略(NONE/LRU/ALL)||NONE|
56-
| partitionedJoin | 是否在維表join之前先根据 設定的key 做一次keyby操作(可以減少维表的数据缓存量)||false|
57-
58-
----------
59-
> 缓存策略
60-
* NONE: 不做内存缓存
61-
* LRU:
62-
* cacheSize: 缓存的条目数量
63-
* cacheTTLMs:缓存的过期时间(ms)
64-
65-
66-
## 5.样例
28+
29+
[维表参数信息](docs/plugin/sideParams.md)
30+
31+
impala独有的参数配置
32+
33+
| 参数名称 | 含义 | 是否必填 | 默认值 |
34+
| ------------------- | ------------------------------------------------------------ | --------------------------------- | ---------- |
35+
| type | 表明维表的类型[impala] || |
36+
| url | 连接impala数据库 jdbcUrl || |
37+
| userName | impala连接用户名 || |
38+
| password | impala连接密码 || |
39+
| tableName | impala表名称 || |
40+
| authMech | 身份验证机制 (0, 1, 2, 3), 暂不支持kerberos || 0 |
41+
| principal | kerberos用于登录的principal(authMech=1时独有) | authMech=1为必填 | |
42+
| keyTabFilePath | keytab文件的路径(authMech=1时独有) | authMech=1为必填 | |
43+
| krb5FilePath | krb5.conf文件路径(authMech=1时独有) | authMech=1为必填 | |
44+
| krbServiceName | Impala服务器的Kerberos principal名称(authMech=1时独有) | authMech=1为必填 | |
45+
| krbRealm | Kerberos的域名(authMech=1时独有) || HADOOP.COM |
46+
| enablePartition | 是否支持分区 || false |
47+
| partitionfields | 分区字段名 | 否,enablePartition='true'时为必填 | |
48+
| partitionFieldTypes | 分区字段类型 | 否,enablePartition='true'时为必填 | |
49+
| partitionValues | 分区值 | 否,enablePartition='true'时为必填 | |
50+
| cache | 维表缓存策略(NONE/LRU/ALL) || NONE |
51+
| partitionedJoin | 是否在维表join之前先根据设定的key做一次keyby操作(可以减少维表的数据缓存量) || false |
52+
53+
## 4.样例
54+
55+
### ALL全量维表定义
56+
57+
```sql
58+
-- 定义全量维表
59+
CREATE TABLE sideTable(
60+
id INT,
61+
name VARCHAR,
62+
PRIMARY KEY(id) ,
63+
PERIOD FOR SYSTEM_TIME
64+
)WITH(
65+
type ='impala',
66+
url ='jdbc:impala://localhost:21050/mqtest',
67+
userName ='dtstack',
68+
password ='1abc123',
69+
tableName ='test_impala_all',
70+
authMech='3',
71+
cache ='ALL',
72+
cacheTTLMs ='60000',
73+
parallelism ='2',
74+
partitionedJoin='false'
75+
);
76+
6777
```
78+
79+
### LRU异步维表定义
80+
81+
```sql
6882
create table sideTable(
6983
channel varchar,
7084
xccount int,
@@ -87,11 +101,77 @@ create table sideTable(
87101

88102
```
89103

90-
## 6.分区样例
104+
### Impala异步维表关联
91105

92-
注:分区字段放在最后面,如下,name是分区字段,放在channel,xccount字段的后面
106+
```sql
107+
CREATE TABLE MyTable(
108+
id int,
109+
name varchar
110+
)WITH(
111+
type ='kafka11',
112+
bootstrapServers ='172.16.8.107:9092',
113+
zookeeperQuorum ='172.16.8.107:2181/kafka',
114+
offsetReset ='latest',
115+
topic ='cannan_yctest01',
116+
timezone='Asia/Shanghai',
117+
enableKeyPartitions ='false',
118+
topicIsPattern ='false',
119+
parallelism ='1'
120+
);
121+
122+
CREATE TABLE MyResult(
123+
id INT,
124+
name VARCHAR
125+
)WITH(
126+
type='impala',
127+
url='jdbc:impala://localhost:21050/mytest',
128+
userName='dtstack',
129+
password='abc123',
130+
tableName ='test_impala_zf',
131+
updateMode ='append',
132+
parallelism ='1',
133+
batchSize ='100',
134+
batchWaitInterval ='1000'
135+
);
136+
137+
CREATE TABLE sideTable(
138+
id INT,
139+
name VARCHAR,
140+
PRIMARY KEY(id) ,
141+
PERIOD FOR SYSTEM_TIME
142+
)WITH(
143+
type='impala',
144+
url='jdbc:impala://localhost:21050/mytest',
145+
userName='dtstack',
146+
password='abc123',
147+
tableName ='test_impala_10',
148+
partitionedJoin ='false',
149+
cache ='LRU',
150+
cacheSize ='10000',
151+
cacheTTLMs ='60000',
152+
asyncPoolSize ='3',
153+
parallelism ='1'
154+
);
155+
156+
insert
157+
into
158+
MyResult
159+
select
160+
m.id,
161+
s.name
162+
from
163+
MyTable m
164+
join
165+
sideTable s
166+
on m.id=s.id;
93167

94168
```
169+
170+
### 分区样例
171+
172+
注:分区字段放在最后面,如下,name是分区字段,放在channel,xccount字段的后面
173+
174+
```sql
95175
create table sideTable(
96176
channel varchar,
97177
xccount int,
@@ -118,4 +198,3 @@ create table sideTable(
118198

119199
```
120200

121-

0 commit comments

Comments
 (0)