Skip to content

Commit be58566

Browse files
committed
Merge branch 'v1.8.0_dev_feature_impala' into 'v1.8.0_dev'
添加impala结果表和维表 添加impala结果表和维表 See merge request !146
2 parents 212a891 + 09f9537 commit be58566

File tree

23 files changed

+1925
-8
lines changed

23 files changed

+1925
-8
lines changed

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@
2727

2828
# 已支持
2929
* 源表:kafka 0.9、0.10、0.11、1.x版本
30-
* 维表:mysql, SQlServer,oracle, hbase, mongo, redis, cassandra, serversocket, kudu, postgresql, clickhouse
31-
* 结果表:mysql, SQlServer, oracle, hbase, elasticsearch5.x, mongo, redis, cassandra, console, kudu, postgresql, clickhouse
30+
* 维表:mysql, SQlServer,oracle, hbase, mongo, redis, cassandra, serversocket, kudu, postgresql, clickhouse, impala
31+
* 结果表:mysql, SQlServer, oracle, hbase, elasticsearch5.x, mongo, redis, cassandra, console, kudu, postgresql, clickhouse, impala
3232

3333
# 后续开发计划
3434
* 维表快照
@@ -186,6 +186,7 @@ sh submit.sh -sql D:\sideSql.txt -name xctest -remoteSqlPluginPath /opt/dtstack
186186
* [kudu 结果表插件](docs/kuduSink.md)
187187
* [postgresql 结果表插件](docs/postgresqlSink.md)
188188
* [clickhouse 结果表插件](docs/clickhouseSink.md)
189+
* [impala 结果表插件](docs/impalaSink.md)
189190

190191
### 2.3 维表插件
191192
* [hbase 维表插件](docs/hbaseSide.md)
@@ -197,6 +198,7 @@ sh submit.sh -sql D:\sideSql.txt -name xctest -remoteSqlPluginPath /opt/dtstack
197198
* [kudu 维表插件](docs/kuduSide.md)
198199
* [postgresql 维表插件](docs/postgresqlSide.md)
199200
* [clickhouse 维表插件](docs/clickhouseSide.md)
201+
* [impala 维表插件](docs/impalaSide.md)
200202

201203
## 3 性能指标(新增)
202204

docs/impalaSide.md

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
2+
## 1.格式:
3+
```
4+
CREATE TABLE tableName(
5+
colName colType,
6+
...
7+
PRIMARY KEY(keyInfo),
8+
PERIOD FOR SYSTEM_TIME
9+
)WITH(
10+
type='impala',
11+
url='jdbcUrl',
12+
userName='dbUserName',
13+
password='dbPwd',
14+
tableName='tableName',
15+
cache ='LRU',
16+
cacheSize ='10000',
17+
cacheTTLMs ='60000',
18+
parallelism ='1',
19+
partitionedJoin='false'
20+
);
21+
```
22+
23+
## 2.支持版本
24+
2.10.0-cdh5.13.0
25+
26+
## 3.表结构定义
27+
28+
|参数名称|含义|
29+
|----|---|
30+
| tableName | 注册到flink的表名称|
31+
| colName | 列名称|
32+
| colType | 列类型 [colType支持的类型](colType.md)|
33+
| PERIOD FOR SYSTEM_TIME | 关键字表明该定义的表为维表信息|
34+
| PRIMARY KEY(keyInfo) | 维表主键定义;多个列之间用逗号隔开|
35+
36+
## 4.参数
37+
38+
|参数名称|含义|是否必填|默认值|
39+
|----|---|---|----|
40+
| type | 表明维表的类型[impala] |||
41+
| url | 连接impala数据库 jdbcUrl |||
42+
| userName | impala连接用户名 |||
43+
| password | impala连接密码|||
44+
| tableName | impala表名称|||
45+
| authMech | 身份验证机制 (0, 1, 2, 3), 暂不支持kerberos ||0|
46+
| principal | kerberos用于登录的principal(authMech=1时独有) |authMech=1为必填||
47+
| keyTabFilePath | keytab文件的路径(authMech=1时独有) |authMech=1为必填 ||
48+
| krb5FilePath | krb5.conf文件路径(authMech=1时独有) |authMech=1为必填||
49+
| krbServiceName | Impala服务器的Kerberos principal名称(authMech=1时独有) |authMech=1为必填||
50+
| krbRealm | Kerberos的域名(authMech=1时独有) || HADOOP.COM |
51+
| enablePartition | 是否支持分区||false|
52+
| partitionfields | 分区字段名|否,enablePartition='true'时为必填||
53+
| partitionFieldTypes | 分区字段类型 |否,enablePartition='true'时为必填||
54+
| partitionValues | 分区值|否,enablePartition='true'时为必填||
55+
| cache | 维表缓存策略(NONE/LRU/ALL)||NONE|
56+
| partitionedJoin | 是否在维表join之前先根据设定的key做一次keyby操作(可以减少维表的数据缓存量)||false|
57+
58+
----------
59+
> 缓存策略
60+
* NONE: 不做内存缓存
61+
* LRU:
62+
* cacheSize: 缓存的条目数量
63+
* cacheTTLMs:缓存的过期时间(ms)
64+
65+
66+
## 5.样例
67+
```
68+
create table sideTable(
69+
channel varchar,
70+
xccount int,
71+
PRIMARY KEY(channel),
72+
PERIOD FOR SYSTEM_TIME
73+
)WITH(
74+
type='impala',
75+
url='jdbc:impala://localhost:21050/mytest',
76+
userName='dtstack',
77+
password='abc123',
78+
tableName='sidetest',
79+
authMech='3',
80+
cache ='LRU',
81+
cacheSize ='10000',
82+
cacheTTLMs ='60000',
83+
parallelism ='1',
84+
partitionedJoin='false'
85+
);
86+
87+
88+
```
89+
90+
## 6.分区样例
91+
92+
```
93+
create table sideTable(
94+
channel varchar,
95+
xccount int,
96+
name varchar,
97+
PRIMARY KEY(channel),
98+
PERIOD FOR SYSTEM_TIME
99+
)WITH(
100+
type='impala',
101+
url='jdbc:impala://localhost:21050/mytest',
102+
userName='dtstack',
103+
password='abc123',
104+
tableName='sidetest',
105+
authMech='3',
106+
cache ='LRU',
107+
cacheSize ='10000',
108+
cacheTTLMs ='60000',
109+
parallelism ='1',
110+
enablePartition='true',
111+
partitionfields='name',
112+
partitionFieldTypes='varchar',
113+
partitionValues='{"name":["tom","jeck"]}',
114+
partitionedJoin='false'
115+
);
116+
117+
```
118+
119+

docs/impalaSink.md

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
## 1.格式:
2+
```
3+
CREATE TABLE tableName(
4+
colName colType,
5+
...
6+
colNameX colType
7+
)WITH(
8+
type ='impala',
9+
url ='jdbcUrl',
10+
userName ='userName',
11+
password ='pwd',
12+
tableName ='tableName',
13+
parallelism ='parllNum'
14+
);
15+
16+
```
17+
18+
## 2.支持版本
19+
2.10.0-cdh5.13.0
20+
21+
## 3.表结构定义
22+
23+
|参数名称|含义|
24+
|----|---|
25+
| tableName| 在 sql 中使用的名称;即注册到flink-table-env上的名称|
26+
| colName | 列名称|
27+
| colType | 列类型 [colType支持的类型](colType.md)|
28+
29+
## 4.参数:
30+
31+
|参数名称|含义|是否必填|默认值|
32+
|----|----|----|----|
33+
| type |表明 输出表类型[impala]|||
34+
| url | 连接impala数据库 jdbcUrl |||
35+
| userName | impala连接用户名 |||
36+
| password | impala连接密码|||
37+
| tableName | impala表名称|||
38+
| authMech | 身份验证机制 (0, 1, 2, 3),暂不支持kerberos ||0|
39+
| principal | kerberos用于登录的principal(authMech=1时独有) |authMech=1为必填||
40+
| keyTabFilePath | keytab文件的路径(authMech=1时独有) |authMech=1为必填 ||
41+
| krb5FilePath | krb5.conf文件路径(authMech=1时独有) |authMech=1为必填||
42+
| krbHostFQDN | 主机的标准域名(authMech=1时独有) |authMech=1为必填 ||
43+
| krbServiceName | Impala服务器的Kerberos principal名称(authMech=1时独有) |authMech=1为必填||
44+
| krbRealm | Kerberos的域名(authMech=1时独有) || HADOOP.COM |
45+
| enablePartition | 是否支持分区 ||false|
46+
| partitionFields | 分区字段名|否,enablePartition='true'时为必填||
47+
| parallelism | 并行度设置||1|
48+
49+
50+
## 5.样例:
51+
```
52+
CREATE TABLE MyResult(
53+
channel VARCHAR,
54+
pv VARCHAR
55+
)WITH(
56+
type ='impala',
57+
url ='jdbc:impala://localhost:21050/mytest',
58+
userName ='dtstack',
59+
password ='abc123',
60+
authMech = '3',
61+
tableName ='pv2',
62+
parallelism ='1'
63+
);
64+
```

docs/postgresqlSide.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727

2828
|参数名称|含义|
2929
|----|---|
30-
| tableName | 注册到flink的表名称(可选填;不填默认和hbase对应的表名称相同)|
30+
| tableName | 注册到flink的表名称|
3131
| colName | 列名称|
3232
| colType | 列类型 [colType支持的类型](colType.md)|
3333
| PERIOD FOR SYSTEM_TIME | 关键字表明该定义的表为维表信息|
@@ -42,7 +42,6 @@
4242
| userName | postgresql连接用户名 |||
4343
| password | postgresql连接密码|||
4444
| tableName | postgresql表名称|||
45-
| tableName | postgresql 的表名称|||
4645
| cache | 维表缓存策略(NONE/LRU/ALL)||NONE|
4746
| partitionedJoin | 是否在维表join之前先根据设定的key做一次keyby操作(可以减少维表的数据缓存量)||false|
4847

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0"
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5+
<parent>
6+
<artifactId>sql.side.impala</artifactId>
7+
<groupId>com.dtstack.flink</groupId>
8+
<version>1.0-SNAPSHOT</version>
9+
<relativePath>../pom.xml</relativePath>
10+
</parent>
11+
<modelVersion>4.0.0</modelVersion>
12+
13+
<artifactId>sql.side.all.impala</artifactId>
14+
<name>impala-all-side</name>
15+
<packaging>jar</packaging>
16+
17+
<properties>
18+
<sql.side.impala.core.version>1.0-SNAPSHOT</sql.side.impala.core.version>
19+
</properties>
20+
21+
<dependencies>
22+
<dependency>
23+
<groupId>com.dtstack.flink</groupId>
24+
<artifactId>sql.side.impala.core</artifactId>
25+
<version>${sql.side.impala.core.version}</version>
26+
</dependency>
27+
</dependencies>
28+
29+
<build>
30+
<plugins>
31+
<plugin>
32+
<groupId>org.apache.maven.plugins</groupId>
33+
<artifactId>maven-shade-plugin</artifactId>
34+
<version>1.4</version>
35+
<executions>
36+
<execution>
37+
<phase>package</phase>
38+
<goals>
39+
<goal>shade</goal>
40+
</goals>
41+
<configuration>
42+
<artifactSet>
43+
<excludes>
44+
<exclude>com.fasterxml.jackson.*</exclude>
45+
<exclude>org.slf4j</exclude>
46+
</excludes>
47+
</artifactSet>
48+
<filters>
49+
<filter>
50+
<artifact>*:*</artifact>
51+
<excludes>
52+
<exclude>META-INF/*.SF</exclude>
53+
<exclude>META-INF/*.DSA</exclude>
54+
<exclude>META-INF/*.RSA</exclude>
55+
</excludes>
56+
</filter>
57+
</filters>
58+
</configuration>
59+
</execution>
60+
</executions>
61+
</plugin>
62+
63+
<plugin>
64+
<artifactId>maven-antrun-plugin</artifactId>
65+
<version>1.2</version>
66+
<executions>
67+
<execution>
68+
<id>copy-resources</id>
69+
<!-- here the phase you need -->
70+
<phase>package</phase>
71+
<goals>
72+
<goal>run</goal>
73+
</goals>
74+
<configuration>
75+
<tasks>
76+
<copy todir="${basedir}/../../../plugins/impalaallside">
77+
<fileset dir="target/">
78+
<include name="${project.artifactId}-${project.version}.jar"/>
79+
</fileset>
80+
</copy>
81+
82+
<move file="${basedir}/../../../plugins/impalaallside/${project.artifactId}-${project.version}.jar"
83+
tofile="${basedir}/../../../plugins/impalaallside/${project.name}-${git.branch}.jar"/>
84+
</tasks>
85+
</configuration>
86+
</execution>
87+
</executions>
88+
</plugin>
89+
</plugins>
90+
</build>
91+
92+
</project>

0 commit comments

Comments
 (0)