Compiling Hudi with Flink 1.17

Manually install the Kafka dependencies

Download http://packages.confluent.io/archive/5.3/confluent-5.3.4-2.12.zip, unpack it, locate the following jars, and upload them to the server hadoop102:

common-config-5.3.4.jar, common-utils-5.3.4.jar, kafka-avro-serializer-5.3.4.jar, kafka-schema-registry-client-5.3.4.jar. Then install them into the local Maven repository:

mvn install:install-file -DgroupId=io.confluent -DartifactId=common-config -Dversion=5.3.4 -Dpackaging=jar -Dfile=./common-config-5.3.4.jar
mvn install:install-file -DgroupId=io.confluent -DartifactId=common-utils -Dversion=5.3.4 -Dpackaging=jar -Dfile=./common-utils-5.3.4.jar
mvn install:install-file -DgroupId=io.confluent -DartifactId=kafka-avro-serializer -Dversion=5.3.4 -Dpackaging=jar -Dfile=./kafka-avro-serializer-5.3.4.jar
mvn install:install-file -DgroupId=io.confluent -DartifactId=kafka-schema-registry-client -Dversion=5.3.4 -Dpackaging=jar -Dfile=./kafka-schema-registry-client-5.3.4.jar

mvn install:install-file -DgroupId=org.apache.hive -DartifactId=hive-metastore -Dversion=3.1.3 -Dpackaging=jar -Dfile=./hive-metastore-3.1.3.jar
mvn install:install-file -DgroupId=org.apache.flink -DartifactId=flink-table-api-java-bridge -Dversion=1.17.1 -Dpackaging=jar -Dfile=./flink-table-api-java-bridge-1.17.1.jar

mvn install:install-file -DgroupId=com.nimbusds -DartifactId=lang-tag -Dversion=1.7  -Dpackaging=jar -Dfile=./lang-tag-1.7.jar
mvn install:install-file -DgroupId=com.nimbusds -DartifactId=nimbus-jose-jwt -Dversion=9.31  -Dpackaging=jar -Dfile=./nimbus-jose-jwt-9.31.jar
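
With these jars in the local repository, the Hudi Flink bundle itself can be built. A minimal sketch, assuming a checkout of the Hudi master branch (0.14.0-SNAPSHOT) and its flink1.17 / Scala 2.12 build profiles; adjust the profile names to your checkout:

# Build the Flink bundle against Flink 1.17; the jar lands in
# packaging/hudi-flink-bundle/target/ and is copied into Flink's lib below.
mvn clean package -DskipTests -Dflink1.17 -Dscala-2.12 -Pflink-bundle-shade-hive3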

Install JuiceFS

$ JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v')
$ wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz"
❯ ls | grep juicefs
juicefs-1.0.4-linux-amd64.tar.gz
mkdir juice && tar -zxvf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" -C juice
sudo install juice/juicefs /usr/local/bin
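
To confirm the binary is on the PATH:

juicefs version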

Install MinIO


docker pull minio/minio

Startup command

# S3 API on 9000 (the endpoint juicefs format points at below), web console on 9090.
docker run -p 9000:9000 -p 9090:9090 --name minio \
 -d --restart=always \
 -e MINIO_ACCESS_KEY=minio \
 -e MINIO_SECRET_KEY=minio@123 \
 -v /usr/local/minio/data:/data \
 -v /usr/local/minio/config:/root/.minio \
 minio/minio server /data --address ":9000" --console-address ":9090"


docker pull bitnami/minio:2022.8.11
docker pull bitnami/minio-client:2022.8.11

docker run --name minio-server -d \
--publish 9000:9000 \
--publish 9001:9001 \
--env MINIO_ROOT_USER="minio-root-user" \
--env MINIO_ROOT_PASSWORD="minio-root-password" \
bitnami/minio:2022.8.11


docker run -it --link minio-server:minio-server --rm --name minio-client \
--env MINIO_SERVER_HOST="minio-server" \
--env MINIO_SERVER_ACCESS_KEY="minio-root-user" \
--env MINIO_SERVER_SECRET_KEY="minio-root-password" \
bitnami/minio-client:2022.8.11 \
mb minio/my-bucket
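
The juicefs format step below points at a bucket named hudi. juicefs format can usually create the bucket itself when the credentials allow it, but it can also be created up front with the same client image (names reused from above):

docker run -it --link minio-server:minio-server --rm \
--env MINIO_SERVER_HOST="minio-server" \
--env MINIO_SERVER_ACCESS_KEY="minio-root-user" \
--env MINIO_SERVER_SECRET_KEY="minio-root-password" \
bitnami/minio-client:2022.8.11 \
mb minio/hudi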

Start a metadata store (Redis)

docker run --name redis -d -p 6379:6379 redis


docker pull bitnami/redis:7.0

# Run the Redis server
docker run -d -p 6379:6379 --name redis-server \
-e ALLOW_EMPTY_PASSWORD=yes \
-v /home/xfhuang/workspace/bigdata/flink/flink-dist/target/flink-1.17-SNAPSHOT-bin/flink-1.17-SNAPSHOT/redis:/bitnami/redis/data \
bitnami/redis:7.0

# Connect with the Redis client
docker run -it --link redis-server:redis-server --rm bitnami/redis:7.0 redis-cli -h redis-server
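
A quick liveness check using the standard PING command:

docker run -it --link redis-server:redis-server --rm bitnami/redis:7.0 redis-cli -h redis-server ping
# Expected reply: PONG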

Start Flink

cp ~/workspace/bigdata/hudi/packaging/hudi-flink-bundle/target/hudi-flink1.17-bundle-0.14.0-SNAPSHOT.jar lib
export HADOOP_CLASSPATH=`$HADOOP_HOME/bin/hadoop classpath`
bin/start-cluster.sh
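
Once the cluster is up, the JobManager answers on Flink's default REST port (8081, unless overridden in flink-conf.yaml):

curl http://localhost:8081/overview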

Format the JuiceFS volume

juicefs format \
--storage minio \
--bucket http://localhost:9000/hudi \
--access-key tZcPkJgG1Bt98X6i61O9 \
--secret-key Pr1UTa6QTeIBqzlZSEgKZAteKnViinLoWGdmffrO \
"redis://localhost:6379/1" \
hudi
# Alternatively, with MySQL as the metadata engine:
juicefs format --storage=minio --bucket=http://10.7.3.100:31889/hudi --access-key=minioadmin --secret-key=minioadmin "mysql://root:root@(10.7.3.100:30006)/hudi" juicefsminio


 juicefs format \
    --storage minio \
    --bucket http://127.0.0.1:9000/hudi \
    --access-key minio-root-user \
    --secret-key minio-root-password \
    redis://192.168.120.181:6379/1 \
    hudi
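
For Flink to resolve the jfs:// scheme used in the table path below, the JuiceFS Hadoop Java SDK must be on the classpath and registered with Hadoop. A sketch, assuming the juicefs-hadoop jar from the JuiceFS release page and the Redis metadata URL used above:

# Put the JuiceFS Hadoop SDK on Flink's classpath (jar name assumed from the JuiceFS releases).
cp juicefs-hadoop-1.0.4.jar $FLINK_HOME/lib/
# Then register the scheme in core-site.xml (property names per the JuiceFS Hadoop SDK docs):
#   fs.jfs.impl  = io.juicefs.JuiceFileSystem
#   juicefs.meta = redis://localhost:6379/1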

Start the Flink SQL client

 ./bin/sql-client.sh embedded

set sql-client.execution.result-mode=tableau;

CREATE TABLE t1(
  uuid VARCHAR(20) PRIMARY KEY NOT ENFORCED,
  name VARCHAR(10),
  age INT,
  ts TIMESTAMP(3),
  `partition` VARCHAR(20)
)
PARTITIONED BY (`partition`)
WITH (
  'connector' = 'hudi',
  'path' = 'jfs://hudi/warehouse',
  'table.type' = 'MERGE_ON_READ',
  'hoodie.fs.atomic_creation.support' = 'jfs'
);
 # 'hoodie.fs.atomic_creation.support'='jfs' fixes the error: Caused by: org.apache.hudi.exception.HoodieLockException: Unsupported scheme :jfs, since this fs can not support atomic creation

Insert data

INSERT INTO t1 VALUES
('id1','Danny',23,TIMESTAMP '1970-01-01 00:00:01','par1'),
('id2','Stephen',33,TIMESTAMP '1970-01-01 00:00:02','par1'),
('id3','Julian',53,TIMESTAMP '1970-01-01 00:00:03','par2'),
('id4','Fabian',31,TIMESTAMP '1970-01-01 00:00:04','par2'),
('id5','Sophia',18,TIMESTAMP '1970-01-01 00:00:05','par3'),
('id6','Emma',20,TIMESTAMP '1970-01-01 00:00:06','par3'),
('id7','Bob',44,TIMESTAMP '1970-01-01 00:00:07','par4'),
('id8','Han',56,TIMESTAMP '1970-01-01 00:00:08','par4');
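
Query the table back from the same session to verify the write:

select * from t1;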


Note: if Hudi was compiled with JDK 11, run Flink with JDK 11 as well; otherwise Flink fails with an error like the following.

java.lang.NoSuchMethodError: java.nio.ByteBuffer.flip()Ljava/nio/ByteBuffer;

Cause

The JDK used at compile time (12) is newer than the JRE at runtime (1.8). In JDK 9+, ByteBuffer.flip() was covariantly overridden to return ByteBuffer instead of Buffer, so bytecode compiled against the newer signature fails to link on a Java 8 runtime.

Solution

Pin the Maven compiler level, using release instead of source/target, so the compiler also checks API usage against the target JDK's class library.
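
A minimal sketch of what that looks like (the release level is illustrative; match it to the JRE you run Flink with):

# Equivalent pom.xml fragment:
#   <properties>
#     <maven.compiler.release>8</maven.compiler.release>
#   </properties>
# Or passed directly on the command line:
mvn clean package -DskipTests -Dmaven.compiler.release=8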

Also note that the Zing JDK cannot be used to run Flink.
