1、设置环境变量
# Environment variables for the tool stack installed under /cluster.
# Presumably sourced from a shell profile (e.g. /etc/profile) — confirm.

# --- Java ---
export JAVA_HOME=/cluster/jdk
# The last entry used to read "dt.jarp" (typo), which names a jar that does not exist.
export CLASSPATH=".:${JAVA_HOME}/lib/tools.jar:${JAVA_HOME}/lib/dt.jar"

# --- Databases / warehouses ---
# Hive is currently disabled; uncomment to have ${HIVE_HOME}/bin added to PATH below.
#export HIVE_HOME=/cluster/hive
export MYSQL_HOME=/cluster/mysql

# --- Hadoop / YARN ---
export HADOOP_HOME=/cluster/hadoop3
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
# Call hadoop by absolute path: ${HADOOP_HOME}/bin is only appended to PATH at
# the end of this snippet, so a bare `hadoop` may not resolve yet.
# Assignment and export are split so a failing command is not masked by export.
if [ -x "${HADOOP_HOME}/bin/hadoop" ]; then
  HADOOP_CLASSPATH="$("${HADOOP_HOME}/bin/hadoop" classpath)"
  export HADOOP_CLASSPATH
fi
# Accounts used by the Hadoop start/stop scripts for each daemon.
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root

# --- Compute engines, middleware and tools ---
export FLINK_HOME=/cluster/flink
export SPARK_HOME=/cluster/spark
export ZK_HOME=/cluster/zookeeper
export NACOS_HOME=/cluster/nacos
export KAFKA_HOME=/cluster/kafka
export DATART_HOME=/cluster/datart
export HBASE_HOME=/cluster/hbase
export SEATUNNEL_HOME=/cluster/seatunnel
export STREAMPARK_HOME=/cluster/streampark
export KYUUBI_HOME=/cluster/kyuubi
export DINKY_HOME=/cluster/dinky
export INLONG_HOME=/cluster/inlong
export DORIS_HOME=/cluster/doris
export BE_HOME="${DORIS_HOME}/be"
export FE_HOME="${DORIS_HOME}/fe"
export M2_HOME=/cluster/maven

# --- PATH ---
# Entries are appended in the same relative order as before, with these fixes:
#   * ${KAFKA_HOME} -> ${KAFKA_HOME}/bin (the Kafka scripts live in bin/).
#   * ${HIVE_HOME}/bin is only added when HIVE_HOME is set; with HIVE_HOME
#     unset it used to expand to a stray "/bin" entry.
#   * The bogus "${SPARK_HOME}/SPARK_HOME" entry is replaced by
#     ${SPARK_HOME}/sbin (presumably the intended directory — confirm). It is
#     placed after ${HADOOP_HOME}/sbin so that script names present in both
#     sbin directories (e.g. start-all.sh) keep resolving to Hadoop's copy.
PATH="${PATH}:${M2_HOME}/bin:${BE_HOME}/bin:${FE_HOME}/bin"
PATH="${PATH}:${DINKY_HOME}/bin:${INLONG_HOME}/bin:${DATART_HOME}/bin"
PATH="${PATH}:${KYUUBI_HOME}/bin:${HBASE_HOME}/bin:${SEATUNNEL_HOME}/bin"
PATH="${PATH}:${STREAMPARK_HOME}/bin:${FLINK_HOME}/bin:${SPARK_HOME}/bin"
PATH="${PATH}:${KAFKA_HOME}/bin:${MYSQL_HOME}/bin${HIVE_HOME:+:${HIVE_HOME}/bin}"
PATH="${PATH}:${JAVA_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin"
PATH="${PATH}:${SPARK_HOME}/sbin:${NACOS_HOME}/bin:${ZK_HOME}/bin"
export PATH
2、 flink的配置文件config.yaml
# JVM options applied to all Flink processes (env.java.opts.all).
# The --add-exports / --add-opens flags open JDK-internal packages to unnamed modules.
env:
  java:
    opts:
      all: --add-exports=java.base/sun.net.util=ALL-UNNAMED --add-exports=java.rmi/sun.rmi.registry=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED --add-exports=java.security.jgss/sun.security.krb5=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.text=ALL-UNNAMED --add-opens=java.base/java.time=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.locks=ALL-UNNAMED
#==============================================================================
# Common
#==============================================================================
# JobManager: bind/RPC endpoints, total process memory, failover strategy and
# the directory where archives of completed jobs are written.
jobmanager:
  bind-host: 0.0.0.0
  rpc:
    address: 0.0.0.0
    port: 6123
  memory:
    process:
      size: 1600m
  execution:
    failover-strategy: region
  archive:
    fs:
      dir: hdfs://10.10.10.99:9000/flink/completed-jobs/
# TaskManager: bind address/host, slots per TaskManager, total process memory
# and the network-memory fraction with its min/max bounds.
taskmanager:
  bind-host: 0.0.0.0
  host: 0.0.0.0
  numberOfTaskSlots: 100
  memory:
    process:
      size: 1728m
    network:
      fraction: 0.1
      min: 64mb
      max: 1gb
# Default parallelism for jobs that do not set one explicitly.
parallelism:
  default: 1
# Default file system scheme/authority (fs.default-scheme).
fs:
  default-scheme: hdfs://10.10.10.99:9000
#==============================================================================
# High Availability zookeeper没有开启认证,应该尝试下怎么开启zookeeper的认证方式
#==============================================================================
high-availability:
  # The high-availability mode. Possible options are 'NONE' or 'zookeeper'.
  type: zookeeper
  # The path where metadata for master recovery is persisted. While ZooKeeper stores
  # the small ground truth for checkpoint and leader election, this location stores
  # the larger objects, like persisted dataflow graphs.
  #
  # Must be a durable file system that is accessible from all nodes
  # (like HDFS, S3, Ceph, nfs, ...)
  storageDir: hdfs:///flink/ha/
  zookeeper:
    # The list of ZooKeeper quorum peers that coordinate the high-availability
    # setup. This must be a list of the form:
    # "host1:clientPort,host2:clientPort,..." (default clientPort: 2181)
    quorum: localhost:2181
    client:
      # ACL options are based on https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_BuiltinACLSchemes
      # It can be either "creator" (ZOO_CREATE_ALL_ACL) or "open" (ZOO_OPEN_ACL_UNSAFE)
      # The default value is "open" and it can be changed to "creator" if ZK security is enabled
      acl: open
#==============================================================================
# Fault tolerance and checkpointing
#==============================================================================
# The backend that will be used to store operator state checkpoints if
# checkpointing is enabled. Checkpointing is enabled when execution.checkpointing.interval > 0.
# # Execution checkpointing related parameters. Please refer to CheckpointConfig and CheckpointingOptions for more details.
# Checkpointing: one exactly-once checkpoint every 3 minutes, at most one in
# flight, 10 minute timeout, no tolerated failures, aligned barriers, and
# externalized checkpoints deleted when the job is cancelled.
execution:
  checkpointing:
    interval: 3min
    externalized-checkpoint-retention: DELETE_ON_CANCELLATION
    max-concurrent-checkpoints: 1
    min-pause: 0s
    mode: EXACTLY_ONCE
    timeout: 10min
    tolerable-failed-checkpoints: 0
    unaligned: false
# State backend (hashmap, non-incremental) and the HDFS directories used for
# checkpoints and savepoints.
state:
  backend:
    type: hashmap
    incremental: false
  checkpoints:
    dir: hdfs://10.10.10.99:9000/flink/flink-checkpoints
  savepoints:
    dir: hdfs://10.10.10.99:9000/flink/flink-savepoints
#==============================================================================
# Rest & web frontend
#==============================================================================
# REST endpoint addresses.
rest:
  address: 0.0.0.0
  bind-address: 0.0.0.0
# Web UI: allow submitting and cancelling jobs from the browser.
web:
  submit:
    enable: true
  cancel:
    enable: true
#==============================================================================
# Advanced
#==============================================================================
# Directory for temporary files.
io:
  tmp:
    dirs: /tmp
# Class loading resolve order.
# NOTE(review): confirm this nesting maps to the option key Flink expects.
classloader:
  resolve:
    order: child-first
#==============================================================================
# Flink Cluster Security Configuration
#==============================================================================
# Kerberos authentication for various components - Hadoop, ZooKeeper, and connectors -
# may be enabled in four steps:
# 1. configure the local krb5.conf file
# 2. provide Kerberos credentials (either a keytab or a ticket cache w/ kinit)
# 3. make the credentials available to various JAAS login contexts
# 4. configure the connector to use JAAS/SASL
# # The below configure how Kerberos credentials are provided. A keytab will be used instead of
# # a ticket cache if the keytab path and principal are set.
# security:
#   kerberos:
#     login:
#       use-ticket-cache: true
#       keytab: /path/to/kerberos/keytab
#       principal: flink-user
#       # The configuration below defines which JAAS login contexts
#       contexts: Client,KafkaClient
#==============================================================================
# ZK Security Configuration
#==============================================================================
# zookeeper:
#   sasl:
#     # Below configurations are applicable if ZK ensemble is configured for security
#     #
#     # Override below configuration to provide custom ZK service name if configured
#     # zookeeper.sasl.service-name: zookeeper
#     #
#     # The configuration below must match one of the values set in "security.kerberos.login.contexts"
#     login-context-name: Client
#==============================================================================
# HistoryServer
#==============================================================================
# HistoryServer: web endpoint and the archive directory it monitors.
# NOTE(review): this directory differs from jobmanager.archive.fs.dir
# (hdfs://10.10.10.99:9000/flink/completed-jobs/); the HistoryServer only shows
# jobs archived into a directory it monitors — confirm which path is intended.
historyserver:
  web:
    address: 0.0.0.0
    port: 8082
  archive:
    fs:
      dir: hdfs://10.10.10.99:9000/flink/historyserver/completed-jobs/
      # Refresh interval in milliseconds for the monitored directories. Written as
      # "refresh-interval" so the flattened key is
      # historyserver.archive.fs.refresh-interval; the former "fs.refresh-interval"
      # under "fs" would flatten to "...archive.fs.fs.refresh-interval".
      refresh-interval: 10000
3、提交运行
一、Flink 作业提交模式及对应命令
(一)Per-Job 模式
# Submit the bundled TopSpeedWindowing example with the yarn-per-job target (-t),
# detached (-d). The YARN application name is passed twice: via -ynm and via
# -Dyarn.application.name.
# NOTE(review): -ynm is a legacy YARN CLI flag and is presumably redundant next
# to -Dyarn.application.name when -t is used — confirm.
# NOTE(review): confirm the installed Flink version still offers the
# yarn-per-job target.
/cluster/flink/bin/flink run \
-t yarn-per-job \
-d \
-ynm YarnPerJobTopSpeedWindowing \
-Dyarn.application.name=YarnPerJobTopSpeedWindowing \
-c org.apache.flink.streaming.examples.windowing.TopSpeedWindowing \
/cluster/flink/examples/streaming/TopSpeedWindowing.jar 3000
====================================================================================================================
(二)Session 模式
该模式需要先启动 Yarn 会话,获取会话 ID 后再提交作业。
1. 启动 Yarn 会话。启动会话后会打印输出会话 ID,示例:application_1740741184000_0002
# Start a Flink session on YARN named "yarn-session-app" (-nm), detached (-d).
# -jm / -tm set JobManager / TaskManager memory (2048, presumably MB — confirm);
# -s sets the number of slots per TaskManager.
/cluster/flink/bin/yarn-session.sh \
-jm 2048 \
-tm 2048 \
-s 1 \
-nm yarn-session-app \
-d
停止会话:echo "stop" | /cluster/flink/bin/yarn-session.sh -id application_1740741184000_0002
2. 在 Yarn 会话中提交作业
# Submit the TopSpeedWindowing example to an already running YARN session.
# -yid must be the application ID printed when the session was started;
# replace the sample ID below with the real one.
/cluster/flink/bin/flink run \
-Dyarn.application.name=YarnSessionAppTopSpeedWindowing \
-c org.apache.flink.streaming.examples.windowing.TopSpeedWindowing \
-yid application_1740741184000_0002 \
/cluster/flink/examples/streaming/TopSpeedWindowing.jar
这个会唤起一个新的flink进程,页面的端口不一定是8081
====================================================================================================================
(三)Application 模式
# Run the TopSpeedWindowing example in application mode
# (run-application with the yarn-application target).
# The -D options set configuration for this submission only: default
# parallelism 1, 2048m process memory for both JobManager and TaskManager,
# one slot per TaskManager, and the YARN application name.
/cluster/flink/bin/flink run-application \
-t yarn-application \
-Dparallelism.default=1 \
-Djobmanager.memory.process.size=2048m \
-Dtaskmanager.memory.process.size=2048m \
-Dyarn.application.name=RunApplicationTopSpeedWindowing \
-Dtaskmanager.numberOfTaskSlots=1 \
-c org.apache.flink.streaming.examples.windowing.TopSpeedWindowing \
/cluster/flink/examples/streaming/TopSpeedWindowing.jar 3000
问题1:Caused by: org.apache.flink.configuration.IllegalConfigurationException:
The number of requested virtual cores for application master 1 exceeds
the maximum number of virtual cores 0 available in the Yarn Cluster
原因:yarn 启动失败(集群可用的 virtual cores 为 0),与 spark-3.5.4-yarn-shuffle.jar 文件有关;处理好该 jar 文件后重启 hadoop 集群即可。
yarn 页面访问地址:http://ip:18088/cluster