Configure passwordless SSH across the cluster

vim /etc/hosts
-------------------------------
101.37.71.111 zyh
118.31.67.112 why
101.37.161.4 zzy
101.37.77.203 zjy
------------------------------
172.27.47.164 zyh      # on zyh itself, map the hostname to its private/internal IP
------------------------------

ssh-keygen -t rsa
ssh-copy-id root@zyh
ssh-copy-id root@why
ssh-copy-id root@zzy
ssh-copy-id root@zjy

# root passwords used for ssh-copy-id
101.37.71.111 Zyh123456789
118.31.67.112 Why12345
101.37.161.4 Zzy147258369
101.37.77.203 Zhou123456

mkdir -p /opt/download /opt/software
scp xxxx root@zyh:/opt/download
scp /etc/profile.d/myenv.sh root@zyh:/etc/profile.d/
source /etc/profile
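Key distribution can also be scripted; a minimal sketch (the loop and the BatchMode check are additions, not part of the original notes):

# copy the public key to every node, then confirm passwordless login works
for host in zyh why zzy zjy; do
  ssh-copy-id -i ~/.ssh/id_rsa.pub root@$host
done
for host in zyh why zzy zjy; do
  ssh -o BatchMode=yes root@$host hostname   # should print the hostname without a password prompt
done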
ZooKeeper

mkdir data
vim data/myid
----------
1
----------
mv conf/zoo_sample.cfg conf/zoo.cfg
vim conf/zoo.cfg
----------------------------------
dataDir=/opt/software/zookeeper-3.6.3/data
quorumListenOnAllIPs=true
clientPort=2181
server.1=zzy:2888:3888
server.2=zyh:2888:3888
server.3=why:2888:3888
----------------------------------
echo 'quorumListenOnAllIPs=true' >> /opt/software/zookeeper-3.6.3/conf/zoo.cfg
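The same install has to exist on the other two ZooKeeper nodes, each with its own myid matching the server.N lines above. A minimal sketch, assuming the node configured above is zzy (server.1) and the install is simply copied over:

scp -r /opt/software/zookeeper-3.6.3 root@zyh:/opt/software/
scp -r /opt/software/zookeeper-3.6.3 root@why:/opt/software/
ssh root@zyh "echo 2 > /opt/software/zookeeper-3.6.3/data/myid"   # server.2=zyh
ssh root@why "echo 3 > /opt/software/zookeeper-3.6.3/data/myid"   # server.3=why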
Hadoop high availability (HA, multiple NameNodes)

yum -y install ntpdate.x86_64
ntpdate -u cn.pool.ntp.org

# environment variables (e.g. appended to /etc/profile.d/myenv.sh)
export HADOOP_HOME=/opt/software/hadoop-3.1.3
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HADOOP_HOME/lib
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export HDFS_JOURNALNODE_USER=root
export HDFS_ZKFC_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_LIBEXEC_DIR=$HADOOP_HOME/libexec
export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native:$JAVA_LIBRARY_PATH
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop

source /etc/profile
cd /opt/software/hadoop-3.1.3
mkdir data
cd /opt/software/hadoop-3.1.3/etc/hadoop

vim hadoop-env.sh
export JAVA_HOME=/opt/software/jdk1.8.0_171

vim workers
-----------
zyh
why
zzy
-----------
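A quick sanity check after sourcing the profile (verification commands only, nothing configured here):

java -version
hadoop version
echo $HADOOP_CONF_DIR    # should print /opt/software/hadoop-3.1.3/etc/hadoop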
core-site.xml

<property>
    <name>fs.defaultFS</name>
    <value>hdfs://mycluster</value>
</property>
<property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/software/hadoop-3.1.3/data</value>
</property>
<property>
    <name>hadoop.http.staticuser.user</name>
    <value>root</value>
</property>
<property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
</property>
<property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
</property>
<property>
    <name>ha.zookeeper.quorum</name>
    <value>why:2181,zyh:2181,zzy:2181</value>
</property>
<property>
    <name>hadoop.zk.address</name>
    <value>why:2181,zyh:2181,zzy:2181</value>
</property>
hdfs-site.xml

<property>
    <name>dfs.namenode.name.dir</name>
    <value>/opt/software/hadoop-3.1.3/data/name</value>
</property>
<property>
    <name>dfs.datanode.data.dir</name>
    <value>/opt/software/hadoop-3.1.3/data/data</value>
</property>
<property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/opt/software/hadoop-3.1.3/data/jn</value>
</property>
<property>
    <name>dfs.nameservices</name>
    <value>mycluster</value>
</property>
<property>
    <name>dfs.ha.namenodes.mycluster</name>
    <value>nn1,nn2,nn3</value>
</property>
<property>
    <name>dfs.namenode.rpc-address.mycluster.nn1</name>
    <value>zyh:8020</value>
</property>
<property>
    <name>dfs.namenode.rpc-address.mycluster.nn2</name>
    <value>why:8020</value>
</property>
<property>
    <name>dfs.namenode.rpc-address.mycluster.nn3</name>
    <value>zzy:8020</value>
</property>
<property>
    <name>dfs.namenode.http-address.mycluster.nn1</name>
    <value>zyh:9870</value>
</property>
<property>
    <name>dfs.namenode.http-address.mycluster.nn2</name>
    <value>why:9870</value>
</property>
<property>
    <name>dfs.namenode.http-address.mycluster.nn3</name>
    <value>zzy:9870</value>
</property>
<property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://zyh:8485;why:8485;zzy:8485/mycluster</value>
</property>
<property>
    <name>dfs.client.failover.proxy.provider.mycluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
</property>
<property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/root/.ssh/id_rsa</value>
</property>
<property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
</property>
<property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
</property>
mapred-site.xml

<property>
    <name>mapreduce.jobhistory.address</name>
    <value>zyh:10020</value>
</property>
<property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>zyh:19888</value>
</property>
yarn HA
yarn-site.xml

<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
<property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
</property>
<property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>mycluster</value>
</property>
<property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2,rm3</value>
</property>
<property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>zyh</value>
</property>
<property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>zyh:8088</value>
</property>
<property>
    <name>yarn.resourcemanager.address.rm1</name>
    <value>zyh:8032</value>
</property>
<property>
    <name>yarn.resourcemanager.scheduler.address.rm1</name>
    <value>zyh:8030</value>
</property>
<property>
    <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
    <value>zyh:8031</value>
</property>
<property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>why</value>
</property>
<property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>why:8088</value>
</property>
<property>
    <name>yarn.resourcemanager.address.rm2</name>
    <value>why:8032</value>
</property>
<property>
    <name>yarn.resourcemanager.scheduler.address.rm2</name>
    <value>why:8030</value>
</property>
<property>
    <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
    <value>why:8031</value>
</property>
<property>
    <name>yarn.resourcemanager.hostname.rm3</name>
    <value>zzy</value>
</property>
<property>
    <name>yarn.resourcemanager.webapp.address.rm3</name>
    <value>zzy:8088</value>
</property>
<property>
    <name>yarn.resourcemanager.address.rm3</name>
    <value>zzy:8032</value>
</property>
<property>
    <name>yarn.resourcemanager.scheduler.address.rm3</name>
    <value>zzy:8030</value>
</property>
<property>
    <name>yarn.resourcemanager.resource-tracker.address.rm3</name>
    <value>zzy:8031</value>
</property>
<property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>why:2181,zyh:2181,zzy:2181</value>
</property>
<property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
</property>
<property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
# on all three ZooKeeper nodes
zkServer.sh start
zkServer.sh status

# on each JournalNode host
hdfs --daemon start journalnode

# on the first NameNode only
hdfs zkfc -formatZK
hdfs namenode -format

start-all.sh

# on each of the other NameNodes
hdfs namenode -bootstrapStandby
hdfs --daemon start namenode

# JobHistoryServer
mapred --daemon start historyserver
mapred --daemon stop historyserver

# check and, if needed, force the HA state
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2
hdfs haadmin -transitionToActive --forcemanual nn1
hdfs --daemon stop namenode
hdfs --daemon start namenode
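Once the daemons are up, a quick HA health check (verification commands only; the one-shot -getAllServiceState variants exist in Hadoop 3.x):

hdfs haadmin -getAllServiceState          # expect one active NameNode, the rest standby
yarn rmadmin -getAllServiceState          # expect one active ResourceManager
hdfs dfsadmin -report                     # DataNodes registered
zkCli.sh -server zyh:2181 ls /hadoop-ha   # ZKFC registration created by -formatZK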
Kafka

config/server.properties
----------------------------------
broker.id=0                  # must be unique on each broker (e.g. 0, 1, 2)
delete.topic.enable=true
num.partitions=3
log.retention.hours=168
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
zookeeper.connect=why:2181,zzy:2181,zyh:2181
----------------------------------
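To bring the brokers up and smoke-test them, a minimal sketch (the install path, the broker address zyh:9092 and the use of --bootstrap-server are assumptions for a reasonably recent Kafka; older releases used --zookeeper for topic commands):

# on each broker, from the Kafka install directory, after giving it a unique broker.id
bin/kafka-server-start.sh -daemon config/server.properties

# create and inspect a test topic
bin/kafka-topics.sh --bootstrap-server zyh:9092 --create --topic test --partitions 3 --replication-factor 2
bin/kafka-topics.sh --bootstrap-server zyh:9092 --describe --topic test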
Flink high availability (the workers and masters files are omitted here)
flink-conf.yaml
jobmanager.rpc.address: zyh
jobmanager.rpc.port: 6123
jobmanager.memory.process.size: 2048m
taskmanager.host: zyh
taskmanager.memory.process.size: 4096m
taskmanager.numberOfTaskSlots: 3
parallelism.default: 3
high-availability.type: zookeeper
high-availability.storageDir: hdfs://mycluster/flink/ha/
high-availability.zookeeper.path.root: /flink
high-availability.cluster-id: /cluster_one
high-availability.zookeeper.quorum: zyh:2181,zzy:2181,why:2181
execution.checkpointing.interval: 30000
execution.checkpointing.externalized-checkpoint-retention: RETAIN_ON_CANCELLATION
execution.checkpointing.max-concurrent-checkpoints: 2
execution.checkpointing.min-pause: 500
execution.checkpointing.mode: EXACTLY_ONCE
execution.checkpointing.timeout: 600000
execution.checkpointing.tolerable-failed-checkpoints: 3
restart-strategy.type: fixed-delay
restart-strategy.fixed-delay.attempts: 3
restart-strategy.fixed-delay.delay: 10000
state.backend: filesystem
state.checkpoints.dir: hdfs://mycluster/flink-checkpoints
state.savepoints.dir: hdfs://mycluster/flink-savepoints
jobmanager.execution.failover-strategy: region
rest.port: 8081
rest.address: zyh
jobmanager.archive.fs.dir: hdfs://mycluster/logs/flink-job
historyserver.web.address: zyh
historyserver.web.port: 8082
historyserver.archive.fs.dir: hdfs://mycluster/logs/flink-job
historyserver.archive.fs.refresh-interval: 5000
cp $ZOOKEEPER_HOME/conf/zoo.cfg $FLINK_HOME/conf/
cp $HADOOP_HOME/etc/hadoop/hdfs-site.xml /opt/software/flink-1.17.0/conf/
cp $HADOOP_HOME/etc/hadoop/core-site.xml /opt/software/flink-1.17.0/conf/
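With ZooKeeper and HDFS up, the cluster can be started and smoke-tested roughly as below (a sketch only: HADOOP_CLASSPATH is needed so Flink finds the HDFS client, and the WordCount jar is the stock example shipped with Flink):

export HADOOP_CLASSPATH=$(hadoop classpath)
cd /opt/software/flink-1.17.0
bin/start-cluster.sh
bin/historyserver.sh start
bin/flink run examples/streaming/WordCount.jar    # submits against the HA JobManager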
MySQL 8 installation

sudo yum update -y
sudo yum install wget -y
wget https://dev.mysql.com/get/mysql80-community-release-el7-7.noarch.rpm
sudo yum localinstall mysql80-community-release-el7-7.noarch.rpm
sudo yum install mysql-community-server --nogpgcheck
sudo systemctl start mysqld
sudo systemctl enable mysqld
sudo grep 'temporary password' /var/log/mysqld.log
mysql -u root -p

ALTER USER 'root'@'localhost' IDENTIFIED BY '1e;YtoJP2kOd';
SET GLOBAL validate_password.policy = LOW;
SET GLOBAL validate_password.length = 6;
ALTER USER 'root'@'localhost' IDENTIFIED BY '123456';

create user root@'%' identified by 'password' password expire never;
ALTER USER 'root'@'%' IDENTIFIED BY 'Ljj315..';
FLUSH PRIVILEGES;

create user root@'%' identified by '123456' password expire never;
grant all on *.* to root@'%';
FLUSH PRIVILEGES;
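A quick check that the remote root account actually works (assumes MySQL runs on zyh, as the summary table below indicates, and that port 3306 is reachable from the other node):

mysql -h zyh -P 3306 -u root -p123456 -e "SELECT user, host FROM mysql.user;"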
Hive (a single node is enough)
Upload the Hive tarball apache-hive-3.1.2-bin.tar.gz and the MySQL 8 JDBC driver mysql-connector-j-8.0.33.jar to /opt/download/second.

cd /opt/download
tar -zxvf apache-hive-3.1.2-bin.tar.gz -C /opt/software/
mv /opt/software/apache-hive-3.1.2-bin/ /opt/software/hive-3.1.2
cd /opt/software/hive-3.1.2

vim /etc/profile.d/my.sh
export HIVE_HOME=/opt/software/hive-3.1.2
export PATH=$PATH:$HIVE_HOME/bin
source /etc/profile

mv conf/hive-default.xml.template conf/hive-default.xml
vim conf/hive-site.xml

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>hive.metastore.sasl.enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.server2.enable.doAs</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.server2.authentication</name>
        <value>NONE</value>
    </property>
    <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>/hive312/warehouse</value>
    </property>
    <property>
        <name>hive.metastore.db.type</name>
        <value>mysql</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://zyh:3306/hive312?createDatabaseIfNotExist=true</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.cj.jdbc.Driver</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>root</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>123456</value>
    </property>
    <property>
        <name>hive.metastore.schema.verification</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.cli.print.current.db</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.cli.print.header</name>
        <value>true</value>
    </property>
</configuration>
cp /opt/download/mysql-connector-j-8.0.33.jar lib/
ls lib/ | grep guava
rm -f lib/guava-19.0.jar
find /opt/software/hadoop-3.1.3/ -name 'guava*'
cp /opt/software/hadoop-3.1.3/share/hadoop/hdfs/lib/guava-27.0-jre.jar lib/

schematool -dbType mysql -initSchema

nohup hive --service metastore 1>/dev/null 2>&1 &
hive
nohup hive --service hiveserver2 1>/dev/null 2>&1 &
beeline -u jdbc:hive2://localhost:10000

Maven dependencies for clients:
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-exec</artifactId>
    <version>${hive.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-jdbc</artifactId>
    <version>${hive.version}</version>
</dependency>
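Once metastore and hiveserver2 are running, a small smoke test through beeline (the database name test_db is just an example):

beeline -u jdbc:hive2://localhost:10000 -n root -e "create database if not exists test_db; show databases;"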
Spark

vim spark-env.sh
----------------------------------------------------------------
export JAVA_HOME=/opt/software/jdk1.8.0_171
export HADOOP_CONF_DIR=/opt/software/hadoop-3.1.3/etc/hadoop
SPARK_MASTER_WEBUI_PORT=9090
SPARK_HISTORY_OPTS="
-Dspark.history.ui.port=9091
-Dspark.history.fs.logDirectory=hdfs://zyh:8020/spark_event_log_dir
-Dspark.history.retainedApplications=30"
SPARK_DAEMON_JAVA_OPTS="
-Dspark.deploy.recoveryMode=ZOOKEEPER
-Dspark.deploy.zookeeper.url=why,zyh,zzy
-Dspark.deploy.zookeeper.dir=/spark"
----------------------------------------------------------------

workers
-----------
zzy
zyh
-----------
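A start/verify sketch (run from the Spark install directory, which the notes do not record; zyh is the Master per the summary below, and the history server additionally needs spark.eventLog.enabled/spark.eventLog.dir in spark-defaults.conf, which is not shown here):

hdfs dfs -mkdir -p /spark_event_log_dir          # directory referenced by SPARK_HISTORY_OPTS
sbin/start-all.sh                                # starts the Master on this node and Workers from conf/workers
sbin/start-history-server.sh
bin/spark-submit --class org.apache.spark.examples.SparkPi \
  --master spark://zyh:7077 \
  examples/jars/spark-examples_*.jar 100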
Process layout after startup (one column per node, hostname/cores/GB RAM; "(x)" marks a process that is not running):

| why/2/4 | zyh/2/4 | zzy/2/4 | zjy/2/2 |
| --- | --- | --- | --- |
| QuorumPeerMain | QuorumPeerMain | QuorumPeerMain | |
| DataNode | DataNode | DataNode | DataNode |
| NodeManager | NodeManager | NodeManager | NodeManager |
| ResourceManager | ResourceManager(x) | ResourceManager | |
| NameNode | NameNode(x) | NameNode | |
| DFSZKFailoverController | DFSZKFailoverController(x) | DFSZKFailoverController | |
| JournalNode | JournalNode(x) | JournalNode | |
| | Master Worker | Worker | |
| Kafka | Kafka | | |
| StandaloneSessionClusterEntrypoint(x) | StandaloneSessionClusterEntrypoint(x) | | |
| TaskManagerRunner(x) | TaskManagerRunner(x) | TaskManagerRunner(x) | TaskManagerRunner(x) |
| | RunJar RunJar(hive) | | |
| | mysql 8 | | |
http://zzy:9870/   hadoop
http://zyh:9090/   spark Master
http://zyh:8081/   Worker
zyh: mysql on 3306 (password 123456), hive
Hadoop cluster configuration (non-HA): resources are not enough for HA, so one node carries all the master roles and the other three run only DataNode and NodeManager.
Passwordless SSH and environment variables are the same as above.
core-site.xml

<property>
    <name>fs.defaultFS</name>
    <value>hdfs://zzy:8020</value>
</property>
<property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/software/hadoop-3.1.3/data</value>
</property>
<property>
    <name>hadoop.http.staticuser.user</name>
    <value>root</value>
</property>
<property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
</property>
<property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
</property>
hdfs-site.xml

<property>
    <name>dfs.namenode.http-address</name>
    <value>zzy:9870</value>
</property>
<property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>zzy:9868</value>
</property>
<property>
    <name>dfs.replication</name>
    <value>3</value>
</property>
<property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
</property>
yarn-site.xml

<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
<property>
    <name>yarn.resourcemanager.hostname</name>
    <value>zzy</value>
</property>
<property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
<property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
</property>
<property>
    <name>yarn.log.server.url</name>
    <value>http://zzy:19888/jobhistory/logs</value>
</property>
<property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
</property>
mapred-site.xml

<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
<property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>zzy:19888</value>
</property>
workers
hadoop namenode -format
start-all.sh
mapred --daemon start historyserver
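Verification after start-up (zzy carries the master roles in this layout):

jps                     # on zzy: NameNode, ResourceManager, JobHistoryServer, plus DataNode/NodeManager
hdfs dfsadmin -report   # all four DataNodes registered
yarn node -list         # all four NodeManagers registered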
101.37.71.111 zyh
118.31.67.112 why
101.37.161.4 zzy
101.37.77.203 zjy

http://101.37.161.4:9870/   hadoop
http://zyh:9090/            spark Master
http://zyh:8081/            Worker
zyh: mysql on 3306 (password 123456), hive
Server / process layout for this reduced setup:

| Server | why/2/4 (118.31.67.112) | zyh/2/4 (101.37.71.111) | zzy/2/4 (101.37.161.4) | zjy/2/2 (101.37.77.203) |
| --- | --- | --- | --- | --- |
| zookeeper | QuorumPeerMain | QuorumPeerMain | QuorumPeerMain | |
| hadoop | DataNode | DataNode | DataNode | DataNode |
| | NodeManager | NodeManager | NodeManager | NodeManager |
| | | | ResourceManager 8088 | |
| | | | NameNode 9870 | |
| | | | JobHistoryServer 19888 | |
| spark | | Master 9090 | | |
| | | Worker 8081 | | |
| kafka | Kafka | Kafka | | |
| hive | | RunJar RunJar | | |
| mysql | | mysql 8 (password 123456) | | |
| flink | | | | |
Sqoop import from MySQL into Hive:

sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
  --username root \
  --password Ljj315.. \
  --connect jdbc:mysql://leafdomain.cn/order_sys \
  --table region_info \
  --fields-terminated-by ',' \
  --hive-import \
  --hive-database order_sys \
  --hive-table region_info
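To confirm the import landed, a simple count over the target table (verification only, using the hiveserver2 address configured above):

beeline -u jdbc:hive2://localhost:10000 -n root -e "select count(*) from order_sys.region_info;"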