This tutorial was created jointly by the author xhx and Claude 3.7 Sonnet.
It is based on CentOS 7 (2009). Other releases may run into all sorts of problems, so do not experiment with them casually or you will waste time for nothing. The guide works for both virtual machines and physical machines; installing the VM in VirtualBox is recommended. MobaXterm is the suggested terminal tool and WinSCP the suggested file transfer tool.
The guide is divided into 11 parts (plus a preparation step) and installs the following big data components:
- MySQL 5.7
- JDK 1.8
- Hadoop 3.1.3
- ZooKeeper 3.5.7
- Hive 3.1.2
- Kafka 2.4.1
- Spark 3.1.1
- Flink 1.14.0
- HBase 2.2.3
- Flume 1.9.0
The software packages required by this guide can be downloaded from: https://pan.quark.cn/s/49707c42257e
The setup essentially matches the environment required by the Jiangsu provincial round of the vocational college big data skills competition. Big data development environments are very sensitive to component versions, so there is no need to chase the newest releases; doing so only invites all kinds of baffling bugs.
0. Preparation
First, we need to set up some basic environment variables and the directory structure.
# Create the application installation directories
mkdir -p /opt/module
mkdir -p /opt/software
# Use WinSCP to copy the required packages into /opt/software; on a Linux client, use the scp command instead
# Create the data directory
mkdir -p /data
# Set the JAVA_HOME environment variable
echo 'export JAVA_HOME=/opt/module/jdk1.8.0' >> /etc/profile
echo 'export PATH=$PATH:$JAVA_HOME/bin' >> /etc/profile
# Set the hostname and the hosts entry
echo "master" > /etc/hostname
# Assuming the local IP address is 192.168.55.123
echo "192.168.55.123 master" >> /etc/hosts
# Reload the environment variables
source /etc/profile
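Note that writing /etc/hostname only takes effect after a reboot. On CentOS 7 you can apply the name immediately and sanity-check the hosts entry with something like the following (the IP is the example address above):
# Apply the hostname without rebooting
hostnamectl set-hostname master
# Confirm the name resolves to the configured address
ping -c 1 master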
1. Install MySQL 5.7
The required offline RPM packages are:
mysql-community-common-5.7.28-1.el7.x86_64.rpm
mysql-community-libs-5.7.28-1.el7.x86_64.rpm
mysql-community-client-5.7.28-1.el7.x86_64.rpm
mysql-community-server-5.7.28-1.el7.x86_64.rpm
Put all of the packages into a single directory, for example mysqlhome. To avoid downloading the dependencies one by one, run the following command inside that directory:
yum localinstall *.rpm -y
With network access, yum fetches the remaining dependency packages automatically.
After the installation completes, start the MySQL service.
# Start the MySQL service
systemctl start mysqld
systemctl enable mysqld
Next, change the root password to "123456".
- Get the temporary password
# Retrieve the temporary root password
sudo grep 'temporary password' /var/log/mysqld.log
# Log in to MySQL with the temporary password
mysql -uroot -p
- Relax the password policy:
# Edit the MySQL configuration file
vi /etc/my.cnf
# Add the following to the [mysqld] section
[mysqld]
validate_password_policy=LOW
validate_password_length=6
validate_password_special_char_count=0
validate_password_mixed_case_count=0
validate_password_number_count=0
Restart the mysqld service:
systemctl restart mysqld
- After logging in, change the root password (for example to 123456; never use this in production):
ALTER USER 'root'@'localhost' IDENTIFIED BY '123456';
Enable remote access
- Grant the root user access from any host
CREATE USER 'root'@'%' IDENTIFIED BY '123456';
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' WITH GRANT OPTION;
FLUSH PRIVILEGES;
- Disable the CentOS 7 firewall
systemctl stop firewalld
systemctl disable firewalld
- Test the connection, for example with Navicat
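If you prefer a command-line check, a quick test along these lines (using the example IP from the preparation step) should print the server version once remote access is working:
# Should return the MySQL version if the remote login succeeds
mysql -h 192.168.55.123 -P 3306 -uroot -p123456 -e "SELECT VERSION();"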
2. Install JDK 1.8
cd /opt/software
# Extract the JDK
tar -zxvf jdk-8u*.tar.gz -C /opt/module/
mv /opt/module/jdk1.8.0_* /opt/module/jdk1.8.0
# Verify the installation
source /etc/profile
java -version
3. Install Hadoop 3.1.3 and YARN 3.1.3
# Extract Hadoop
tar -zxvf hadoop-3.1.3.tar.gz -C /opt/module/
# Configure environment variables
echo 'export HADOOP_HOME=/opt/module/hadoop-3.1.3' >> /etc/profile
echo 'export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin' >> /etc/profile
source /etc/profile
# Configure core-site.xml
cat > /opt/module/hadoop-3.1.3/etc/hadoop/core-site.xml << EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/data/hadoop/tmp</value>
</property>
</configuration>
EOF
# Configure hdfs-site.xml
mkdir -p /data/hadoop/hdfs/{name,data}
cat > /opt/module/hadoop-3.1.3/etc/hadoop/hdfs-site.xml << EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.namenode.rpc-bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/data/hadoop/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/data/hadoop/hdfs/data</value>
</property>
</configuration>
EOF
# Configure mapred-site.xml
cat > /opt/module/hadoop-3.1.3/etc/hadoop/mapred-site.xml << EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
</configuration>
EOF
# Configure yarn-site.xml
cat > /opt/module/hadoop-3.1.3/etc/hadoop/yarn-site.xml << EOF
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
</configuration>
EOF
# Format HDFS
hdfs namenode -format
# Edit start-dfs.sh and stop-dfs.sh
vi /opt/module/hadoop-3.1.3/sbin/start-dfs.sh
# Add the following at the top of the file (after the #!/bin/bash line)
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
# Likewise, edit /opt/module/hadoop-3.1.3/sbin/start-yarn.sh and /opt/module/hadoop-3.1.3/sbin/stop-yarn.sh:
# Edit start-yarn.sh
vi /opt/module/hadoop-3.1.3/sbin/start-yarn.sh
# Add the following at the top of the file (after the #!/bin/bash line)
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
# Edit stop-yarn.sh in the same way.
# Generate a key pair for passwordless SSH access
ssh-keygen
ssh-copy-id master
# Edit hadoop-env.sh and set JAVA_HOME
vi /opt/module/hadoop-3.1.3/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0
# Start Hadoop
/opt/module/hadoop-3.1.3/sbin/start-dfs.sh
/opt/module/hadoop-3.1.3/sbin/start-yarn.sh
# Open up HDFS permissions so that other users can modify the HDFS file system remotely. Not recommended for production
hdfs dfs -chmod -R 777 /
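At this point it is worth verifying that all daemons are up. A rough check, assuming the default Hadoop 3.x web ports, could look like this:
# NameNode, DataNode, SecondaryNameNode, ResourceManager and NodeManager should all appear
jps
# Simple HDFS write/list test
hdfs dfs -mkdir -p /test
hdfs dfs -ls /
# Web UIs (open in a browser): NameNode at http://master:9870 , YARN at http://master:8088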
4. Install ZooKeeper 3.5.7
# Extract ZooKeeper
tar -zxvf apache-zookeeper-3.5.7-bin.tar.gz -C /opt/module/
mv /opt/module/apache-zookeeper-3.5.7-bin /opt/module/zookeeper-3.5.7
# Configure environment variables
echo 'export ZOOKEEPER_HOME=/opt/module/zookeeper-3.5.7' >> /etc/profile
echo 'export PATH=$PATH:$ZOOKEEPER_HOME/bin' >> /etc/profile
source /etc/profile
# Create the data directory
mkdir -p /data/zookeeper
# Configure zoo.cfg
cat > /opt/module/zookeeper-3.5.7/conf/zoo.cfg << EOF
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/data/zookeeper
clientPort=2181
server.1=master:2888:3888
EOF
# Create the myid file
echo "1" > /data/zookeeper/myid
# Start ZooKeeper
/opt/module/zookeeper-3.5.7/bin/zkServer.sh start
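You can optionally confirm that ZooKeeper is serving requests; on this single node the status command should report standalone mode:
# Expect "Mode: standalone" for a single-node setup
/opt/module/zookeeper-3.5.7/bin/zkServer.sh status
# Optionally open an interactive client session against master:2181
# /opt/module/zookeeper-3.5.7/bin/zkCli.sh -server master:2181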
5. Install Hive 3.1.2
# Extract Hive
tar -zxvf apache-hive-3.1.2-bin.tar.gz -C /opt/module/
mv /opt/module/apache-hive-3.1.2-bin /opt/module/hive-3.1.2
# Configure environment variables
echo 'export HIVE_HOME=/opt/module/hive-3.1.2' >> /etc/profile
echo 'export PATH=$PATH:$HIVE_HOME/bin' >> /etc/profile
source /etc/profile
# Create Hive's directories in HDFS
hadoop fs -mkdir -p /user/hive/warehouse
hadoop fs -mkdir -p /tmp/hive
hadoop fs -chmod g+w /user/hive/warehouse
hadoop fs -chmod g+w /tmp/hive
# Configure hive-site.xml
cat > /opt/module/hive-3.1.2/conf/hive-site.xml << EOF
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://localhost:3306/metastore?createDatabaseIfNotExist=true&amp;useSSL=false</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.cj.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
</property>
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>master</value>
</property>
<property>
<name>hive.metastore.event.db.notification.api.auth</name>
<value>false</value>
</property>
</configuration>
EOF
# Copy the MySQL JDBC driver into Hive's lib directory
cp /opt/software/mysql-connector-j-8.4.0.jar /opt/module/hive-3.1.2/lib/
# Resolve the Guava version conflict
# Check the Guava version shipped with Hadoop
find $HADOOP_HOME -name "guava*.jar"
# Check the Guava version shipped with Hive
find $HIVE_HOME -name "guava*.jar"
# Remove Hive's Guava JAR
find $HIVE_HOME/lib -name "guava*.jar" -exec rm -f {} \;
# Copy Hadoop's Guava JAR into Hive's lib directory
find $HADOOP_HOME -name "guava*.jar" -exec cp {} $HIVE_HOME/lib/ \;
# Initialize the Hive metastore schema
/opt/module/hive-3.1.2/bin/schematool -dbType mysql -initSchema
# Create the scratch directory
hadoop fs -mkdir -p /tmp/hive
hadoop fs -chmod -R 777 /tmp/hive
# Start the Hive Metastore service
nohup hive --service metastore &
# Start HiveServer2
nohup hive --service hiveserver2 &
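HiveServer2 can take a minute or two before it listens on port 10000. Once it is up, a quick Beeline check along these lines should list the default database. If the connection is rejected with a "root is not allowed to impersonate" error, add hadoop.proxyuser.root.hosts and hadoop.proxyuser.root.groups (both set to *) to core-site.xml and restart HDFS.
# Connect via Beeline and list databases
beeline -u jdbc:hive2://master:10000 -n root -e "show databases;"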
6. Install Kafka 2.4.1
# Extract Kafka
tar -zxvf kafka_2.12-2.4.1.tgz -C /opt/module/
mv /opt/module/kafka_2.12-2.4.1 /opt/module/kafka-2.4.1
# Configure environment variables
echo 'export KAFKA_HOME=/opt/module/kafka-2.4.1' >> /etc/profile
echo 'export PATH=$PATH:$KAFKA_HOME/bin' >> /etc/profile
source /etc/profile
# Create the Kafka log directory
mkdir -p /data/kafka-logs
# Write the Kafka broker configuration
cat > /opt/module/kafka-2.4.1/config/server.properties << EOF
broker.id=0
listeners=PLAINTEXT://master:9092
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
log.dirs=/data/kafka-logs
num.partitions=1
num.recovery.threads.per.data.dir=1
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1
log.retention.hours=168
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
zookeeper.connect=master:2181
zookeeper.connection.timeout.ms=18000
group.initial.rebalance.delay.ms=0
EOF
# Start Kafka
nohup /opt/module/kafka-2.4.1/bin/kafka-server-start.sh /opt/module/kafka-2.4.1/config/server.properties &
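To make sure the broker registered with ZooKeeper, you can create and list a throwaway topic, roughly as follows (the topic name test is just an example):
# Create a single-partition test topic
/opt/module/kafka-2.4.1/bin/kafka-topics.sh --create --bootstrap-server master:9092 --replication-factor 1 --partitions 1 --topic test
# List topics; "test" should appear in the output
/opt/module/kafka-2.4.1/bin/kafka-topics.sh --list --bootstrap-server master:9092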
7. Install Spark 3.1.1
# Extract Spark
tar -zxvf spark-3.1.1-bin-hadoop3.2.tgz -C /opt/module/
mv /opt/module/spark-3.1.1-bin-hadoop3.2 /opt/module/spark-3.1.1
# Configure environment variables
echo 'export SPARK_HOME=/opt/module/spark-3.1.1' >> /etc/profile
echo 'export PATH=$PATH:$SPARK_HOME/bin' >> /etc/profile
source /etc/profile
# Configure spark-env.sh
cp /opt/module/spark-3.1.1/conf/spark-env.sh.template /opt/module/spark-3.1.1/conf/spark-env.sh
cat >> /opt/module/spark-3.1.1/conf/spark-env.sh << EOF
export JAVA_HOME=/opt/module/jdk1.8.0
export HADOOP_HOME=/opt/module/hadoop-3.1.3
export HADOOP_CONF_DIR=/opt/module/hadoop-3.1.3/etc/hadoop
export SPARK_MASTER_HOST=master
export SPARK_MASTER_PORT=7077
export SPARK_WORKER_CORES=1
export SPARK_WORKER_MEMORY=1g
EOF
# Configure spark-defaults.conf
cp /opt/module/spark-3.1.1/conf/spark-defaults.conf.template /opt/module/spark-3.1.1/conf/spark-defaults.conf
cat >> /opt/module/spark-3.1.1/conf/spark-defaults.conf << EOF
spark.master spark://master:7077
spark.eventLog.enabled true
spark.eventLog.dir hdfs://master:9000/spark-logs
spark.history.fs.logDirectory hdfs://master:9000/spark-logs
EOF
# Create the Spark history server log directory in HDFS
hadoop fs -mkdir -p /spark-logs
# Start Spark
/opt/module/spark-3.1.1/sbin/start-all.sh
/opt/module/spark-3.1.1/sbin/start-history-server.sh
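A simple way to exercise the standalone cluster is to submit the bundled SparkPi example; the examples jar name below matches the spark-3.1.1-bin-hadoop3.2 distribution, so adjust it if yours differs:
# Submit SparkPi to the standalone master; look for "Pi is roughly 3.14..." in the output
spark-submit --class org.apache.spark.examples.SparkPi --master spark://master:7077 /opt/module/spark-3.1.1/examples/jars/spark-examples_2.12-3.1.1.jar 10
# Web UIs: Spark master at http://master:8080 , history server at http://master:18080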
8. Install Flink 1.14.0
# Extract Flink (the archive already unpacks to /opt/module/flink-1.14.0, so no rename is needed)
tar -zxvf flink-1.14.0-bin-scala_2.12.tgz -C /opt/module/
# Configure environment variables
echo 'export FLINK_HOME=/opt/module/flink-1.14.0' >> /etc/profile
echo 'export PATH=$PATH:$FLINK_HOME/bin' >> /etc/profile
source /etc/profile
# Configure flink-conf.yaml
cat > /opt/module/flink-1.14.0/conf/flink-conf.yaml << EOF
jobmanager.rpc.address: master
jobmanager.rpc.port: 6123
jobmanager.memory.process.size: 1600m
taskmanager.memory.process.size: 1728m
taskmanager.numberOfTaskSlots: 1
parallelism.default: 1
EOF
# Configure workers
echo "master" > /opt/module/flink-1.14.0/conf/workers
# Start the Flink cluster
/opt/module/flink-1.14.0/bin/start-cluster.sh
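To check the cluster, you can run one of the bundled streaming examples and then look at the web UI:
# Run the bundled WordCount example; it should submit to the local JobManager and finish quickly
/opt/module/flink-1.14.0/bin/flink run /opt/module/flink-1.14.0/examples/streaming/WordCount.jar
# Flink web UI: http://master:8081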
9. Install HBase 2.2.3
# Extract HBase (the archive already unpacks to /opt/module/hbase-2.2.3, so no rename is needed)
tar -zxvf hbase-2.2.3-bin.tar.gz -C /opt/module/
# Configure environment variables
echo 'export HBASE_HOME=/opt/module/hbase-2.2.3' >> /etc/profile
echo 'export PATH=$PATH:$HBASE_HOME/bin' >> /etc/profile
source /etc/profile
# Configure hbase-env.sh
cat >> /opt/module/hbase-2.2.3/conf/hbase-env.sh << EOF
export JAVA_HOME=/opt/module/jdk1.8.0
export HBASE_MANAGES_ZK=false
EOF
# Configure hbase-site.xml
cat > /opt/module/hbase-2.2.3/conf/hbase-site.xml << EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>hbase.rootdir</name>
<value>hdfs://master:9000/hbase</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>master</value>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
<property>
<name>hbase.unsafe.stream.capability.enforce</name>
<value>false</value>
</property>
</configuration>
EOF
# Start HBase
/opt/module/hbase-2.2.3/bin/start-hbase.sh
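A quick smoke test through the HBase shell, for example creating and scanning a small table, confirms that HBase can reach both HDFS and ZooKeeper (the table name smoke_test is arbitrary):
# Run a few shell commands non-interactively
hbase shell << EOF
create 'smoke_test', 'cf'
put 'smoke_test', 'row1', 'cf:a', 'value1'
scan 'smoke_test'
disable 'smoke_test'
drop 'smoke_test'
EOF
# HBase Master web UI: http://master:16010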
10. Install Flume 1.9.0
# Extract Flume
tar -zxvf apache-flume-1.9.0-bin.tar.gz -C /opt/module/
mv /opt/module/apache-flume-1.9.0-bin /opt/module/flume-1.9.0
# Configure environment variables
echo 'export FLUME_HOME=/opt/module/flume-1.9.0' >> /etc/profile
echo 'export PATH=$PATH:$FLUME_HOME/bin' >> /etc/profile
source /etc/profile
# Configure flume-env.sh
cp /opt/module/flume-1.9.0/conf/flume-env.sh.template /opt/module/flume-1.9.0/conf/flume-env.sh
cat >> /opt/module/flume-1.9.0/conf/flume-env.sh << EOF
export JAVA_HOME=/opt/module/jdk1.8.0
EOF
# Create a simple example Flume configuration
cat > /opt/module/flume-1.9.0/conf/example.conf << EOF
# example.conf: A single-node Flume configuration
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = master
a1.sources.r1.port = 44444
# Describe the sink
a1.sinks.k1.type = logger
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
EOF
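The example agent can be started in the foreground and fed test events from another terminal with telnet or nc (either may need to be installed via yum first):
# Start the example agent with console logging
flume-ng agent --conf $FLUME_HOME/conf --conf-file $FLUME_HOME/conf/example.conf --name a1 -Dflume.root.logger=INFO,console
# In another terminal, send a line of text; it should show up in the agent's log output
# telnet master 44444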
11. Startup Script
Below is a script for starting and stopping the core components (HBase is not included, since it may conflict with the others).
#!/bin/bash
# Big data component start/stop script
# Usage: ./bigdata_control.sh [start|stop|status]
# Component order:
# Start: HDFS -> YARN -> ZooKeeper -> Hive -> Kafka -> Flink
# Stop: Flink -> Kafka -> Hive -> ZooKeeper -> YARN -> HDFS
# Color definitions
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Log file
LOG_DIR="$HOME/logs"
mkdir -p $LOG_DIR
LOG_FILE="$LOG_DIR/bigdata_control_$(date '+%Y%m%d').log"
# Check command-line arguments
if [ $# -ne 1 ] || [[ ! "$1" =~ ^(start|stop|status)$ ]]; then
echo -e "${RED}Usage error: $0 [start|stop|status]${NC}"
exit 1
fi
ACTION=$1
# Log the start of this operation
echo "$(date '+%Y-%m-%d %H:%M:%S') - starting $ACTION operation" >> $LOG_FILE
# Check for the Hive Metastore process
check_hive_metastore() {
if ps aux | grep -v grep | grep "org.apache.hadoop.hive.metastore.HiveMetaStore" > /dev/null; then
return 0
fi
if ps aux | grep -v grep | grep "\-\-service metastore" > /dev/null; then
return 0
fi
if netstat -tlnp 2>/dev/null | grep -q ":9083"; then
return 0
fi
return 1
}
# Check for the HiveServer2 process
check_hiveserver2() {
if ps aux | grep -v grep | grep "org.apache.hive.service.server.HiveServer2" > /dev/null; then
return 0
fi
if ps aux | grep -v grep | grep "\-\-service hiveserver2" > /dev/null; then
return 0
fi
if netstat -tlnp 2>/dev/null | grep -q ":10000"; then
return 0
fi
return 1
}
# Check for the Flink JobManager process
check_flink_jobmanager() {
if jps -l 2>/dev/null | grep -q "org.apache.flink.runtime.entrypoint.StandaloneSessionClusterEntrypoint"; then
return 0
fi
if ps aux | grep -v grep | grep -q "org.apache.flink.runtime.entrypoint.StandaloneSessionClusterEntrypoint"; then
return 0
fi
if ps aux | grep -v grep | grep -q "StandaloneSessionClusterEntrypoint"; then
return 0
fi
if netstat -tlnp 2>/dev/null | grep -q ":8081"; then
return 0
fi
if command -v curl &>/dev/null; then
if curl -s -m 3 http://localhost:8081/overview 2>/dev/null | grep -q "taskmanagers"; then
return 0
fi
fi
return 1
}
# Check for the Flink TaskManager process
check_flink_taskmanager() {
if jps -l 2>/dev/null | grep -q "org.apache.flink.runtime.taskexecutor.TaskManagerRunner"; then
return 0
fi
if ps aux | grep -v grep | grep -q "org.apache.flink.runtime.taskexecutor.TaskManagerRunner"; then
return 0
fi
if ps aux | grep -v grep | grep -q "TaskManagerRunner"; then
return 0
fi
if command -v curl &>/dev/null; then
if curl -s -m 3 http://localhost:8081/taskmanagers 2>/dev/null | grep -q "taskmanagers"; then
if ! curl -s -m 3 http://localhost:8081/taskmanagers 2>/dev/null | grep -q "\"taskmanagers\":\[\]"; then
return 0
fi
fi
fi
return 1
}
# Generic process-check function
check_process() {
local process_name=$1
case "$process_name" in
"HiveMetaStore")
check_hive_metastore
return $?
;;
"HiveServer2")
check_hiveserver2
return $?
;;
"StandaloneSessionClusterEntrypoint")
check_flink_jobmanager
return $?
;;
"TaskManagerRunner")
check_flink_taskmanager
return $?
;;
*)
if jps 2>/dev/null | grep -i "$process_name" > /dev/null; then
return 0
fi
return 1
;;
esac
}
# Wait for a process to start
wait_for_process() {
local process_name=$1
local max_wait=$2
local wait_time=0
echo -ne "${YELLOW}Waiting for $process_name to start"
while [ $wait_time -lt $max_wait ]; do
if check_process "$process_name"; then
echo -e "${GREEN} [started]${NC}"
return 0
fi
echo -n "."
sleep 1
wait_time=$((wait_time + 1))
done
if check_process "$process_name"; then
echo -e "${GREEN} [started]${NC}"
return 0
fi
echo -e "${RED} [timed out]${NC}"
return 1
}
# Wait for a process to stop
wait_for_process_stop() {
local process_name=$1
local max_wait=$2
local wait_time=0
echo -ne "${YELLOW}Waiting for $process_name to stop"
while [ $wait_time -lt $max_wait ]; do
if ! check_process "$process_name"; then
echo -e "${GREEN} [stopped]${NC}"
return 0
fi
echo -n "."
sleep 1
wait_time=$((wait_time + 1))
done
echo -e "${RED} [timed out]${NC}"
return 1
}
# Start a component
start_component() {
local component=$1
local start_cmd=$2
local process_check=$3
local max_wait=${4:-30}
echo -e "${BLUE}[$(date '+%Y-%m-%d %H:%M:%S')] Starting $component...${NC}"
echo "$(date '+%Y-%m-%d %H:%M:%S') - starting $component" >> $LOG_FILE
if check_process "$process_check"; then
echo -e "${GREEN}$component is already running${NC}"
return 0
fi
eval "$start_cmd" >> $LOG_FILE 2>&1
local start_result=$?
if [ $start_result -ne 0 ]; then
echo -e "${YELLOW}Start command returned code $start_result; there may be a problem${NC}"
fi
if wait_for_process "$process_check" $max_wait; then
echo -e "${GREEN}$component started successfully${NC}"
return 0
else
echo -e "${RED}$component start timed out, but this does not necessarily mean it failed${NC}"
case "$component" in
"Hive Metastore"|"HiveServer2"|"Flink JobManager")
echo -e "${YELLOW}Performing an additional check...${NC}"
sleep 10
if check_process "$process_check"; then
echo -e "${GREEN}$component started successfully${NC}"
return 0
fi
echo -e "${YELLOW}Please check the status of $component manually${NC}"
;;
esac
return 1
fi
}
# Stop a component
stop_component() {
local component=$1
local stop_cmd=$2
local process_check=$3
local max_wait=${4:-30}
echo -e "${BLUE}[$(date '+%Y-%m-%d %H:%M:%S')] Stopping $component...${NC}"
echo "$(date '+%Y-%m-%d %H:%M:%S') - stopping $component" >> $LOG_FILE
if ! check_process "$process_check"; then
echo -e "${YELLOW}$component is not running${NC}"
return 0
fi
eval "$stop_cmd" >> $LOG_FILE 2>&1
if wait_for_process_stop "$process_check" $max_wait; then
echo -e "${GREEN}$component stopped successfully${NC}"
return 0
else
echo -e "${RED}$component stop timed out; you may need to kill the process manually${NC}"
return 1
fi
}
# Get the PID of a process
get_pid() {
local process_name=$1
local pid=""
case "$process_name" in
"HiveMetaStore")
pid=$(ps aux | grep -v grep | grep "org.apache.hadoop.hive.metastore.HiveMetaStore" | awk '{print $2}' | head -1)
if [ -z "$pid" ]; then
pid=$(ps aux | grep -v grep | grep "\-\-service metastore" | awk '{print $2}' | head -1)
fi
;;
"HiveServer2")
pid=$(ps aux | grep -v grep | grep "org.apache.hive.service.server.HiveServer2" | awk '{print $2}' | head -1)
if [ -z "$pid" ]; then
pid=$(ps aux | grep -v grep | grep "\-\-service hiveserver2" | awk '{print $2}' | head -1)
fi
;;
"StandaloneSessionClusterEntrypoint")
pid=$(jps -l 2>/dev/null | grep "org.apache.flink.runtime.entrypoint.StandaloneSessionClusterEntrypoint" | awk '{print $1}' | head -1)
if [ -z "$pid" ]; then
pid=$(ps aux | grep -v grep | grep "StandaloneSessionClusterEntrypoint" | awk '{print $2}' | head -1)
fi
;;
"TaskManagerRunner")
pid=$(jps -l 2>/dev/null | grep "org.apache.flink.runtime.taskexecutor.TaskManagerRunner" | awk '{print $1}' | head -1)
if [ -z "$pid" ]; then
pid=$(ps aux | grep -v grep | grep "TaskManagerRunner" | awk '{print $2}' | head -1)
fi
;;
*)
pid=$(jps 2>/dev/null | grep -i "$process_name" | awk '{print $1}')
;;
esac
echo "$pid"
}
# Improved component status check
check_component_status() {
local component=$1
local process_check=$2
echo -n "$component: "
if check_process "$process_check"; then
local pid=$(get_pid "$process_check")
if [ ! -z "$pid" ]; then
echo -e "${GREEN}running (PID: $pid)${NC}"
else
echo -e "${GREEN}running${NC}"
fi
else
echo -e "${RED}not running${NC}"
fi
}
# Start all components
start_all() {
echo -e "${BLUE}==============================================${NC}"
echo -e "${BLUE} Starting all big data components ${NC}"
echo -e "${BLUE}==============================================${NC}"
echo ""
# 1. Start HDFS
start_component "HDFS" "start-dfs.sh" "NameNode" 60 || {
echo -e "${RED}HDFS failed to start; aborting the remaining components${NC}"
return 1
}
# Check HDFS health
echo "Checking HDFS health..."
hdfs dfsadmin -report >> $LOG_FILE 2>&1
if [ $? -ne 0 ]; then
echo -e "${RED}HDFS may not be running properly, please check${NC}"
else
echo -e "${GREEN}HDFS is running normally${NC}"
fi
# 2. Start YARN
start_component "YARN" "start-yarn.sh" "ResourceManager" 60 || {
echo -e "${YELLOW}YARN failed to start, continuing with the other components${NC}"
}
# 3. Start ZooKeeper
start_component "ZooKeeper" "$ZOOKEEPER_HOME/bin/zkServer.sh start" "QuorumPeerMain" 30 || {
echo -e "${YELLOW}ZooKeeper failed to start, continuing with the other components${NC}"
}
# 4. Start Hive Metastore
start_component "Hive Metastore" "nohup hive --service metastore &" "HiveMetaStore" 60 || {
echo -e "${YELLOW}Hive Metastore failed to start or timed out, continuing with the other components${NC}"
}
# 5. Start HiveServer2
start_component "HiveServer2" "nohup hive --service hiveserver2 &" "HiveServer2" 60 || {
echo -e "${YELLOW}HiveServer2 failed to start or timed out, continuing with the other components${NC}"
}
# 6. Start Kafka
start_component "Kafka" "$KAFKA_HOME/bin/kafka-server-start.sh -daemon $KAFKA_HOME/config/server.properties" "Kafka" 45 || {
echo -e "${YELLOW}Kafka failed to start, continuing with the other components${NC}"
}
# 7. Start the Flink JobManager (standalone mode)
start_component "Flink JobManager" "$FLINK_HOME/bin/start-cluster.sh" "StandaloneSessionClusterEntrypoint" 60 || {
echo -e "${YELLOW}Flink start command completed, but the process check did not pass${NC}"
sleep 10
if check_flink_jobmanager; then
echo -e "${GREEN}Flink JobManager started successfully${NC}"
else
echo -e "${YELLOW}Flink JobManager may not have started properly, please check manually${NC}"
fi
}
echo ""
echo -e "${BLUE}==============================================${NC}"
echo -e "${GREEN}The startup sequence for all components has finished${NC}"
echo -e "${BLUE}==============================================${NC}"
}
# Stop all components
stop_all() {
echo -e "${BLUE}==============================================${NC}"
echo -e "${BLUE} Stopping all big data components ${NC}"
echo -e "${BLUE}==============================================${NC}"
echo ""
# Stop the components in the reverse of the start order
# 1. Stop Flink
stop_component "Flink" "$FLINK_HOME/bin/stop-cluster.sh" "StandaloneSessionClusterEntrypoint" 30
# 2. Stop Kafka
stop_component "Kafka" "$KAFKA_HOME/bin/kafka-server-stop.sh" "Kafka" 30
# 3. Stop HiveServer2
stop_component "HiveServer2" "pkill -f 'org.apache.hive.service.server.HiveServer2' || pkill -f 'hive --service hiveserver2'" "HiveServer2" 30
# 4. Stop Hive Metastore
stop_component "Hive Metastore" "pkill -f 'org.apache.hadoop.hive.metastore.HiveMetaStore' || pkill -f 'hive --service metastore'" "HiveMetaStore" 30
# 5. Stop ZooKeeper
stop_component "ZooKeeper" "$ZOOKEEPER_HOME/bin/zkServer.sh stop" "QuorumPeerMain" 30
# 6. Stop YARN
stop_component "YARN" "stop-yarn.sh" "ResourceManager" 60
# 7. Stop HDFS
stop_component "HDFS" "stop-dfs.sh" "NameNode" 60
echo ""
echo -e "${BLUE}==============================================${NC}"
echo -e "${GREEN}The shutdown sequence for all components has finished${NC}"
echo -e "${BLUE}==============================================${NC}"
}
# Show the status of all components
show_status() {
echo -e "${BLUE}==============================================${NC}"
echo -e "${BLUE} Big data component status ${NC}"
echo -e "${BLUE}==============================================${NC}"
echo ""
check_component_status "NameNode" "NameNode"
check_component_status "DataNode" "DataNode"
check_component_status "ResourceManager" "ResourceManager"
check_component_status "NodeManager" "NodeManager"
check_component_status "ZooKeeper" "QuorumPeerMain"
check_component_status "Hive Metastore" "HiveMetaStore"
check_component_status "HiveServer2" "HiveServer2"
check_component_status "Kafka" "Kafka"
check_component_status "Flink JobManager" "StandaloneSessionClusterEntrypoint"
check_component_status "Flink TaskManager" "TaskManagerRunner"
echo ""
echo -e "${BLUE}==============================================${NC}"
# Show the status of the Flink REST API
if command -v curl &>/dev/null; then
echo -e "${YELLOW}Flink REST API status:${NC}"
if curl -s -m 3 http://localhost:8081/overview 2>/dev/null | grep -q "taskmanagers"; then
echo -e "${GREEN}Flink REST API is responding${NC}"
else
echo -e "${RED}Flink REST API is not responding${NC}"
fi
fi
}
# Check environment variables
check_env() {
local missing_vars=0
if [ -z "$ZOOKEEPER_HOME" ]; then
echo -e "${YELLOW}Warning: ZOOKEEPER_HOME environment variable is not set${NC}"
missing_vars=1
fi
if [ -z "$KAFKA_HOME" ]; then
echo -e "${YELLOW}Warning: KAFKA_HOME environment variable is not set${NC}"
missing_vars=1
fi
if [ -z "$FLINK_HOME" ]; then
echo -e "${YELLOW}Warning: FLINK_HOME environment variable is not set${NC}"
missing_vars=1
fi
if [ $missing_vars -eq 1 ]; then
echo -e "${YELLOW}Please set the required environment variables or adjust the paths in this script${NC}"
echo ""
if [ "$ACTION" != "status" ]; then
echo -n -e "${YELLOW}Continue anyway? (y/n): ${NC}"
read -r confirm
if [[ ! "$confirm" =~ ^[Yy]$ ]]; then
echo -e "${RED}Operation cancelled${NC}"
exit 1
fi
fi
fi
}
# Main function
main() {
case "$ACTION" in
start)
start_all
;;
stop)
stop_all
;;
status)
show_status
;;
*)
echo -e "${RED}Unsupported action: $ACTION${NC}"
exit 1
;;
esac
echo ""
echo -e "${YELLOW}Log file: $LOG_FILE${NC}"
}
# Check environment variables
check_env
# Run the main function
main
How to use it:
Save the script as bigdata.sh
Add it to your PATH:
mkdir -p ~/bin
mv bigdata.sh ~/bin/bigdata
chmod +x ~/bin/bigdata
echo 'export PATH="$HOME/bin:$PATH"' >> ~/.bashrc
Apply the change:
source ~/.bashrc
Test it:
bigdata start
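The same wrapper accepts the other two actions defined in the script:
# Show the status of every component
bigdata status
# Stop everything in reverse order
bigdata stop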
That completes the installation. Happy Coding!
April 20, 2025