This article covers a deployment based on the open-source Apache Hadoop distribution.

I. Environment

Versions:
Apache Hadoop: 3.3.2
Zookeeper: 3.4.9

Roles:

IP               hostname  role
192.168.100.200  master    namenode, journalnode, datanode, zookeeper
192.168.100.201  node01    namenode, journalnode, datanode, zookeeper
192.168.100.202  node02    journalnode, datanode, zookeeper

II. Basic environment configuration

The basic environment configuration steps are largely the same as for a CDH deployment.
Note: unless stated otherwise, the following steps must be performed on all nodes.

1. Configure hostname resolution

cat >> /etc/hosts <<EOF
192.168.100.200 master
192.168.100.201 node01
192.168.100.202 node02
EOF
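A quick sanity check that each hostname resolves and is reachable:

for h in master node01 node02; do ping -c 1 $h; done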

2. Disable the firewall and SELinux

systemctl stop firewalld
systemctl disable firewalld
sed -i 's/^SELINUX=enforcing$/SELINUX=disabled/' /etc/selinux/config && setenforce 0

3. Configure time synchronization

yum install -y ntp
systemctl enable ntpd
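Installing and enabling the package is not enough by itself; ntpd still has to be configured and started. A minimal sketch, assuming master serves as the internal time source for the other two nodes (the subnet and upstream layout here are assumptions, adjust to your environment):

# On master: allow the cluster subnet to sync from this node
echo "restrict 192.168.100.0 mask 255.255.255.0 nomodify notrap" >> /etc/ntp.conf
systemctl start ntpd

# On node01/node02: replace the default upstream servers with master
sed -i 's/^server /#server /' /etc/ntp.conf
echo "server master iburst" >> /etc/ntp.conf
systemctl start ntpd
ntpq -p    # the peer list should include master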

4. Install JDK 1.8
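The JDK installation itself is not shown here. A minimal sketch, assuming the Oracle JDK 8u181 RPM (the package name is an assumption; the install path matches the JAVA_HOME used later in hadoop-env.sh):

rpm -ivh jdk-8u181-linux-x64.rpm
cat > /etc/profile.d/java.sh << EOF
export JAVA_HOME=/usr/java/jdk1.8.0_181
export PATH=\$JAVA_HOME/bin:\$PATH
EOF
source /etc/profile.d/java.sh
java -version    # should report 1.8.0_181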

5. Create users and configure passwordless login for them

groupadd --system hadoop
useradd --system -m -d /home/hdfs -G hadoop hdfs
echo "hdfs" | passwd --stdin hdfs

useradd --system -m -d /home/zookeeper -G hadoop zookeeper

Configure passwordless login for the hdfs user (only needed on the namenode nodes)

sudo -u hdfs ssh-keygen -t rsa -P '' -f /home/hdfs/.ssh/id_rsa
sudo -u hdfs ssh-copy-id master
sudo -u hdfs ssh-copy-id node01
sudo -u hdfs ssh-copy-id node02
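A quick check that passwordless login works; each command should print the remote hostname without asking for a password:

for node in master node01 node02; do sudo -u hdfs ssh $node hostname; done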

6. Install dependency packages

# NameNode active/standby failover (sshfence) depends on fuser, so install psmisc on all namenode nodes
yum install -y psmisc

7. Upload and extract the installation packages

mkdir -p /data/dfs
tar zxf hadoop-3.3.2.tar.gz -C /data

mkdir -p /data/zookeeper/{data,logs}
tar zxf zookeeper-3.4.9.tar.gz -C /data/zookeeper/ --strip=1

III. Install Zookeeper

Note: unless stated otherwise, the following steps must be performed on all zookeeper nodes.

1. Configure environment variables

cat > /etc/profile.d/zookeeper.sh << EOF
export ZOOKEEPER_HOME=/data/zookeeper
export PATH=\$ZOOKEEPER_HOME/bin:\$PATH
EOF

2. Edit the configuration file

cat > /data/zookeeper/conf/zoo.cfg << EOF
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/data/zookeeper/data
dataLogDir=/data/zookeeper/logs
clientPort=2181
server.1=master:2888:3888
server.2=node01:2888:3888
server.3=node02:2888:3888
EOF

3. Set the node id

# On master
echo '1' >/data/zookeeper/data/myid

# On node01
echo '2' >/data/zookeeper/data/myid

# On node02
echo '3' >/data/zookeeper/data/myid

4. Start zookeeper

chown zookeeper:zookeeper -R /data/zookeeper
su - zookeeper -c "/data/zookeeper/bin/zkServer.sh start"
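After zookeeper has been started on all three nodes, each node should report Mode: leader or Mode: follower:

su - zookeeper -c "/data/zookeeper/bin/zkServer.sh status"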

IV. Install HDFS

1. Edit core-site.xml

vim hadoop-3.3.2/etc/hadoop/core-site.xml
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://nameservice1</value>
        <description>Default filesystem URI; nameservice1 is the HA nameservice defined in hdfs-site.xml (namenode RPC port 8020)</description>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/data/hadoop-3.3.2/dfs</value>
        <description>Base directory for Hadoop runtime and temporary data</description>
    </property>
    <property>
        <name>io.file.buffer.size</name>
        <value>65536</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hdfs.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hdfs.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.yarn.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.yarn.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hive.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hive.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>master:2181,node01:2181,node02:2181</value>
    </property>

2. Edit hdfs-site.xml

vim hadoop-3.3.2/etc/hadoop/hdfs-site.xml    
    <property>
        <name>dfs.replication</name>
        <value>3</value>
        <description>Number of replicas kept for each block</description>
    </property>
    <property>
        <name>dfs.blocksize</name>
        <value>134217728</value>
        <description>Block size in bytes (134217728 = 128 MB)</description>
    </property>
    <property>
        <name>dfs.namenode.safemode.min.datanodes</name>
        <value>1</value>
        <description>Number of DataNodes that must be alive before the NameNode exits safe mode</description>
    </property>
    <property>
        <name>dfs.namenode.safemode.threshold-pct</name>
        <value>0.99</value>
        <description>A value of 0 or less means safe mode is never entered; a value greater than 1 means safe mode is never left</description>
    </property>
    <property>
        <name>dfs.permissions.enabled</name>
        <value>true</value>
        <description>Enable permission checking for file operations</description>
    </property>
    <property>
        <name>dfs.namenode.acls.enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.client.read.shortcircuit</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/data/dfs/nn</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/data/dfs/dn</value>
    </property>
    <property>
        <name>dfs.namenode.checkpoint.dir</name>
        <value>file:/data/dfs/snn</value>
    </property>
    <property>
        <name>dfs.nameservices</name>
        <value>nameservice1</value>
    </property>
    <property>
        <name>dfs.ha.namenodes.nameservice1</name>
        <value>namenode1,namenode2</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.nameservice1.namenode1</name>
        <value>master:8020</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.nameservice1.namenode2</name>
        <value>node01:8020</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.nameservice1.namenode1</name>
        <value>master:9870</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.nameservice1.namenode2</name>
        <value>node01:9870</value>
    </property>
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://master:8485;node01:8485;node02:8485/nameservice1</value>
    </property> 
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/data/dfs/jn</value>
    </property>
    <property>
        <name>dfs.client.failover.proxy.provider.nameservice1</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>  
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence</value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/home/hdfs/.ssh/id_rsa</value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
    </property>

3. Edit the workers file

cat > hadoop-3.3.2/etc/hadoop/workers << EOF
master
node01
node02
EOF

4. Configure environment variables

cat >> hadoop-3.3.2/etc/hadoop/hadoop-env.sh << EOF
export JAVA_HOME=/usr/java/jdk1.8.0_181/
export HDFS_NAMENODE_USER=hdfs
export HDFS_DATANODE_USER=hdfs
export HDFS_SECONDARYNAMENODE_USER=hdfs
export HDFS_ZKFC_USER=hdfs
export HDFS_JOURNALNODE_USER=hdfs
EOF

cat >> /etc/profile.d/hadoop.sh << EOF
export HADOOP_HOME=/data/hadoop-3.3.2
export HADOOP_HDFS_HOME=\$HADOOP_HOME
export HADOOP_CONF_DIR=\$HADOOP_HOME/etc/hadoop
export PATH=\$PATH:\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin
EOF
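The profile script only takes effect for new login shells; to use the hdfs/hadoop commands in the current session, source it first:

source /etc/profile.d/hadoop.sh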

5. Distribute the files

Distribute the files above to the other two nodes


for node in node01 node02; do
  scp hadoop-3.3.2/etc/hadoop/core-site.xml $node:/data/hadoop-3.3.2/etc/hadoop/core-site.xml
  scp hadoop-3.3.2/etc/hadoop/hdfs-site.xml $node:/data/hadoop-3.3.2/etc/hadoop/hdfs-site.xml
  scp hadoop-3.3.2/etc/hadoop/workers $node:/data/hadoop-3.3.2/etc/hadoop/workers
  scp hadoop-3.3.2/etc/hadoop/hadoop-env.sh $node:/data/hadoop-3.3.2/etc/hadoop/hadoop-env.sh
  scp /etc/profile.d/hadoop.sh $node:/etc/profile.d/hadoop.sh
done
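The HDFS daemons run as the hdfs user, so before starting any of them make sure the data directory and the Hadoop directory (its logs subdirectory is the default log location) are writable by that user on every node, for example:

chown -R hdfs:hadoop /data/dfs /data/hadoop-3.3.2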

6. Format the ZKFC state in zookeeper

hdfs zkfc -formatZK
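If the format succeeded, zookeeper now contains the HA parent znode, which can be checked from any zookeeper node (the output of ls /hadoop-ha should include nameservice1):

su - zookeeper -c "/data/zookeeper/bin/zkCli.sh -server master:2181 ls /hadoop-ha"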

7. Start the journalnodes

The journalnodes and zookeeper must be running before the namenode is formatted.

su - hdfs -c "hdfs --daemon start journalnode"
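Each node should now have a JournalNode process listening on the default RPC port 8485:

su - hdfs -c "jps | grep JournalNode"
ss -tlnp | grep 8485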

8. Start the namenodes

Run the format on the master node, then start the namenode

hdfs namenode -format
chown -R hdfs:hdfs /data/dfs/nn
su - hdfs -c "hdfs --daemon start namenode"

Sync the namenode metadata from master to the node01 namenode

scp -r /data/dfs/nn node01:/data/dfs/

Run the following on node01:

chown hdfs:hdfs -R /data/dfs/nn
su - hdfs -c "hdfs namenode -bootstrapStandby"
su - hdfs -c "hdfs --daemon start namenode"

9. Start the remaining services

start-dfs.sh
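start-dfs.sh brings up the remaining datanodes and the ZKFC processes. One of the two namenodes should then report active and the other standby:

su - hdfs -c "hdfs haadmin -getServiceState namenode1"
su - hdfs -c "hdfs haadmin -getServiceState namenode2"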

V. Verify the HDFS service

su - hdfs -c "hdfs dfs -mkdir -p /user/hdfs"
su - hdfs -c "hdfs dfs -chmod 755 /user/hdfs"
hdfs dfs -ls /
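Beyond listing the root directory, a simple write/read round trip and a cluster report give a fuller check (the file used here is just an example); the namenode web UIs are also reachable at http://master:9870 and http://node01:9870:

su - hdfs -c "hdfs dfs -put /etc/hosts /user/hdfs/"
su - hdfs -c "hdfs dfs -cat /user/hdfs/hosts"
su - hdfs -c "hdfs dfsadmin -report"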