Hadoop集群搭建

字数 62阅读 221

下载hadoop3.0.0安装包

在usr下创建目录hadoop:
[root@nn1 usr]# mkdir hadoop
进入新目录(hadoop),并把安装包移动过来(注意应使用二进制发行包 hadoop-3.0.0.tar.gz,而不是源码包 hadoop-3.0.0-src.tar.gz,源码包中没有 etc/hadoop、sbin 等目录):
[root@nn1 hadoop]# mv /home/admin/software/hadoop-3.0.0.tar.gz ./
解压:
[root@nn1 hadoop]# tar -zxvf hadoop-3.0.0.tar.gz
得到hadoop-3.0.0目录,并进入:
[root@nn1 hadoop]# cd hadoop-3.0.0/
在/usr/hadoop/hadoop-3.0.0/目录下,建立tmp、hdfs/name、hdfs/data目录
[root@nn1 hadoop-3.0.0]# mkdir hdfs
[root@nn1 hadoop-3.0.0]# mkdir tmp
[root@nn1 hadoop-3.0.0]# mkdir hdfs/data
[root@nn1 hadoop-3.0.0]# mkdir hdfs/name
设置环境变量(在/etc/profile文件末尾添加以下内容):
# set hadoop path
export HADOOP_HOME=/usr/hadoop/hadoop-3.0.0
export PATH=$PATH:$HADOOP_HOME/bin

使环境变量生效
[root@nn1 hadoop-3.0.0]# source /etc/profile

设置hadoop文件

hadoop-env.sh

[root@nn1 hadoop-3.0.0]# vim /usr/hadoop/hadoop-3.0.0/etc/hadoop/hadoop-env.sh
#
# The java implementation to use.  
#export JAVA_HOME=${JAVA_HOME}  
export JAVA_HOME=/usr/lib/jvm/jdk1.8.0_161

yarn-env.sh

[root@nn1 hadoop-3.0.0]# vim /usr/hadoop/hadoop-3.0.0/etc/hadoop/yarn-env.sh
#The java implementation to use
export JAVA_HOME=/usr/lib/jvm/jdk1.8.0_161

core-site.xml

[root@nn1 hadoop-3.0.0]# vim /usr/hadoop/hadoop-3.0.0/etc/hadoop/core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://192.168.0.240:9000</value>
</property>

<property>
    <name>hadoop.tmp.dir</name>
    <value>/usr/hadoop/hadoop-3.0.0/tmp</value>
</property>
</configuration>

hdfs-site.xml

[root@nn1 hadoop-3.0.0]# vim /usr/hadoop/hadoop-3.0.0/etc/hadoop/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

<!--HDFS NN的逻辑名称-->
<property>
    <name>dfs.nameservices</name>
    <value>myhdfs</value>
</property>

<!--给定服务逻辑名称myhdfs的节点列表-->
<property>
    <name>dfs.ha.namenodes.myhdfs</name>
    <value>nn1,nn2</value>
</property>

<!--myhdfs中nn1节点对外服务的RPC地址-->
<property>
    <name>dfs.namenode.rpc-address.myhdfs.nn1</name>
    <value>192.168.0.240:9000</value>
</property>

<!--myhdfs中nn2节点对外服务的RPC地址-->
<property>
    <name>dfs.namenode.rpc-address.myhdfs.nn2</name>
    <value>192.168.0.241:9000</value>
</property>

<!--myhdfs中nn1节点对外服务的http地址-->
<property>
    <name>dfs.namenode.http-address.myhdfs.nn1</name>
    <value>192.168.0.240:50070</value>
</property>

<!--myhdfs中nn2节点对外服务的http地址-->
<property>
    <name>dfs.namenode.http-address.myhdfs.nn2</name>
    <value>192.168.0.241:50070</value>
</property>

<property>
    <name>dfs.namenode.http-address</name>
    <value>192.168.0.240:50070</value>
</property>

<property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>192.168.0.241:50070</value>
</property>

<!--namenode上存储hdfs名字空间元数据-->
<property>
    <name>dfs.namenode.name.dir</name>
    <value>/usr/hadoop/hadoop-3.0.0/hdfs/name</value>
</property>

<!--datanode上数据块的物理存储位置-->
<property>
    <name>dfs.datanode.data.dir</name>
    <value>/usr/hadoop/hadoop-3.0.0/hdfs/data</value>
</property>

<!--副本个数,默认配置是3,不应大于datanode机器数量-->
<property>
    <name>dfs.replication</name>
    <value>2</value>
</property>

</configuration>

mapred-site.xml

[root@nn1 hadoop-3.0.0]# vim /usr/hadoop/hadoop-3.0.0/etc/hadoop/mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
        <!--指定运行mapreduce的环境是yarn-->
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
</property>
<property>
         <!--job tracker的web管理端口-->
         <name>mapred.job.tracker.http.address</name>
         <value>192.168.0.240:50030</value>
</property>
<property>
         <!--task tracker的HTTP端口-->
         <name>mapred.task.tracker.http.address</name>
         <value>192.168.0.240:50060</value>
</property>
</configuration>

yarn-site.xml

[root@nn1 hadoop-3.0.0]# vim /usr/hadoop/hadoop-3.0.0/etc/hadoop/yarn-site.xml
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>

<!-- Site specific YARN configuration properties -->
 <!-- 定义集群的ID。供选举器使用,确保RM不会接管其他集群并成为其活跃RM -->
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>nn1</value>
    </property>

    <!--开启resource manager HA,默认为false--> 
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
    
    <!--RM状态信息在zookeeper中保存的根路径(znode)-->
    <property>
        <name>yarn.resourcemanager.zk-state-store.parent-path</name>
        <value>/rmstore/nn1</value>
    </property>

    <!--开启故障自动切换--> 
    <property>
        <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>

    <!--当自动故障切换可用时,使用内嵌的选举器来选择活跃RM。默认的,在HA激活下可用。-->
    <property>
        <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
        <value>true</value>
    </property>

    <property>
        <name>yarn.resourcemanager.ha.automatic-failover.zk-base-path</name>
        <value>/yarn-leader-election</value>
    </property>

    <!--配置resource manager -->
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>

    <!--分别指定RM的地址-->
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>nn1</value>
    </property>

    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>dn1</value>
    </property>

    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>false</value>
    </property>

    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>

    <!--指定zk集群地址-->
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>nn1:2181,dn1:2182,dn2:2183</value>
    </property>

    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>nn1:8088</value>
    </property>

    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>dn1:8088</value>
    </property>
    
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
        <final>true</final>
    </property>
</configuration>

启动之前:

分别编辑下面两个脚本,在文件开头添加用户变量(以root用户启动HDFS时需要):
# vim sbin/start-dfs.sh
# vim sbin/stop-dfs.sh

两个脚本中添加的内容均为:
HDFS_DATANODE_USER=root  
HADOOP_SECURE_DN_USER=hdfs  
HDFS_NAMENODE_USER=root  
HDFS_SECONDARYNAMENODE_USER=root  
同样,分别编辑下面两个脚本,在文件开头添加用户变量(以root用户启动YARN时需要):
# vim sbin/start-yarn.sh
# vim sbin/stop-yarn.sh

两个脚本中添加的内容均为:
YARN_RESOURCEMANAGER_USER=root  
HADOOP_SECURE_DN_USER=yarn  
YARN_NODEMANAGER_USER=root 

将整个hadoop-3.0.0目录复制到另外两台机器上(目标机器上需已存在/usr/hadoop目录)

scp -r /usr/hadoop/hadoop-3.0.0/ root@192.168.0.241:/usr/hadoop/
scp -r /usr/hadoop/hadoop-3.0.0/ root@192.168.0.242:/usr/hadoop/

最后

./sbin/start-all.sh (全部启动)

推荐阅读更多精彩内容