제목의 링크에서 바이너리를 다운받아 설치한다.
# Download the protoc 21.2 release archive and unpack it.
wget https://github.com/protocolbuffers/protobuf/releases/download/v21.2/protoc-21.2-linux-x86_64.zip
unzip protoc-21.2-linux-x86_64.zip
# Install the extracted bin/ and include/ trees system-wide under /usr.
sudo cp -r bin /usr/
sudo cp -r include /usr/
# Verify: protoc 21.x reports its library version as 3.21.x.
protoc --version
# libprotoc 3.21.2
# $HADOOP_HOME/etc/hadoop/hadoop-env.sh
#!/bin/bash
export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-11.0.23.0.9-3.el9.x86_64
export HADOOP_HOME=/home/hdfs/hadoop
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HADOOP_HEAPSIZE_MAX=8g
export HADOOP_HEAPSIZE_MIN=4g
export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
# Log directory (default: ${HADOOP_HOME}/logs)
# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
# PID directory (default: /tmp — lost on reboot, so changing it is recommended)
# export HADOOP_PID_DIR=/var/run/hadoop
# Default log4j setting
# export HADOOP_ROOT_LOGGER=INFO,console
# export HADOOP_DAEMON_ROOT_LOGGER=INFO,RFA
# Secure datanodes (jsvc is not required when SASL is used)
# export HDFS_DATANODE_SECURE_USER=hdfs
# Set per-daemon JVM options for NameNode / DataNode / JournalNode here if needed
# export HDFS_NAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS"
# export HDFS_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS"
# YARN / MapReduce options are defined separately in yarn-env.sh / mapred-env.sh
# $HADOOP_HOME/etc/hadoop/yarn-env.sh
export YARN_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
$HADOOP_HOME/etc/hadoop/workers
데이터 노드로 사용될 호스트들을 나열한다.
worker01.haedongg.net
worker02.haedongg.net
worker03.haedongg.net
$HADOOP_HOME/etc/hadoop/core-site.xml
<configuration>
  <!-- Default filesystem URI: the logical HA nameservice defined in hdfs-site.xml (dfs.nameservices) -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://haedongcluster</value>
  </property>
  <!-- ZooKeeper ensemble used by ZKFC for automatic NameNode failover -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>hadoopm01.haedongg.net:2181,hadoopm02.haedongg.net:2181,hadoopm03.haedongg.net:2181</value>
  </property>
</configuration>
$HADOOP_HOME/etc/hadoop/hdfs-site.xml
<configuration>
  <!-- Logical name of the HA nameservice; must match fs.defaultFS in core-site.xml -->
  <property>
    <name>dfs.nameservices</name>
    <value>haedongcluster</value>
  </property>
  <!-- Quorum Journal Manager: JournalNodes that store the shared edit log -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://hadoopm01.haedongg.net:8485;hadoopm02.haedongg.net:8485;hadoopm03.haedongg.net:8485/haedongcluster</value>
  </property>
  <!-- How HDFS clients locate the active NameNode of this nameservice -->
  <property>
    <name>dfs.client.failover.proxy.provider.haedongcluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <!-- The two NameNodes participating in HA -->
  <property>
    <name>dfs.ha.namenodes.haedongcluster</name>
    <value>namenode1,namenode2</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.haedongcluster.namenode1</name>
    <value>hadoopm01.haedongg.net:8020</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.haedongcluster.namenode2</name>
    <value>hadoopm02.haedongg.net:8020</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.haedongcluster.namenode1</name>
    <value>hadoopm01.haedongg.net:9870</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.haedongcluster.namenode2</name>
    <value>hadoopm02.haedongg.net:9870</value>
  </property>
  <property>
    <name>dfs.namenode.servicerpc-address.haedongcluster.namenode1</name>
    <value>hadoopm01.haedongg.net:8022</value>
  </property>
  <property>
    <name>dfs.namenode.servicerpc-address.haedongcluster.namenode2</name>
    <value>hadoopm02.haedongg.net:8022</value>
  </property>
  <!-- Local storage directories (comma-separated list writes to every listed disk) -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/xvdb/hadoop/namenode,/xvdc/hadoop/namenode</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/xvdb/hadoop/datanode,/xvdc/hadoop/datanode</value>
  </property>
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/xvdb/hadoop/journalnode</value>
  </property>
  <!-- Fencing: try sshfence first, fall back to shell(/bin/true) -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence
shell(/bin/true)</value>
    <description>
      프로덕션 환경에서는 sshfence 를 우선 사용하고, 실패 시 fallback 으로 shell(/bin/true) 사용 권장.
      sshfence 는 NameNode 들이 서로 SSH key-based 접근 가능해야 함.
    </description>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/hdfs/.ssh/id_rsa</value>
  </property>
  <property>
    <name>dfs.blocksize</name>
    <value>134217728</value>
    <description>
      HDFS의 block size (Byte 단위).
      8MB=8,388,608 / 16MB=16,777,216 / 32MB=33,554,432 / 64MB=67,108,864 / 128MB=134,217,728 (권장) / 256MB=268,435,456
    </description>
  </property>
  <property>
    <name>dfs.permissions.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>fs.permissions.umask-mode</name>
    <value>022</value>
  </property>
  <property>
    <name>fs.trash.interval</name>
    <value>10080</value>
    <description>value의 값(=분)에 따라 휴지통이 비워진다. 10080 = 7일</description>
  </property>
</configuration>
$HADOOP_HOME/etc/hadoop/mapred-site.xml
<configuration>
  <!-- Compress intermediate map output with Snappy to cut shuffle traffic -->
  <property>
    <name>mapreduce.map.output.compress.codec</name>
    <value>org.apache.hadoop.io.compress.SnappyCodec</value>
  </property>
  <property>
    <name>mapreduce.map.output.compress</name>
    <value>true</value>
  </property>
  <property>
    <name>zlib.compress.level</name>
    <value>DEFAULT_COMPRESSION</value>
  </property>
  <!-- MapReduce JobHistory server (runs on hadoopm03) -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>hadoopm03.haedongg.net:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hadoopm03.haedongg.net:19888</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.admin.address</name>
    <value>hadoopm03.haedongg.net:10033</value>
  </property>
  <!-- Run MapReduce jobs on YARN -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <!-- Per-task container sizing -->
  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>2048</value>
  </property>
  <property>
    <name>mapreduce.map.cpu.vcores</name>
    <value>1</value>
  </property>
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>4096</value>
  </property>
  <property>
    <name>mapreduce.reduce.cpu.vcores</name>
    <value>1</value>
  </property>
  <!-- Propagate HADOOP_MAPRED_HOME into AM / map / reduce container environments -->
  <property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
  </property>
  <property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
  </property>
  <property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
  </property>
</configuration>
$HADOOP_HOME/etc/hadoop/yarn-site.xml
<?xml version="1.0"?>
<configuration>
  <!-- ResourceManager HA with automatic failover via embedded elector -->
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
    <value>true</value>
  </property>
  <!-- Persist RM state in ZooKeeper so applications survive RM restart/failover -->
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>hadoopm01.haedongg.net:2181,hadoopm02.haedongg.net:2181,hadoopm03.haedongg.net:2181</value>
  </property>
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
  <!-- Client retry back-off while an RM failover is in progress -->
  <property>
    <name>yarn.client.failover-sleep-base-ms</name>
    <value>100</value>
  </property>
  <property>
    <name>yarn.client.failover-sleep-max-ms</name>
    <value>2000</value>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yarnRM</value>
  </property>
  <!-- The two ResourceManagers: rm1 on hadoopm02, rm2 on hadoopm03 -->
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>hadoopm02.haedongg.net</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>hadoopm03.haedongg.net</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm1</name>
    <value>hadoopm02.haedongg.net:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm1</name>
    <value>hadoopm02.haedongg.net:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
    <value>hadoopm02.haedongg.net:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm1</name>
    <value>hadoopm02.haedongg.net:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>hadoopm02.haedongg.net:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.https.address.rm1</name>
    <value>hadoopm02.haedongg.net:8090</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm2</name>
    <value>hadoopm03.haedongg.net:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm2</name>
    <value>hadoopm03.haedongg.net:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
    <value>hadoopm03.haedongg.net:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm2</name>
    <value>hadoopm03.haedongg.net:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>hadoopm03.haedongg.net:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.https.address.rm2</name>
    <value>hadoopm03.haedongg.net:8090</value>
  </property>
  <!-- Container allocation bounds for the scheduler -->
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>8192</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-vcores</name>
    <value>1</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-vcores</name>
    <value>8</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
  </property>
  <!-- Consider both memory and vcores when allocating containers -->
  <property>
    <name>yarn.scheduler.capacity.resource-calculator</name>
    <value>org.apache.hadoop.yarn.util.resource.DominantResourceCalculator</value>
  </property>
  <property>
    <name>hadoop.registry.zk.quorum</name>
    <value>hadoopm01.haedongg.net:2181,hadoopm02.haedongg.net:2181,hadoopm03.haedongg.net:2181</value>
  </property>
  <!-- Timeline service (application history) on hadoopm03 -->
  <property>
    <name>yarn.timeline-service.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.timeline-service.webapp.address</name>
    <value>hadoopm03.haedongg.net:8188</value>
  </property>
  <!-- Shuffle auxiliary service required by MapReduce on every NodeManager -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <!-- Environment variables NodeManagers inherit into containers -->
  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
  </property>
</configuration>