Download the binary from the link in the title and install it.
```bash
wget https://github.com/protocolbuffers/protobuf/releases/download/v21.2/protoc-21.2-linux-x86_64.zip
unzip protoc-21.2-linux-x86_64.zip
sudo cp -r bin /usr/
sudo cp -r include /usr/
protoc --version
# Expected output:
# libprotoc 3.21.2
```
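To confirm that the compiler actually works and that the bundled includes were copied correctly, you can compile a throwaway .proto file. This is only a smoke test; the file name and message below are made up for illustration:

```bash
# Write a trivial proto3 definition.
cat > /tmp/ping.proto <<'EOF'
syntax = "proto3";
message Ping {
  string msg = 1;
}
EOF

# Generating Java sources should succeed silently; a non-zero exit code means
# protoc or its include files are not installed where it expects them.
mkdir -p /tmp/protoc-test
protoc --proto_path=/tmp --java_out=/tmp/protoc-test /tmp/ping.proto && echo "protoc OK"
```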
### $HADOOP_HOME/etc/hadoop/hadoop-env.sh

```bash
#!/bin/bash
export JAVA_HOME=/home/hdfs/jdk
export HADOOP_HOME=/home/hdfs/hadoop
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HADOOP_HEAPSIZE_MAX=8g
export HADOOP_HEAPSIZE_MIN=4g
export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
# export HADOOP_CLIENT_OPTS=""
#
# A note about classpaths.
#
# By default, Apache Hadoop overrides Java's CLASSPATH
# environment variable. It is configured such
# that it starts out blank with new entries added after passing
# a series of checks (file/dir exists, not already listed aka
# de-duplication). During de-duplication, wildcards and/or
# directories are *NOT* expanded to keep it simple. Therefore,
# if the computed classpath has two specific mentions of
# awesome-methods-1.0.jar, only the first one added will be seen.
# If two directories are in the classpath that both contain
# awesome-methods-1.0.jar, then Java will pick up both versions.
# An additional, custom CLASSPATH. Site-wide configs should be
# handled via the shellprofile functionality, utilizing the
# hadoop_add_classpath function for greater control and much
# harder for apps/end-users to accidentally override.
# Similarly, end users should utilize ${HOME}/.hadooprc .
# This variable should ideally only be used as a short-cut,
# interactive way for temporary additions on the command line.
# export HADOOP_CLASSPATH="/some/cool/path/on/your/machine"
# Should HADOOP_CLASSPATH be first in the official CLASSPATH?
# export HADOOP_USER_CLASSPATH_FIRST="yes"
# If HADOOP_USE_CLIENT_CLASSLOADER is set, the classpath along
# with the main jar are handled by a separate isolated
# client classloader when 'hadoop jar', 'yarn jar', or 'mapred job'
# is utilized. If it is set, HADOOP_CLASSPATH and
# HADOOP_USER_CLASSPATH_FIRST are ignored.
# export HADOOP_USE_CLIENT_CLASSLOADER=true
# HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES overrides the default definition of
# system classes for the client classloader when HADOOP_USE_CLIENT_CLASSLOADER
# is enabled. Names ending in '.' (period) are treated as package names, and
# names starting with a '-' are treated as negative matches. For example,
# export HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES="-org.apache.hadoop.UserClass,java.,javax.,org.apache.hadoop."
# Enable optional, bundled Hadoop features
# This is a comma delimited list. It may NOT be overridden via .hadooprc
# Entries may be added/removed as needed.
# export HADOOP_OPTIONAL_TOOLS="hadoop-kafka,hadoop-openstack,hadoop-aliyun,hadoop-azure,hadoop-azure-datalake,hadoop-aws"
###
# Options for remote shell connectivity
###
# There are some optional components of hadoop that allow for
# command and control of remote hosts. For example,
# start-dfs.sh will attempt to bring up all NNs, DNs, etc.
# Options to pass to SSH when one of the "log into a host and
# start/stop daemons" scripts is executed
# export HADOOP_SSH_OPTS="-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10s"
# The built-in ssh handler will limit itself to 10 simultaneous connections.
# For pdsh users, this sets the fanout size ( -f )
# Change this to increase/decrease as necessary.
# export HADOOP_SSH_PARALLEL=10
# Filename which contains all of the hosts for any remote execution
# helper scripts such as workers.sh, start-dfs.sh, etc.
# export HADOOP_WORKERS="${HADOOP_CONF_DIR}/workers"
###
# Options for all daemons
###
#
#
# Many options may also be specified as Java properties. It is
# very common, and in many cases, desirable, to hard-set these
# in daemon _OPTS variables. Where applicable, the appropriate
# Java property is also identified. Note that many are re-used
# or set differently in certain contexts (e.g., secure vs
# non-secure)
#
# Where (primarily) daemon log files are stored.
# ${HADOOP_HOME}/logs by default.
# Java property: hadoop.log.dir
# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
# A string representing this instance of hadoop. $USER by default.
# This is used in writing log and pid files, so keep that in mind!
# Java property: hadoop.id.str
# export HADOOP_IDENT_STRING=$USER
# How many seconds to pause after stopping a daemon
# export HADOOP_STOP_TIMEOUT=5
# Where pid files are stored. /tmp by default.
# export HADOOP_PID_DIR=/tmp
# Default log4j setting for interactive commands
# Java property: hadoop.root.logger
# export HADOOP_ROOT_LOGGER=INFO,console
# Default log4j setting for daemons spawned explicitly by
# --daemon option of hadoop, hdfs, mapred and yarn command.
# Java property: hadoop.root.logger
# export HADOOP_DAEMON_ROOT_LOGGER=INFO,RFA
# Default log level and output location for security-related messages.
# You will almost certainly want to change this on a per-daemon basis via
# the Java property (i.e., -Dhadoop.security.logger=foo). (Note that the
# defaults for the NN and 2NN override this by default.)
# Java property: hadoop.security.logger
# export HADOOP_SECURITY_LOGGER=INFO,NullAppender
# Default process priority level
# Note that sub-processes will also run at this level!
# export HADOOP_NICENESS=0
# Default name for the service level authorization file
# Java property: hadoop.policy.file
# export HADOOP_POLICYFILE="hadoop-policy.xml"
#
# NOTE: this is not used by default! <-----
# You can define variables right here and then re-use them later on.
# For example, it is common to use the same garbage collection settings
# for all the daemons. So one could define:
#
# export HADOOP_GC_SETTINGS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
#
# .. and then use it as per the b option under the namenode.
###
# Secure/privileged execution
###
#
# Out of the box, Hadoop uses jsvc from Apache Commons to launch daemons
# on privileged ports. This functionality can be replaced by providing
# custom functions. See hadoop-functions.sh for more information.
#
# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol. Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
# export JSVC_HOME=/usr/bin
#
# This directory contains pids for secure and privileged processes.
# export HADOOP_SECURE_PID_DIR=${HADOOP_PID_DIR}
#
# This directory contains the logs for secure and privileged processes.
# Java property: hadoop.log.dir
# export HADOOP_SECURE_LOG=${HADOOP_LOG_DIR}
#
# When running a secure daemon, the default value of HADOOP_IDENT_STRING
# ends up being a bit bogus. Therefore, by default, the code will
# replace HADOOP_IDENT_STRING with HADOOP_xx_SECURE_USER. If one wants
# to keep HADOOP_IDENT_STRING untouched, then uncomment this line.
# export HADOOP_SECURE_IDENT_PRESERVE="true"
###
# NameNode specific parameters
###
# Default log level and output location for file system related change
# messages. For non-namenode daemons, the Java property must be set in
# the appropriate _OPTS if one wants something other than INFO,NullAppender
# Java property: hdfs.audit.logger
# export HDFS_AUDIT_LOGGER=INFO,NullAppender
# Specify the JVM options to be used when starting the NameNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# a) Set JMX options
# export HDFS_NAMENODE_OPTS="-Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1026"
#
# b) Set garbage collection logs
# export HDFS_NAMENODE_OPTS="${HADOOP_GC_SETTINGS} -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
#
# c) ... or set them directly
# export HDFS_NAMENODE_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
# this is the default:
# export HDFS_NAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS"
###
# SecondaryNameNode specific parameters
###
# Specify the JVM options to be used when starting the SecondaryNameNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# This is the default:
# export HDFS_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS"
###
# DataNode specific parameters
###
# Specify the JVM options to be used when starting the DataNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# This is the default:
# export HDFS_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS"
# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol. This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
# This will replace the hadoop.id.str Java property in secure mode.
# export HDFS_DATANODE_SECURE_USER=hdfs
# Supplemental options for secure datanodes
# By default, Hadoop uses jsvc which needs to know to launch a
# server jvm.
# export HDFS_DATANODE_SECURE_EXTRA_OPTS="-jvm server"
###
# NFS3 Gateway specific parameters
###
# Specify the JVM options to be used when starting the NFS3 Gateway.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_NFS3_OPTS=""
# Specify the JVM options to be used when starting the Hadoop portmapper.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_PORTMAP_OPTS="-Xmx512m"
# Supplemental options for privileged gateways
# By default, Hadoop uses jsvc which needs to know to launch a
# server jvm.
# export HDFS_NFS3_SECURE_EXTRA_OPTS="-jvm server"
# On privileged gateways, user to run the gateway as after dropping privileges
# This will replace the hadoop.id.str Java property in secure mode.
# export HDFS_NFS3_SECURE_USER=nfsserver
###
# ZKFailoverController specific parameters
###
# Specify the JVM options to be used when starting the ZKFailoverController.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_ZKFC_OPTS=""
###
# QuorumJournalNode specific parameters
###
# Specify the JVM options to be used when starting the QuorumJournalNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_JOURNALNODE_OPTS=""
###
# HDFS Balancer specific parameters
###
# Specify the JVM options to be used when starting the HDFS Balancer.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_BALANCER_OPTS=""
###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_MOVER_OPTS=""
###
# Router-based HDFS Federation specific parameters
# Specify the JVM options to be used when starting the RBF Routers.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_DFSROUTER_OPTS=""
###
# HDFS StorageContainerManager specific parameters
###
# Specify the JVM options to be used when starting the HDFS Storage Container Manager.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HDFS_STORAGECONTAINERMANAGER_OPTS=""
###
# Advanced Users Only!
###
#
# When building Hadoop, one can add the class paths to the commands
# via this special env var:
# export HADOOP_ENABLE_BUILD_PATHS="true"
#
# To prevent accidents, shell commands can be (superficially) locked
# to only allow certain users to execute certain subcommands.
# It uses the format of (command)_(subcommand)_USER.
#
# For example, to limit who can execute the namenode command,
# export HDFS_NAMENODE_USER=hdfs
###
# Registry DNS specific parameters
###
# For privileged registry DNS, user to run as after dropping privileges
# This will replace the hadoop.id.str Java property in secure mode.
# export HADOOP_REGISTRYDNS_SECURE_USER=yarn
# Supplemental options for privileged registry DNS
# By default, Hadoop uses jsvc which needs to know to launch a
# server jvm.
# export HADOOP_REGISTRYDNS_SECURE_EXTRA_OPTS="-jvm server"
export YARN_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
```
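After editing hadoop-env.sh, it is worth checking that the hadoop command actually picks these values up. A quick check using subcommands available in Hadoop 3.x:

```bash
# Print the environment Hadoop has computed (JAVA_HOME, HADOOP_CONF_DIR, and so on).
hadoop envvars

# The version banner also confirms which build is on the PATH.
hadoop version
```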
### $HADOOP_HOME/etc/hadoop/workers
List the hosts that will be used as data nodes, one per line.

```
host_01
host_02
host_03.haedongg.net
192.168.0.04
```
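start-dfs.sh and the other helper scripts log in to each of these hosts over SSH, so passwordless SSH from the management node is a prerequisite. A quick connectivity check, assuming the workers file lives in the configuration directory defined above:

```bash
# Each healthy host should print its own hostname; BatchMode fails fast
# instead of prompting for a password.
while read -r host; do
  [[ -z "$host" || "$host" == \#* ]] && continue   # skip blanks and comments
  ssh -o BatchMode=yes -o ConnectTimeout=5 "$host" hostname
done < /home/hdfs/hadoop/etc/hadoop/workers
```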
### $HADOOP_HOME/etc/hadoop/core-site.xml

```xml
<property>
<name>fs.defaultFS</name>
<value>hdfs://coxspace</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>hadoopm01.coxspace.biz:2181,hadoopm02.coxspace.biz:2181,hadoopm03.coxspace.biz:2181</value>
</property>
```
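Automatic NameNode failover depends on this ZooKeeper ensemble being reachable from the NameNode hosts. A quick reachability check, assuming the quorum above and that the srvr four-letter command is allowed (it is on ZooKeeper's default whitelist):

```bash
# A healthy ZooKeeper server answers with its version and its Mode (leader or follower).
for zk in hadoopm01.coxspace.biz hadoopm02.coxspace.biz hadoopm03.coxspace.biz; do
  echo "--- ${zk} ---"
  echo srvr | nc -w 3 "$zk" 2181
done
```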
### $HADOOP_HOME/etc/hadoop/hdfs-site.xml
<details><summary>(hdfs-site.xml)</summary>

```xml
<configuration>
<property>
<name>dfs.nameservices</name>
<value>coxspace</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://hadoopm01.coxspace.biz:8485;hadoopm02.coxspace.biz:8485;hadoopm03.coxspace.biz:8485/coxspace</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.coxspace</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.ha.namenodes.coxspace</name>
<value>namenode1,namenode2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.coxspace.namenode1</name>
<value>hadoopm01.coxspace.biz:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.coxspace.namenode2</name>
<value>hadoopm02.coxspace.biz:8020</value>
</property>
<property>
<name>dfs.namenode.servicerpc-address.coxspace.namenode1</name>
<value>hadoopm01.coxspace.biz:8022</value>
</property>
<property>
<name>dfs.namenode.servicerpc-address.coxspace.namenode2</name>
<value>hadoopm02.coxspace.biz:8022</value>
</property>
<property>
<name>dfs.namenode.http-address.coxspace.namenode1</name>
<value>hadoopm01.coxspace.biz:9870</value>
</property>
<property>
<name>dfs.namenode.http-address.coxspace.namenode2</name>
<value>hadoopm02.coxspace.biz:9870</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/xvdb/hadoop/namenode,/xvdc/hadoop/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/xvdb/hadoop/datanode,/xvdc/hadoop/datanode</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/xvdb/hadoop/journalnode</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>shell(/bin/true)</value>
</property>
<property>
<name>dfs.block.size</name>
<value>67108864</value>
<!-- HDFS block size in bytes: 8MB=8,388,608 / 16MB=16,777,216 / 32MB=33,554,432 / 64MB=67,108,864 / 128MB=134,217,728 -->
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<property>
<name>fs.permissions.umask-mode</name>
<value>022</value>
</property>
<property>
<name>fs.trash.interval</name>
<value>10080</value>
<!-- The trash is emptied after this many minutes (10080 minutes = 7 days). -->
</property>
</configuration>
```
</details>
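With dfs.ha.automatic-failover.enabled and the QJM shared edits directory configured above, the HA pair has to be bootstrapped in a fixed order before start-dfs.sh is run for the first time. A minimal sketch of that sequence, assuming hadoopm01 becomes the first NameNode and hadoopm02 the standby, as in the addresses above:

```bash
# 1) On hadoopm01, hadoopm02 and hadoopm03: start the JournalNodes first,
#    otherwise formatting the namespace cannot write the shared edits.
hdfs --daemon start journalnode

# 2) On hadoopm01 only: format the namespace and the failover znode, then start the NameNode.
hdfs namenode -format
hdfs zkfc -formatZK
hdfs --daemon start namenode

# 3) On hadoopm02 only: copy the freshly formatted metadata and start the standby NameNode.
hdfs namenode -bootstrapStandby
hdfs --daemon start namenode

# 4) On both NameNode hosts: start the ZKFailoverControllers (or simply run start-dfs.sh).
hdfs --daemon start zkfc
```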
### $HADOOP_HOME/etc/hadoop/mapred-site.xml

```xml
<configuration>
<property>
<name>mapreduce.map.output.compress.codec</name>
<value>org.apache.hadoop.io.compress.SnappyCodec</value>
</property>
<property>
<name>mapreduce.map.output.compress</name>
<value>true</value>
</property>
<property>
<name>zlib.compress.level</name>
<value>DEFAULT_COMPRESSION</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoopm03.coxspace.biz:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoopm03.coxspace.biz:19888</value>
</property>
<property>
<name>mapreduce.jobhistory.admin.address</name>
<value>hadoopm03.coxspace.biz:10033</value>
</property>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>2048</value>
</property>
<property>
<name>mapreduce.map.cpu.vcores</name>
<value>1</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>4096</value>
</property>
<property>
<name>mapreduce.reduce.cpu.vcores</name>
<value>1</value>
</property>
</configuration>
```
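With mapreduce.framework.name set to yarn, the bundled examples jar makes a convenient end-to-end smoke test once HDFS and YARN are up. The path below assumes the default layout under $HADOOP_HOME; the wildcard covers the version suffix:

```bash
# Estimate pi with 10 map tasks and 100 samples each. A successful run exercises
# HDFS, the ResourceManager, the NodeManagers and the mapreduce_shuffle handler.
yarn jar "${HADOOP_HOME}"/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar pi 10 100
```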
### $HADOOP_HOME/etc/hadoop/yarn-site.xml

```xml
<?xml version="1.0"?>
<configuration>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>hadoopm01.coxspace.biz:2181,hadoopm02.coxspace.biz:2181,hadoopm03.coxspace.biz:2181</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
<name>yarn.client.failover-sleep-base-ms</name>
<value>100</value>
</property>
<property>
<name>yarn.client.failover-sleep-max-ms</name>
<value>2000</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yarnRM</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm3</name>
<value>hadoopm03.coxspace.biz:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm3</name>
<value>hadoopm03.coxspace.biz:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm3</name>
<value>hadoopm03.coxspace.biz:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm3</name>
<value>hadoopm03.coxspace.biz:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm3</name>
<value>hadoopm03.coxspace.biz:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address.rm3</name>
<value>hadoopm03.coxspace.biz:8090</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm2</name>
<value>hadoopm02.coxspace.biz:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm2</name>
<value>hadoopm02.coxspace.biz:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm2</name>
<value>hadoopm02.coxspace.biz:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm2</name>
<value>hadoopm02.coxspace.biz:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>hadoopm02.coxspace.biz:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address.rm2</name>
<value>hadoopm02.coxspace.biz:8090</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm3,rm2</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>1024</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>4096</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-vcores</name>
<value>1</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-vcores</name>
<value>8</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
</property>
<property>
<name>yarn.scheduler.capacity.resource-calculator</name>
<value>org.apache.hadoop.yarn.util.resource.DominantResourceCalculator</value>
</property>
<property>
<name>hadoop.registry.zk.quorum</name>
<value>hadoopm01.coxspace.biz:2181,hadoopm02.coxspace.biz:2181,hadoopm03.coxspace.biz:2181</value>
</property>
<property>
<name>yarn.timeline-service.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.timeline-service.webapp.address</name>
<value>hadoopm03.coxspace.biz:8188</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
</configuration>
```
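Once the daemons are running, the HA roles can be verified from the command line. The service IDs below (namenode1/namenode2 and rm2/rm3) are the ones defined in the configuration above:

```bash
# One NameNode should report "active", the other "standby".
hdfs haadmin -getServiceState namenode1
hdfs haadmin -getServiceState namenode2

# Likewise for the two ResourceManagers.
yarn rmadmin -getServiceState rm2
yarn rmadmin -getServiceState rm3
```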