Prometheus는 이벤트 모니터링 및 알림 등에 사용되는 오픈소스 시계열 DB이다. 라이선스는 Apache License 2.0이다. PromQL을 이용해 데이터에 접근할 수 있다.
# Create the dedicated account that will own and run Prometheus.
useradd haedong

# Create the log directory and the TSDB data directory in one pass, then hand
# ownership of the log directory and of the whole /xvdb/prometheus tree to that account.
mkdir -p /var/log/prometheus /xvdb/prometheus/tsdb
chown -R haedong /var/log/prometheus /xvdb/prometheus
# Environment variables for the Prometheus user.
sudo -i -u haedong
# The heredoc delimiter is quoted on purpose: $HOME, $PROMETHEUS_HOME and $PATH
# must be written to the profile literally and expanded when the profile is
# sourced. With an unquoted delimiter they are expanded while the file is being
# generated, at which point PROMETHEUS_HOME is still unset and PATH would end
# up with a bogus ":/bin" entry instead of the Prometheus bin directory.
cat <<'EOF' | sudo tee /home/haedong/.bash_profile
export HOME=/home/haedong
export PROMETHEUS_HOME=$HOME/prometheus
export PATH=$PATH:$PROMETHEUS_HOME/bin
EOF
# Load the new variables into the current shell.
source ~/.bash_profile
# Fetch the 2.53.3 release tarball and unpack it in the current directory.
wget https://github.com/prometheus/prometheus/releases/download/v2.53.3/prometheus-2.53.3.linux-amd64.tar.gz
tar -xvzf prometheus-2.53.3.linux-amd64.tar.gz

# Keep the versioned tree under ~/apps and expose it through a version-free symlink.
mkdir -p ${HOME}/apps
mv prometheus-2.53.3.linux-amd64 ${HOME}/apps/
ln -s /home/haedong/apps/prometheus-2.53.3.linux-amd64 ${HOME}/prometheus

# Sort the unpacked files into bin/ (executables) and conf/ (configuration).
mkdir ${PROMETHEUS_HOME}/bin ${PROMETHEUS_HOME}/conf
mv ${PROMETHEUS_HOME}/prometheus ${PROMETHEUS_HOME}/promtool ${PROMETHEUS_HOME}/bin/
mv ${PROMETHEUS_HOME}/prometheus.yml ${PROMETHEUS_HOME}/conf
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # Default job: Prometheus scraping its own metrics endpoint.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]

  # NOTE: "targets:" must be followed by a space; "targets:[...]" is read by
  # YAML as a single plain string instead of a key with a list value.
  - job_name: "K8s Master node status"
    static_configs:
      - targets: ["HOST.DOMAIN.NAME:60001", "IP.ADDR.NUM:60001"]

  # Use this form when the scrape target is protected by TLS and/or authentication.
  - job_name: "Node Information https"
    scheme: https
    metrics_path: '/actuator/nodes'
    authorization:
      type: Bearer
      credentials: <your_token>
    tls_config:
      # Skips verification of the target's certificate.
      insecure_skip_verify: true
    static_configs:
      - targets: ["HOST.DOMAIN.NAME:7777", "IP.ADDR.NUM:7777"]
# Web configuration file handed to Prometheus through --web.config.file.
# cert_file and key_file are settings of tls_server_config and therefore must
# be nested (indented) beneath it.
tls_server_config:
  # Certificate and key files for server to use to authenticate to client.
  cert_file: /home/haedong/prometheus/certs/haedongg.net.crt
  key_file: /home/haedong/prometheus/certs/haedongg.net.key
# TLS를 적용하지 않는 경우 --web.config.file 플래그는 넣지 않는다. 재기동 없이 설정 변경 적용을 위해 --web.enable-lifecycle를 추가한다.
# Start Prometheus in the background, detached from the terminal, with all
# output appended to the log file. Only PROMETHEUS_HOME is exported by the
# profile, so the binary and both config files are addressed relative to it.
nohup "$PROMETHEUS_HOME/bin/prometheus" --config.file="$PROMETHEUS_HOME/conf/prometheus.yml" \
  --web.listen-address="0.0.0.0:9443" --web.config.file="$PROMETHEUS_HOME/conf/web.yml" \
  --storage.tsdb.path=/xvdb/prometheus/tsdb --storage.tsdb.retention.time=30d --storage.tsdb.retention.size=1GB \
  --web.enable-lifecycle \
  --log.level=info > /var/log/prometheus/prometheus.log 2>&1 &
# vi $PROMETHEUS_HOME/prometheus.sh
#!/bin/bash
# Prometheus variables shared by the start/stop/restart/status functions.
# Installation root (symlink to the versioned release directory).
PROMETHEUS_HOME="/home/haedong/prometheus"
# Server executable.
PROMETHEUS_BIN="$PROMETHEUS_HOME/bin/prometheus"
# Main configuration file. The release ships it as prometheus.yml and the
# install steps move it into conf/ under that same name, so the extension
# must be .yml (not .yaml).
PROMETHEUS_CONFIG="$PROMETHEUS_HOME/conf/prometheus.yml"
# Time-series database directory.
TSDB_DIR="/xvdb/prometheus/tsdb"
# File holding the PID of the background server process.
PID_FILE="$PROMETHEUS_HOME/prometheus.pid"
# Directory that receives prometheus.log.
PROMETHEUS_LOG_DIR=/var/log/prometheus
# Retention limits passed to --storage.tsdb.retention.time / .size.
TSDB_RETENTION_PERIOD=30d
TSDB_RETENTION_SIZE=1GB
#######################################
# Start Prometheus in the background unless it is already running.
# A PID file whose process no longer exists is treated as stale and removed,
# so a crash or reboot does not block the next start.
# Globals:   PROMETHEUS_HOME, PROMETHEUS_BIN, PROMETHEUS_CONFIG, TSDB_DIR,
#            TSDB_RETENTION_PERIOD, TSDB_RETENTION_SIZE, PROMETHEUS_LOG_DIR,
#            PID_FILE (all read; the file named by PID_FILE is written)
# Outputs:   status messages on stdout; server output goes to
#            $PROMETHEUS_LOG_DIR/prometheus.log
#######################################
start_prometheus() {
  local pid

  if [ -f "$PID_FILE" ]; then
    pid=$(cat "$PID_FILE")
    # kill -0 sends no signal; it only tests whether the process exists.
    if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
      echo "Prometheus is already running (PID $pid)."
      return 0
    fi
    rm -f "$PID_FILE"
  fi

  echo "Starting Prometheus..."
  # The web (TLS) config is conf/web.yml, the same file the manual start
  # command uses.
  nohup "$PROMETHEUS_BIN" --config.file="$PROMETHEUS_CONFIG" \
    --web.listen-address="0.0.0.0:9443" --web.config.file="$PROMETHEUS_HOME/conf/web.yml" \
    --storage.tsdb.path="$TSDB_DIR" --storage.tsdb.retention.time="$TSDB_RETENTION_PERIOD" --storage.tsdb.retention.size="$TSDB_RETENTION_SIZE" \
    --web.enable-lifecycle \
    --log.level=info > "$PROMETHEUS_LOG_DIR/prometheus.log" 2>&1 &
  echo $! > "$PID_FILE"
  echo "Prometheus started with PID $(cat "$PID_FILE")."
}
#######################################
# Stop the Prometheus process recorded in the PID file.
# The PID file is removed only when the signal was delivered or when the
# recorded process no longer exists; if the signal cannot be delivered the
# file is kept so the running process stays tracked.
# Globals:   PID_FILE (read; the file it names is removed)
# Outputs:   status messages on stdout, failure message on stderr
# Returns:   0 when stopped or not running, 1 when the signal could not be sent
#######################################
stop_prometheus() {
  local pid

  if [ ! -f "$PID_FILE" ]; then
    echo "Prometheus is not running."
    return 0
  fi

  pid=$(cat "$PID_FILE")
  # kill -0 sends no signal; it only tests whether the process exists.
  if [ -z "$pid" ] || ! kill -0 "$pid" 2>/dev/null; then
    echo "Prometheus is not running (removing stale PID file)."
    rm -f "$PID_FILE"
    return 0
  fi

  echo "Stopping Prometheus (PID $pid)..."
  if kill "$pid"; then
    rm -f "$PID_FILE"
    echo "Prometheus stopped."
  else
    echo "Failed to stop Prometheus (PID $pid)." >&2
    return 1
  fi
}
#######################################
# Restart Prometheus: stop it, wait for the old process to exit, start it again.
# stop_prometheus only sends the termination signal; starting the new instance
# while the old one is still shutting down makes both compete for the same
# listen address and data directory. The wait is capped at 30 seconds.
# Globals:   PID_FILE (read)
#######################################
restart_prometheus() {
  local old_pid="" waited=0

  # Remember the PID before stop_prometheus removes the PID file.
  if [ -f "$PID_FILE" ]; then
    old_pid=$(cat "$PID_FILE")
  fi

  stop_prometheus

  if [ -n "$old_pid" ]; then
    # kill -0 sends no signal; it only tests whether the process still exists.
    while kill -0 "$old_pid" 2>/dev/null && [ "$waited" -lt 30 ]; do
      sleep 1
      waited=$((waited + 1))
    done
  fi

  start_prometheus
}
#######################################
# Report whether Prometheus is running.
# The PID file alone is not trusted: the recorded process must still exist,
# otherwise a leftover file would be reported as a running server.
# Globals:   PID_FILE (read)
# Outputs:   one status line on stdout
#######################################
status_prometheus() {
  local pid

  if [ ! -f "$PID_FILE" ]; then
    echo "Prometheus is not running."
    return 0
  fi

  pid=$(cat "$PID_FILE")
  # kill -0 sends no signal; it only tests whether the process exists.
  if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
    echo "Prometheus is running (PID $pid)."
  else
    echo "Prometheus is not running (stale PID file: $PID_FILE)."
  fi
}
# Entry point: the first argument selects the action. Each of the four verbs
# maps onto the function named "<verb>_prometheus"; anything else prints the
# usage line and exits with status 1.
case "$1" in
  start | stop | restart | status)
    "${1}_prometheus"
    ;;
  *)
    echo "Usage: $0 {start|stop|restart|status}"
    exit 1
    ;;
esac
# Ask the running server to reload its configuration without a restart
# (the server is started with --web.enable-lifecycle for this purpose).
curl --request POST 'https://prometheus.haedongg.net:9443/-/reload'