这是我的
我觉得我已经尝试了一切,但还是失败了。我正在尝试搭建一个由 Thanos、Prometheus 和 Grafana 组成的集群。我写了一个脚本,它在终端中运行得非常好,但作为 systemd 服务启动时却失败了。
我的环境是:
Ubuntu 20.04.2
Docker 20.10.5
我的文件依赖项是:
这是我的脚本内容:
#!/bin/bash
# Builds or tears down a Prometheus monitoring cluster with Thanos support.
#
# Actions: deploy | destroy
#
# Usage: ./setup deploy [ prom|sidecar|querier|grafana|all ] | destroy [prom|sidecar|querier|grafana|volume|network|all]
#
# Uncomment the next line to trace every command while debugging.
#set -x
#*******************************************************************************************************#
# Command-line parameters: $1 selects the action, $2 the component it acts on.
action=$1
component=$2
#*******************************************************************************************************#
#---------------------------------Creating volumes for persistent data storage--------------------------#
#######################################
# Create the host directories that back the three Prometheus data volumes.
# The directories (prometheusStorage1..3) are created under the current
# working directory, so the caller must run the script from the directory
# that should hold the cluster data.
# Outputs:  progress message on stdout
# Returns:  0 on success, 1 if the working directory cannot be resolved or
#           a directory cannot be created
#######################################
create_volume() {
  local base_dir item
  base_dir=$(pwd) || return 1
  echo "--> Creating persistent volumes for prometheus servers"
  for item in 1 2 3; do
    # Quoted so that a working directory containing spaces is not word-split.
    mkdir -p -- "${base_dir}/prometheusStorage${item}" || return 1
  done
}
#---------------------------------------End of creating-------------------------------------------------#
#*******************************************************************************************************#
#----------------------------------------Creating docker network----------------------------------------#
#######################################
# Ensure the user-defined docker network "thanos" exists.
# An already existing network is reused and counts as success, so repeated
# deployments do not report a spurious failure.
# Outputs:  error message on stderr when the network cannot be created
#           (docker's own diagnostics are left visible as well)
# Returns:  0 if the network exists afterwards, 1 otherwise
#######################################
create_docker_network() {
  # Nothing to do when the network is already there.
  if docker network inspect thanos &> /dev/null; then
    return 0
  fi
  # Only the network ID on stdout is discarded; stderr stays visible so the
  # reason for a failure ends up in the terminal or the journal.
  if ! docker network create thanos > /dev/null; then
    echo "--> Error while creating docker network [thanos]" >&2
    return 1
  fi
}
#----------------------------------------End of creating network----------------------------------------#
#*******************************************************************************************************#
#----------------------------------------Deploying prometheus instances---------------------------------#
#######################################
# Start the three Prometheus containers (prometheus01..03) on the "thanos"
# docker network and verify that each one answers on its HTTP port.
#
# Config files (prometheus0N.yml) and data directories (prometheusStorageN)
# are bind-mounted from the current working directory. When the script is
# started from a directory that does not contain them (e.g. "/"), the mounts
# point at non-existent paths and the containers do not come up.
#
# Outputs:  progress messages on stdout; docker errors on stderr
# Returns:  0 when all three instances answer; exits the script with
#           status 1 otherwise
#######################################
deploy_prom() {
  local base_dir item port failed=0
  base_dir=$(pwd) || exit 1

  echo "--> Deploying prometheus instances"
  for item in 1 2 3; do
    port="909${item}"
    echo "--> Deploying prometheus instance #$item"
    # One single command: every flag line must end with a continuation,
    # otherwise the Prometheus flags are executed as separate commands.
    # Only the container ID on stdout is discarded; stderr is kept so that
    # docker failures are visible.
    docker run -d --net=thanos --rm \
      -v "${base_dir}/prometheus0${item}.yml:/etc/prometheus/prometheus.yml" \
      -p "${port}:${port}" \
      -v "${base_dir}/prometheusStorage${item}:/prometheus" \
      -u root --name "prometheus0${item}" \
      quay.io/prometheus/prometheus \
      --config.file=/etc/prometheus/prometheus.yml \
      --storage.tsdb.path=/prometheus \
      --web.listen-address=":${port}" \
      --web.enable-lifecycle \
      --storage.tsdb.retention.time=5d \
      --storage.tsdb.min-block-duration=5m \
      --storage.tsdb.max-block-duration=5m \
      --web.enable-admin-api > /dev/null
    # Give the container a moment to start before launching the next one.
    sleep 3
  done

  # Probe every instance; any unreachable port marks the deployment failed.
  for port in 9091 9092 9093; do
    curl "http://localhost:${port}" &> /dev/null || failed=1
    sleep 1
  done

  if (( failed == 0 )); then
    echo "--> Prometheus 01, 02 ,03 got deployed on 9091,9092,9093 port respectively"
  else
    echo "--> Error occurred while deploying prometheus server"
    exit 1
  fi
  echo "*"
}
#----------------------------------------End of deploying instances-------------------------------------#
#*******************************************************************************************************#
#----------------------------------------Deploying sidecar instances------------------------------------#
#######################################
# Start one Thanos sidecar container per Prometheus instance
# (prometheus01-sidecar..prometheus03-sidecar) on the "thanos" network.
#
# The Prometheus config, bucket_config.yaml, the Prometheus data directory
# and a local "object-storage" directory are bind-mounted from the current
# working directory; "object-storage" is created when missing.
#
# Outputs:  progress messages on stdout; docker errors on stderr
# Returns:  0 when every sidecar was started; exits the script with
#           status 1 if any "docker run" failed
#######################################
deploy_sidecar() {
  local base_dir item failed=0
  base_dir=$(pwd) || exit 1

  mkdir -p -- "${base_dir}/object-storage"

  echo "--> Deploying thanos sidecar for each prometheus instance in the cluster"
  for item in 1 2 3; do
    echo "--> Deploying sidecar for prometheus instance #$item"
    # One single command: every line must end with a continuation.
    # A failure of ANY sidecar is remembered, not just the last one.
    docker run -d --rm --net=thanos \
      -v "${base_dir}/prometheus0${item}.yml:/etc/prometheus/prometheus.yml" \
      -v "${base_dir}/bucket_config.yaml:/tmp/bucket_config.yaml" \
      --name "prometheus0${item}-sidecar" -u root \
      -v "${base_dir}/prometheusStorage${item}:/tmp/prometheusStorage" \
      -v "${base_dir}/object-storage:/object-storage" \
      quay.io/thanos/thanos:main-2021-03-22-d173bcb9 sidecar \
      --http-address "0.0.0.0:1909${item}" \
      --grpc-address "0.0.0.0:1919${item}" \
      --reloader.config-file /etc/prometheus/prometheus.yml \
      --prometheus.url "http://prometheus0${item}:909${item}" \
      --tsdb.path /tmp/prometheusStorage \
      --objstore.config-file /tmp/bucket_config.yaml > /dev/null || failed=1
    sleep 3
  done

  echo "--> Using Local volume as a object storage"
  if (( failed == 0 )); then
    echo "--> All sidecars got deployed successfully for all prometheus instances"
  else
    echo "--> Error while deploying sidecars"
    exit 1
  fi
  echo "*"
}
#----------------------------------------End of deploying instances-------------------------------------#
#*******************************************************************************************************#
#----------------------------------------Deploying querier instances------------------------------------#
#######################################
# Start the Thanos querier container (thanos-querier) on the "thanos"
# network, published on port 29090 and pointed at the gRPC endpoints of the
# three sidecars.
# Outputs:  status message on stdout; docker errors on stderr
# Returns:  0 on success; exits the script with status 1 if "docker run"
#           fails
#######################################
deploy_querier() {
  # One single command: every line must end with a continuation, otherwise
  # the querier flags are executed as separate commands.
  if docker run -d --rm --net=thanos --name thanos-querier -p 29090:29090 \
    quay.io/thanos/thanos:main-2021-03-22-d173bcb9 query \
    --http-address 0.0.0.0:29090 \
    --query.replica-label replica \
    --store prometheus01-sidecar:19191 \
    --store prometheus02-sidecar:19192 \
    --store prometheus03-sidecar:19193 > /dev/null; then
    echo "--> Deployed thanos querier component"
  else
    echo "--> Error while deploying thanos querier component"
    exit 1
  fi
  echo "*"
}
#----------------------------------------End of deploying instance--------------------------------------#
#*******************************************************************************************************#
#--------------------------------------------Deploying grafana------------------------------------------#
# Launch the single Grafana container on the "thanos" network, published on
# port 3000. Prints a status line; terminates the script with status 1 when
# "docker run" reports a failure.
deploy_grafana() {
  printf '%s\n' "--> Deploying single instance of grafana"
  # Guard clause: bail out early when the container could not be started.
  if ! docker run -d --name grafana --net=thanos -p 3000:3000 quay.io/bitnami/grafana:latest &> /dev/null; then
    printf '%s\n' "--> Error while deploying grafana"
    exit 1
  fi
  printf '%s\n' \
    "--> Grafana is up and running on 3000 port" \
    "*" \
    "--> Cluster is up and running"
}
#--------------------------------------------End of deploying grafana-----------------------------------#
#*******************************************************************************************************#
# Entry point: dispatch on the requested action ($action) and component
# ($component).
#   deploy  prom|sidecar|querier|grafana|all
#   destroy prom|sidecar|querier|grafana|volume|network|all
# Unknown actions or components print a usage hint and exit with status 1.
case "$action" in
  deploy)
    case "$component" in
      prom)
        # Prepare the persistence volumes.
        create_volume
        # Create the dedicated docker network.
        create_docker_network
        # Deploy the prometheus servers.
        echo "*"
        deploy_prom
        ;;
      sidecar)
        deploy_sidecar
        ;;
      querier)
        # The querier reads from the sidecar endpoints over gRPC.
        deploy_querier
        ;;
      grafana)
        deploy_grafana
        ;;
      all)
        create_volume
        create_docker_network
        deploy_prom
        deploy_sidecar
        deploy_querier
        deploy_grafana
        ;;
      *)
        echo "Choose an component to deploy from prom|sidecar|querier|grafana|all"
        exit 1
        ;;
    esac
    ;;
  destroy)
    case "$component" in
      prom)
        for container in prometheus01 prometheus02 prometheus03; do
          echo "--> Removing $container residue."
          docker container stop "$container" &> /dev/null
        done
        # Let docker finish removing the --rm containers before the network
        # they were attached to is deleted.
        sleep 3
        if docker network ls | grep -q thanos; then
          echo "--> Removing docker network [thanos]"
          docker network rm thanos &> /dev/null
        fi
        ;;
      sidecar)
        for container in prometheus01-sidecar prometheus02-sidecar prometheus03-sidecar; do
          echo "--> Removing $container residue."
          docker container stop "$container" &> /dev/null
        done
        ;;
      querier)
        echo "--> Removing querier residue."
        docker container stop thanos-querier &> /dev/null
        ;;
      grafana)
        echo "--> Removing grafana residue."
        docker container stop grafana &> /dev/null
        # grafana is started without --rm, so the stopped container has to
        # be removed explicitly.
        if docker ps -a | grep -q grafana; then
          docker rm -f grafana &> /dev/null
        fi
        ;;
      volume)
        # Paths are relative to the current working directory.
        for item in prometheusStorage1 prometheusStorage2 prometheusStorage3 object-storage; do
          if [ -d "$item" ]; then
            echo "--> Deleting mounted storage volume: $item"
            rm -rf -- "$item"
          fi
        done
        ;;
      network)
        if docker network ls | grep -q thanos; then
          echo "--> Removing docker network [thanos]"
          docker network rm thanos &> /dev/null
        fi
        clear
        ;;
      all)
        containers=(
          prometheus01 prometheus02 prometheus03
          prometheus01-sidecar prometheus02-sidecar prometheus03-sidecar
          thanos-querier grafana
        )
        # Step 1: find out whether any cluster container is running at all.
        # Names are matched exactly so that "prometheus01" does not also
        # match "prometheus01-sidecar".
        cluster_running=no
        for container in "${containers[@]}"; do
          if docker ps --format '{{.Names}}' | grep -qxF -- "$container"; then
            cluster_running=yes
            break
          fi
        done
        # Step 2: stop everything exactly once. Stop errors for individual
        # containers are ignored because some of them may already be gone.
        if [[ "$cluster_running" == yes ]]; then
          for container in "${containers[@]}"; do
            echo "--> Removing $container residue"
            docker container stop "$container" &> /dev/null
          done
          # grafana is started without --rm, so remove what is left of it.
          if docker ps -a --format '{{.Names}}' | grep -qxF grafana; then
            if ! docker rm -f grafana &> /dev/null; then
              echo "--> Error while stopping grafana container"
              exit 1
            fi
          fi
          echo "*"
          echo "--> All containers drained out"
        fi
        # Step 3: the network is removed whether or not containers ran.
        if docker network ls | grep -q thanos; then
          echo "--> Removing docker network [thanos]"
          docker network rm thanos &> /dev/null
        fi
        # Only report "already down" when nothing was running to begin with.
        if [[ "$cluster_running" == no ]]; then
          echo "--> Cluster is already down, nothing to tear off"
          exit 0
        fi
        ;;
      *)
        echo "Choose an component to destroy from prom|sidecar|querier|grafana|volume|network|all"
        exit 1
        ;;
    esac
    ;;
  *)
    echo "Usage: $0 deploy [ prom|sidecar|querier|grafana|all ] | destroy [ prom|sidecar|querier|grafana|volume|network|all ]"
    exit 1
    ;;
esac
这是我的 systemd 服务:
[Unit]
Description=Thanos/Prometheus/Grafana monitoring cluster
# setup.sh drives the docker CLI, so the docker daemon must be running first.
Requires=docker.service
After=docker.service
[Service]
# setup.sh starts detached containers (docker run -d) and then exits, so it
# is a one-shot job; RemainAfterExit keeps the unit "active" afterwards.
Type=oneshot
RemainAfterExit=yes
# setup.sh resolves its config files and storage directories through $(pwd).
# Without WorkingDirectory= a system service runs from "/", which turns the
# bind mounts into paths like //prometheus01.yml that do not exist.
# NOTE(review): assumes prometheus0N.yml and bucket_config.yaml live next to
# the script in /usr/local - adjust to the directory that holds them.
WorkingDirectory=/usr/local
ExecStart=/usr/local/setup.sh deploy all
[Install]
WantedBy=multi-user.target
问题是,如果脚本作为服务启动,它就无法运行 Prometheus 的 Docker 镜像;但如果我从终端运行它,它就能完美运行。其他镜像运行正常。我不知道问题出在哪里。感谢您的帮助,提前致谢。
编辑:
ShellCheck 输出
$ shellcheck myscript
Line 23:
mkdir -p $(pwd)/prometheusStorage$item
^-- SC2046: Quote this to prevent word splitting.
Line 44:
docker run -d --net=thanos --rm -v $(pwd)/prometheus0$item.yml:/etc/prometheus/prometheus.yml -p 909$item:909$item -v $(pwd)/prometheusStorage$item:/prometheus -u root --name prometheus0$item quay.io/prometheus/prometheus
^-- SC2046: Quote this to prevent word splitting.
>> ^-- SC2046: Quote this to prevent word splitting.
Line 87:
docker run -d --rm --net=thanos -v $(pwd)/prometheus0$item.yml:/etc/prometheus/prometheus.yml -v $(pwd)/bucket_config.yaml:/tmp/bucket_config.yaml --name prometheus0$item-sidecar -u root
^-- SC2046: Quote this to prevent word splitting.
>> ^-- SC2046: Quote this to prevent word splitting.
Line 88:
-v $(pwd)/prometheusStorage$item:/tmp/prometheusStorage
^-- SC2046: Quote this to prevent word splitting.
Line 89:
-v $(pwd)/object-storage:/object-storage
^-- SC2046: Quote this to prevent word splitting.
Line 121:
if [ $? -eq 0 ]; then
^-- SC2181: Check exit code directly with e.g. 'if mycmd;', not indirectly with $?.
Line 137:
if [ $? -eq 0 ]; then
^-- SC2181: Check exit code directly with e.g. 'if mycmd;', not indirectly with $?.
Line 199:
if [ $? -eq 0 ]; then
^-- SC2181: Check exit code directly with e.g. 'if mycmd;', not indirectly with $?.
Line 219:
if [ $? -eq 0 ]; then
^-- SC2181: Check exit code directly with e.g. 'if mycmd;', not indirectly with $?.
Line 234:
if [ $? -eq 0 ]; then
^-- SC2181: Check exit code directly with e.g. 'if mycmd;', not indirectly with $?.
Line 241:
for container in prometheus01 prometheus02 prometheus03 prometheus01-sidecar prometheus02-sidecar prometheus03-sidecar thanos-querier grafana
^-- SC2167: This parent loop has its index variable overridden.
Line 246:
for container in prometheus01 prometheus02 prometheus03 prometheus01-sidecar prometheus02-sidecar prometheus03-sidecar thanos-querier grafana
^-- SC2165: This nested loop overrides the index variable of its parent.
Line 254:
if [ $? -eq 0 ]; then
^-- SC2181: Check exit code directly with e.g. 'if mycmd;', not indirectly with $?.
Line 268:
if [ $? -eq 0 ]; then
^-- SC2181: Check exit code directly with e.g. 'if mycmd;', not indirectly with $?.
Line 272:
if [ ! -z "$CLUSTERDOWN" ]; then
^-- SC2236: Use -n instead of ! -z.
以下是我启动服务时的错误消息:
Mar 24 11:16:06 thanos_server setup.sh[5386]: ++ pwd
Mar 24 11:16:06 thanos_server setup.sh[5386]: + docker run -d --net=thanos --rm -v //prometheus02.yml:/etc/prometheus/prometheus.yml -p 9091:9091 -v //prometheusStorage1:/prometheus -u root --name prometheus01 quay.io/prometheus/prometheus --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --web.listen-address=:9091 --web.enable-lifecycle --storage.tsdb.retention.time=5d --storage.tsdb.min-block-duration=5m --storage.tsdb.max-block-duration=5m --web.enable-admin
Mar 24 11:16:06 thanos_server setup.sh[5386]: + sleep 3
Mar 24 11:16:09 thanos_server setup.sh[5386]: + for item in 1 2 3
Mar 24 11:16:09 thanos_server setup.sh[5386]: + echo '--> Deploying prometheus instance #3'
Mar 24 11:16:09 thanos_server setup.sh[5386]: --> Deploying prometheus instance #3
Mar 24 11:16:09 thanos_server setup.sh[5386]: ++ pwd
Mar 24 11:16:09 thanos_server setup.sh[5386]: ++ pwd
Mar 24 11:16:09 thanos_server setup.sh[5386]: + docker run -d --net=thanos --rm -v //prometheus03.yml:/etc/prometheus/prometheus.yml -p 9091:9091 -v //prometheusStorage1:/prometheus -u root --name prometheus01 quay.io/prometheus/prometheus --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --web.listen-address=:9091 --web.enable-lifecycle --storage.tsdb.retention.time=5d --storage.tsdb.min-block-duration=5m --storage.tsdb.max-block-duration=5m --web.enable-admin
Mar 24 11:16:09 thanos_server setup.sh[5386]: + docker run -d --net=thanos --rm -v //prometheus03.yml:/etc/prometheus/prometheus.yml -p 9091:9091 -v //prometheusStorage1:/prometheus -u root --name prometheus01 quay.io/prometheus/prometheus --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --web.listen-address=:9091 --web.enable-lifecycle --storage.tsdb.retention.time=5d --storage.tsdb.min-block-duration=5m --storage.tsdb.max-block-duration=5m --web.enable-admin
Mar 24 11:16:10 thanos_server setup.sh[5386]: + sleep 3
Mar 24 11:16:13 thanos_server setup.sh[5386]: + curl http://localhost:9091
Mar 24 11:16:13 thanos_server setup.sh[5386]: + prom01=7
Mar 24 11:16:13 thanos_server setup.sh[5386]: + sleep 1
Mar 24 11:16:14 thanos_server setup.sh[5386]: + curl http://localhost:9092
Mar 24 11:16:14 thanos_server setup.sh[5386]: + prom02=7
Mar 24 11:16:14 thanos_server setup.sh[5386]: + sleep 1
Mar 24 11:16:14 thanos_server setup.sh[5386]: + sleep 1
Mar 24 11:16:15 thanos_server setup.sh[5386]: + curl http://localhost:9093
Mar 24 11:16:15 thanos_server setup.sh[5386]: + prom03=7
Mar 24 11:16:15 thanos_server setup.sh[5386]: + sleep 1
Mar 24 11:16:16 thanos_server setup.sh[5386]: + [[ prom01 -eq 0 ]]
Mar 24 11:16:16 thanos_server setup.sh[5386]: + echo '--> Error occurred while deploying prometheus server'
Mar 24 11:16:16 thanos_server setup.sh[5386]: --> Error occurred while deploying prometheus server
Mar 24 11:16:16 thanos_server setup.sh[5386]: + exit 1
Mar 24 11:16:16 thanos_server systemd[1]: monitoring.service: Main process exited, code=exited, status=1/FA
Mar 24 11:16:16 thanos_server systemd[1]: monitoring.service: Failed with result 'exit-code'.
唯一的问题与 $(pwd) 有关。我不知道为什么会发生这种情况,但当我把 $(pwd) 替换为文件的绝对路径(absolute path)
时