adding node exporter, loki, and promtail

This commit is contained in:
ai-dev
2024-10-30 20:26:28 +01:00
parent b1dbbe1a8b
commit b840c65795
87 changed files with 2372 additions and 2366 deletions

View File

@ -0,0 +1,11 @@
#!/command/with-contenv bashio
# ==============================================================================
# Home Assistant Community Add-on: Prometheus
# Configures Prometheus
# ==============================================================================

# Expose the Supervisor token so Prometheus can authenticate against the
# Home Assistant /core/api/prometheus endpoint via bearer_token_file.
echo "${SUPERVISOR_TOKEN}" > '/run/home-assistant.token'

mkdir -p /data/prometheus

# Normalize permissions on the TSDB directory. The -perm test must match the
# mode being applied; the original tested 0644/0755 while applying 0660/0770,
# so every already-fixed file was chmod'ed again on each add-on start.
find /data/prometheus -not -perm 0660 -type f -exec chmod 0660 {} \;
find /data/prometheus -not -perm 0770 -type d -exec chmod 0770 {} \;
chown -R prometheus:prometheus /data/prometheus

View File

@ -0,0 +1,40 @@
---
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "/share/prometheus/rules/*.yml"
  - "/share/prometheus/rules/*.yaml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]

  - job_name: "home-assistant"
    scrape_interval: 60s
    metrics_path: /core/api/prometheus
    # Long-Lived Access Token, written by the add-on init script.
    bearer_token_file: "/run/home-assistant.token"
    scheme: http
    static_configs:
      - targets: ["supervisor:80"]

View File

@ -0,0 +1,11 @@
#!/command/with-contenv bashio
# ==============================================================================
# Home Assistant Community Add-on: Prometheus
# Take down the S6 supervision tree when Prometheus fails
# ==============================================================================

# s6 passes the service's wait(2) status as $1: 0 is a clean exit and 256
# means the process was killed by a signal. Anything else is a crash and
# brings down the whole supervision tree (stopping the add-on).
readonly exit_code="${1}"

if [[ "${exit_code}" -ne 0 ]] && [[ "${exit_code}" -ne 256 ]]; then
  bashio::log.warning "Prometheus config crashed, halting add-on"
  exec /run/s6/basedir/bin/halt
fi

bashio::log.info "Prometheus config stopped, restarting..."

View File

@ -0,0 +1,13 @@
#!/command/with-contenv bashio
# ==============================================================================
# Home Assistant Community Add-on: Prometheus
# Runs the config generator that rebuilds the Prometheus configuration
# whenever scrape-target files under /share/prometheus/targets change.
# ==============================================================================
bashio::log.info 'Starting prometheus config generator...'

# First start: create the drop-in target directory, owned by prometheus.
if ! bashio::fs.directory_exists /share/prometheus/targets; then
    mkdir -p /share/prometheus/targets
    chown -R prometheus:prometheus /share/prometheus/targets
fi

if ! cd /opt/prometheus-configgen; then
    bashio::exit.nok "Could not change working directory for prometheus-configgen."
fi

# Drop privileges and replace this shell with the generator process.
exec s6-setuidgid prometheus python3 combiner

View File

@ -0,0 +1,11 @@
#!/command/with-contenv bashio
# ==============================================================================
# Home Assistant Community Add-on: Prometheus
# Take down the S6 supervision tree when Prometheus fails
# ==============================================================================

# $1 is the wait(2) status s6 hands to finish scripts: 0 = clean exit,
# 256 = terminated by a signal. Any other status is an unexpected crash,
# in which case the whole supervision tree is halted.
readonly exit_code="${1}"

if [[ "${exit_code}" -ne 0 ]] && [[ "${exit_code}" -ne 256 ]]; then
  bashio::log.warning "Prometheus crashed, halting add-on"
  exec /run/s6/basedir/bin/halt
fi

bashio::log.info "Prometheus stopped, restarting..."

View File

@ -0,0 +1,46 @@
#!/command/with-contenv bashio
# shellcheck disable=SC2191
# ==============================================================================
# Home Assistant Community Add-on: Prometheus
# Runs the Prometheus Server
# ==============================================================================
declare -a options
declare name
declare value

bashio::log.info 'Starting prometheus...'

options+=(--config.file="/etc/prometheus/prometheus.yml" )
options+=(--storage.tsdb.path="/data/prometheus" )
options+=(--web.console.libraries="/usr/share/prometheus/console_libraries" )
options+=(--web.console.templates="/usr/share/prometheus/consoles" )
options+=(--web.route-prefix="/" )
options+=(--web.external-url="http://localhost:9090$(bashio::addon.ingress_entry)/" )
# Lifecycle endpoint lets the config generator POST /-/reload on changes.
options+=(--web.enable-lifecycle )

# Load custom environment variables
for var in $(bashio::config 'env_vars|keys'); do
    name=$(bashio::config "env_vars[${var}].name")
    value=$(bashio::config "env_vars[${var}].value")
    bashio::log.info "Setting ${name} to ${value}"
    export "${name}=${value}"
done

# First-start directory setup, owned by the unprivileged prometheus user.
if ! bashio::fs.directory_exists /data/prometheus; then
    mkdir -p /data/prometheus
    chown prometheus:prometheus /data/prometheus
fi
if ! bashio::fs.directory_exists /share/prometheus/rules; then
    mkdir -p /share/prometheus/rules
    chown -R prometheus:prometheus /share/prometheus/rules
fi
if ! bashio::fs.directory_exists /share/prometheus/targets; then
    mkdir -p /share/prometheus/targets
    chown -R prometheus:prometheus /share/prometheus/targets
fi

# Merge optional user overrides into the generated config. The existence
# check is required: running yq unconditionally fails on first start, when
# the user has not created /share/prometheus/prometheus.yaml yet.
if bashio::fs.file_exists /share/prometheus/prometheus.yaml; then
    yq -i eval-all '. as $item ireduce ({}; . * $item)' /etc/prometheus/prometheus.yml /share/prometheus/prometheus.yaml
fi

# Run Prometheus
exec s6-setuidgid prometheus /usr/local/bin/prometheus "${options[@]}"

View File

@ -0,0 +1,100 @@
import asyncio
import os
import subprocess
import sys
import tempfile

import aionotify
import requests
import yaml
from yaml_include import Constructor
def generateConfig():
    """Render prometheus.template into a final Prometheus config string.

    Registers the ``!include`` constructor (pyyaml-include) rooted at
    /share/prometheus/, loads the template, then merges the hidden
    ``.scrape_configs_static`` and ``.scrape_configs_included`` lists into
    the canonical ``scrape_configs`` key before dumping back to YAML.
    """
    Constructor.add_to_loader_class(
        loader_class=yaml.FullLoader, base_dir="/share/prometheus/"
    )
    with open("prometheus.template") as template:
        config = yaml.load(template, Loader=yaml.FullLoader)

    # Static jobs come first, then the user-provided drop-in targets.
    static_jobs = config.pop(".scrape_configs_static")
    included_jobs = config.pop(".scrape_configs_included")
    config["scrape_configs"] = static_jobs + included_jobs

    return yaml.dump(config, default_flow_style=False, default_style="")
def testConfig(config):
    """Validate a rendered Prometheus configuration with promtool.

    Writes ``config`` to a temporary file and runs ``promtool check config``
    on it.

    Returns True when promtool accepts the config; raises
    ``Exception("validation error")`` when it is rejected, and re-raises any
    unexpected error after logging.
    """
    result = False
    try:
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yml") as tmp:
            tmp.write(config)
            # promtool opens the file by name, so buffered data must be on disk.
            tmp.flush()
            # List-form subprocess avoids shell interpretation of the path
            # (the original built a shell command string for os.system).
            proc = subprocess.run(
                ["promtool", "check", "config", tmp.name],
                stdout=subprocess.DEVNULL,
            )
            result = proc.returncode == 0
    except Exception:
        print("Failed to validate")
        raise
    if not result:
        raise Exception("validation error")
    return result
def writeConfig(config, file):
    """Write the config to ``file`` and ask Prometheus to hot-reload it.

    Failures are logged but deliberately not raised: Prometheus keeps
    serving with its previous configuration (best-effort reload). The
    original bare ``except`` printed only the word "Exception", hiding the
    actual cause.
    """
    try:
        with open(file, "w") as f:
            f.write(config)
        response = requests.post(url="http://localhost:9090/-/reload", data={})
        if response.status_code != 200:
            print("Reload failed with HTTP %s" % response.status_code)
    except Exception as exc:
        print("Failed to write/reload config: %s" % exc)
# Module-level event loop shared by watcher() and main().
# NOTE(review): asyncio.get_event_loop() outside a running loop is deprecated
# since Python 3.10 — consider switching main() to asyncio.run(watcher()).
loop = asyncio.get_event_loop()
# Directories watched (via inotify) for scrape-target file changes.
paths_to_watch = ["/share/prometheus/targets/"]
# Prevents overlapping regenerations when events arrive in bursts.
lock = asyncio.Lock()
async def compile():
    """Regenerate, validate, and install the Prometheus config once.

    Returns immediately when another regeneration is already in flight; the
    watcher schedules a new task for every subsequent inotify event, so later
    changes are still picked up.
    """
    if lock.locked():
        return
    async with lock:
        try:
            config = generateConfig()
            testConfig(config)
            writeConfig(config, "/etc/prometheus/prometheus.yml")
            print("Compiled")
        except Exception:
            # Invalid configs are expected while users edit target files:
            # keep the previous config and wait for the next change event.
            # (Narrowed from a bare except so task cancellation propagates.)
            pass
async def watcher():
    """Watch the target directories and recompile the config on each change."""
    # Build the initial config before the first inotify event arrives.
    asyncio.create_task(compile())

    filewatch = aionotify.Watcher()
    for path in paths_to_watch:
        filewatch.watch(
            path,
            aionotify.Flags.MODIFY | aionotify.Flags.CREATE | aionotify.Flags.DELETE,
        )
        print(path)
    await filewatch.setup(loop)
    try:
        while True:
            event = await filewatch.get_event()
            sys.stdout.write("Got event: %s\n" % repr(event))
            asyncio.create_task(compile())
    finally:
        # The original close() sat after the infinite loop and was
        # unreachable; finally guarantees the inotify descriptor is
        # released on cancellation or error.
        filewatch.close()
def main():
    """Entry point: run the file watcher forever on the module-level loop."""
    try:
        loop.run_until_complete(watcher())
    finally:
        # loop.close()
        pass


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,38 @@
---
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "/share/prometheus/rules/*.yaml"

# Hidden keys (leading dot) that the combiner script merges into a single
# scrape_configs list; !include is provided by pyyaml-include and globs the
# user drop-in target files under /share/prometheus/.
.scrape_configs_included: !include targets/*.yaml
.scrape_configs_static:
  - job_name: 'home-assistant'
    scrape_interval: 60s
    metrics_path: /core/api/prometheus
    # Long-Lived Access Token
    bearer_token_file: '/run/home-assistant.token'
    scheme: http
    static_configs:
      - targets: ['supervisor:80']
  - job_name: 'prometheus'
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ['localhost:9090']

View File

@ -0,0 +1,5 @@
aionotify
# >=2.0 required: combiner uses the `from yaml_include import Constructor`
# API with Constructor.add_to_loader_class (the 1.x API was yamlinclude).
pyyaml-include>=2.0
PyYAML>=5.3.1
requests>=2.23.0
# NOTE: `yaml_include` is the module name provided by pyyaml-include, not a
# separate PyPI distribution, so it is not listed as its own requirement.