adding one-hit monitoring with prometheus, influxdb, telegraf and automatic dashboards

2018-06-10 15:44:52 +02:00 · 2018-06-10 15:44:52 +02:00 · 14630e7592
commit 14630e7592
parent 9487fb10aa
7 changed files with 1339 additions and 2133 deletions
--- a/deploy_all.sh
+++ b/deploy_all.sh
@ -0,0 +1,13 @@
+## INSTALL docker-compose
+## uncomment if you don't have docker-compose installed  #sudo curl -L https://github.com/docker/compose/releases/download/1.21.2/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose
+## uncomment if you don't have docker-compose installed  #sudo chmod +x /usr/local/bin/docker-compose
+
+# START docker-compose
+docker-compose up -d 
+
+# ADD DATASOURCES AND DASHBOARDS
+echo "adding dashboards..."
+docker exec -it -u 0 grafana /var/lib/grafana/ds/add_dashboards.sh
+
+echo "adding datasources..."
+docker exec -it -u 0 grafana /var/lib/grafana/ds/add_datasources.sh
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -2,6 +2,7 @@ version: '3'

 networks:
  public: {}
+  private: {}

 volumes:
  grafana_lib: {}
@ -15,6 +16,7 @@ services:
      - "8086:8086"
    networks:
      - public
+      - private
    volumes:
      - ./data/influxdb:/var/lib/influxdb
    environment:
@ -27,13 +29,13 @@ services:
      image: grafana/grafana:5.1.3
      container_name: grafana
      ports:
-        - "3000:3000"
+        - "3001:3000"
      networks:
        - public
+        - private
      volumes:
        - grafana_lib:/var/lib/grafana
-        - grafana_ds:/var/lib/grafana/ds:rw
-        - ${PWD}/grafana/add_datasources.sh:/var/lib/grafana/ds/add_datasources.sh
+        - ${PWD}/grafana/:/var/lib/grafana/ds/
      environment:
        GF_AUTH_ANONYMOUS_ENABLED: "true"
        GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin"
@ -52,3 +54,38 @@ services:
    environment:
      # real influx host
      INFLUXDB_URI: "http://localhost:8086"
+
+  prometheus:
+    image: quay.io/prometheus/prometheus:v2.0.0
+    container_name: prometheus
+    volumes:
+     - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
+    networks:
+     - private
+    command: "--config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus"
+    ports:
+     - 9090:9090
+    depends_on:
+     - node_exporter
+
+  node_exporter:
+    image: prom/node-exporter:v0.13.0
+    container_name: node_exporter
+    volumes:
+      - ${PWD}/node_exporter/justrun.py:/justrun.py:rw
+      - ${PWD}/node_exporter/hmon:/hmon:rw
+      - ${PWD}/node_exporter/smoothlogging:/smoothlogging:rw
+      - ${PWD}/node_exporter/textfile_collector:/var/lib/node_exporter/textfile_collector/
+    networks:
+      - public
+      - private
+    environment:
+      DOGSNAME:      "Gula&Bodka"
+    command:
+      - "--collector.textfile"
+      - "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector/"
+    expose:
+      - 9100
+    ports:
+      - 9100:9100
+
--- a/grafana/add_dashboards.sh
+++ b/grafana/add_dashboards.sh
@ -50,7 +50,8 @@ wait_for_api() {
 replace_datasource() {
   local dashboard_file=$1
   local datasource_name=$2
-   cmd="sed -i.bak_remove \"s/\\\${DS_INFLUXDB}/${datasource_name}/g\" ${dashboard_file}"
+   local old_datasource_name=$3
+   cmd="sed -i.bak_remove \"s/${old_datasource_name}/${datasource_name}/g\" ${dashboard_file}"
   eval ${cmd} || return 1
   return 0
 }
@ -58,22 +59,22 @@ replace_datasource() {
 install_dashboards() {
  local dashboard

-  for dashboard in dashboards/*.json
+  for dashboard in /var/lib/grafana/ds/dashboards/*.json

  do
  if [[ $(grep "\"name\": \"DS_INFLUXDB\"," ${dashboard}) ]]; then
    echo -e "${PURPLE}Dashboard ${dashboard} seems to be for InfluxDB datasource${NC}"
+    old_datasource_name="\\\${DS_INFLUXDB}"
    datasource_name="influxdb"
  fi 
  if [[ $(grep "\"name\": \"DS_PROMETHEUS\"," ${dashboard}) ]]; then
    echo -e "${PURPLE}Dashboard ${dashboard} seems to be for Prometheus datasource${NC}"
+    old_datasource_name="\\\${DS_PROMETHEUS}"
    datasource_name="prometheus"
  fi 
    if [[ -f "${dashboard}" ]]; then
      echo -e "${LCYAN}Installing dashboard ${dashboard}${NC}"
-      replace_datasource ${dashboard} ${datasource_name}
-      # backup will be created before wrapping dashboard ^
-      #echo -e "{\"dashboard\": `cat $dashboard`}" > "${dashboard}.wrapped"
+      replace_datasource ${dashboard} ${datasource_name} ${old_datasource_name}
      cp ${dashboard} ${dashboard}.wrapped
      sed -i '1s/^/{"dashboard":\n/' ${dashboard}.wrapped
      echo "}" >> ${dashboard}.wrapped
@ -84,7 +85,6 @@ install_dashboards() {
        echo -e "\n** ${RED}installation of: ${PURPLE}\"${dashboard}\"${RED} failed **${NC}"
      fi
    fi
-  #rm ${dashboard}.wrapped
  done
 }

@ -94,4 +94,5 @@ configure_grafana() {
 }

 configure_grafana
+rm -vf /var/lib/grafana/ds/dashboards/*.{wrapped,bak_remove}

--- a/grafana/add_datasources.sh
+++ b/grafana/add_datasources.sh
@ -2,7 +2,7 @@
 #set -e

 # ADD INFLUXDB DATASOURCE
-curl -s -v -H "Content-Type: application/json" \
+curl -s -H "Content-Type: application/json" \
    -XPOST http://admin:admin@localhost:3000/api/datasources \
    -d @- <<EOF
 {
@ -18,7 +18,7 @@ curl -s -v -H "Content-Type: application/json" \
 EOF

 ## ADD PROMETHEUS DATASOURCE
-curl -s -v -H "Content-Type: application/json" \
+curl -s -H "Content-Type: application/json" \
    -XPOST http://admin:admin@localhost:3000/api/datasources \
    -d @- <<EOF
 {
--- a/grafana/dashboards/prometheus-2-stats.json
+++ b/grafana/dashboards/prometheus-2-stats.json
--- a/grafana/dashboards/prometheus-system.json
+++ b/grafana/dashboards/prometheus-system.json
--- a/prometheus/prometheus.yml
+++ b/prometheus/prometheus.yml
@ -0,0 +1,34 @@
+# my global config
+global:
+  scrape_interval:     15s # By default, scrape targets every 15 seconds.
+  evaluation_interval: 15s # By default, evaluate rules every 15 seconds.
+  # scrape_timeout is set to the global default (10s).
+
+  # Attach these labels to any time series or alerts when communicating with
+  # external systems (federation, remote storage, Alertmanager).
+  external_labels:
+      monitor: 'codelab-monitor'
+
+# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
+rule_files:
+  # - "first.rules"
+  # - "second.rules"
+
+# Scrape configurations, one job per endpoint to scrape:
+# Prometheus itself and the node_exporter container.
+scrape_configs:
+  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
+  - job_name: 'prometheus'
+
+    # Override the global default and scrape targets from this job every 5 seconds.
+    scrape_interval: 5s
+
+    # metrics_path defaults to '/metrics'
+    # scheme defaults to 'http'.
+
+    static_configs:
+      - targets: ['localhost:9090']
+  - job_name: "node"
+    scrape_interval: "15s"
+    static_configs:
+      - targets: ['node_exporter:9100']