From 73297e1fa6455c3cefeb9d34eb33c3447c432d4a Mon Sep 17 00:00:00 2001 From: Sebastian Blasiak Date: Thu, 14 Jun 2018 21:19:30 +0200 Subject: [PATCH 1/2] due improvements --- alertmanager/config.yml | 10 + deploy_all.sh | 54 +- docker-compose.yml | 44 +- grafana/add_dashboards.sh | 4 +- .../dashboards/docker-system-monitoring.json | 2175 +++++++++++++++++ grafana/dashboards/node_exporter_metrics.json | 1488 +++++++++++ prometheus/prometheus.yml | 12 + undeploy_all.sh | 20 +- 8 files changed, 3787 insertions(+), 20 deletions(-) create mode 100644 alertmanager/config.yml create mode 100644 grafana/dashboards/docker-system-monitoring.json create mode 100644 grafana/dashboards/node_exporter_metrics.json diff --git a/alertmanager/config.yml b/alertmanager/config.yml new file mode 100644 index 0000000..329fb3b --- /dev/null +++ b/alertmanager/config.yml @@ -0,0 +1,10 @@ +route: + receiver: 'slack' + +receivers: + - name: 'slack' + slack_configs: + - send_resolved: true + username: 'sebastian.blasaik' + channel: '#channel_name' + api_url: 'https://hooks.slack.com/services/' diff --git a/deploy_all.sh b/deploy_all.sh index ec01263..f788ab7 100755 --- a/deploy_all.sh +++ b/deploy_all.sh @@ -1,10 +1,33 @@ +#!/bin/bash + +NC='\033[0m' + +RED='\033[00;31m' +GREEN='\033[00;32m' +YELLOW='\033[00;33m' +BLUE='\033[00;34m' +PURPLE='\033[00;35m' +CYAN='\033[00;36m' +LIGHTGRAY='\033[00;37m' +MAGENTA='\033[00;35m' +LRED='\033[01;31m' +LGREEN='\033[01;32m' +LYELLOW='\033[01;33m' +LBLUE='\033[01;34m' +LPURPLE='\033[01;35m' +LCYAN='\033[01;36m' +WHITE='\033[01;37m' + +logout=0 # set to 1 below once docker or docker-compose gets installed; initialised so an inherited env var cannot trigger the exit path + ## INSTALL docker-ce read -r -p "Do you want to install docker? 
[y/N] " response if [[ "$response" =~ ^([yY][eE][sS]|[yY])+$ ]] then + logout=1 curl -fsSL get.docker.com -o get-docker.sh sudo sh get-docker.sh - sudo usermod -aG docker $(whoami) + sudo systemctl enable docker else echo -e "'no' chosen for docker installation - docker assummed to be already installed" fi @@ -13,18 +36,43 @@ fi read -r -p "Do you want to install docker-compose? [y/N] " response if [[ "$response" =~ ^([yY][eE][sS]|[yY])+$ ]] then + logout=1 sudo curl -L https://github.com/docker/compose/releases/download/1.21.2/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose sudo chmod +x /usr/local/bin/docker-compose else echo -e "'no' chosen for docker-compose installation - docker-compose assummed to be already installed" fi +## LOGOUT IF NEEDED +if [[ "$logout" -eq 1 ]]; then + if [[ $(whoami) != "root" ]]; then + if ! id -nG "$(whoami)" | tr ' ' '\n' | grep -qx 'docker'; then # exact group-name match; a substring grep misfires on names such as dockerroot + echo -e "${BLUE}your login ${YELLOW}$(whoami)${NC}${BLUE} will be added to \"docker\" group...${NC}" + if ! grep -q '^docker:' /etc/group; then # create the group only when no entry named exactly docker exists + sudo groupadd docker + echo -e "${BLUE}creating group: ${YELLOW}docker${NC}${BLUE}${NC}" + fi + sudo usermod -aG docker "$(whoami)" + echo -e "${BLUE}you need to logout and login again...${NC}" + echo -e "${BLUE}start the same script and skip docker related part (answer 'no' two times).${NC}" + exit 0 + fi + fi + echo -e "${YELLOW}docker was installed, you need to logout and login${NC}" + echo -e "${RED}login again and start deploy script without docker* installation part (answer no)${NC}" + exit 0 +fi + + # START docker-compose +sudo systemctl restart docker docker-compose up -d # ADD DATASOURCES AND DASHBOARDS +sleep 5 +echo "adding datasources..." +docker exec -it -u 0 grafana /var/lib/grafana/ds/add_datasources.sh + echo "adding dashboards..." docker exec -it -u 0 grafana /var/lib/grafana/ds/add_dashboards.sh -echo "adding datasources..." 
-docker exec -it -u 0 grafana /var/lib/grafana/ds/add_datasources.sh diff --git a/docker-compose.yml b/docker-compose.yml index 58b9bfc..f70ed0c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -46,11 +46,12 @@ services: command: ["bash", "/var/lib/grafana/ds/add_datasources.sh"] telegraf: - image: telegraf:latest + image: telegraf:1.6-alpine container_name: telegraf network_mode: "host" volumes: - ./telegraf.conf:/etc/telegraf/telegraf.conf:ro + - /var/run/utmp:/var/run/utmp:ro environment: # real influx host INFLUXDB_URI: "http://localhost:8086" @@ -60,8 +61,12 @@ services: container_name: prometheus volumes: - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml + links: + - cadvisor:cadvisor + - alertmanager:alertmanager networks: - private + - public command: "--config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus" ports: - 9090:9090 @@ -80,12 +85,41 @@ services: - public - private environment: - DOGSNAME: "Gula&Bodka" - command: - - "--collector.textfile" - - "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector/" + PURPOSE: "PERFORMANCE_MONITORING" expose: - 9100 ports: - 9100:9100 +# + cadvisor: + image: google/cadvisor:v0.28.3 + container_name: cadvisor + volumes: + - /:/rootfs:ro + - /var/run:/var/run:rw + - /sys:/sys:ro + - /var/lib/docker/:/var/lib/docker:ro + #- /cgroup:/cgroup:ro #doesn't work on MacOS only for Linux + restart: unless-stopped + expose: + - 8080 + networks: + - private + - public + labels: + org.label-schema.group: "monitoring" + + alertmanager: + image: prom/alertmanager:v0.14.0 # pinned like telegraf and cadvisor so redeploys do not silently pull a new release; bump deliberately + container_name: alertmanager + ports: + - 9093:9093 + volumes: + - ./alertmanager/:/etc/alertmanager/ + networks: + - private + restart: unless-stopped + command: + - '--config.file=/etc/alertmanager/config.yml' + - '--storage.path=/alertmanager' diff --git a/grafana/add_dashboards.sh b/grafana/add_dashboards.sh index a91166c..85289f9 100755 --- a/grafana/add_dashboards.sh +++ b/grafana/add_dashboards.sh @@ 
-80,9 +80,9 @@ install_dashboards() { echo "}" >> ${dashboard}.wrapped if grafana_api POST /api/dashboards/db "" "${dashboard}.wrapped"; then - echo -e "\n** ${GREEN}installed ok **${NC}" + echo -e "\n** ${GREEN}installed ok **${NC}" else - echo -e "\n** ${RED}installation of: ${PURPLE}\"${dashboard}\"${RED} failed **${NC}" + echo -e "\n** ${RED}installation of: ${PURPLE}\"${dashboard}\"${RED} failed.**${NC}" fi fi done diff --git a/grafana/dashboards/docker-system-monitoring.json b/grafana/dashboards/docker-system-monitoring.json new file mode 100644 index 0000000..8f27098 --- /dev/null +++ b/grafana/dashboards/docker-system-monitoring.json @@ -0,0 +1,2175 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.0.0-beta2" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "id": null, + "title": "Docker and system monitoring", + "description": "A simple overview of the most important Docker host and container metrics. 
(cAdvisor/Prometheus)", + "tags": [], + "style": "dark", + "timezone": "browser", + "editable": true, + "sharedCrosshair": true, + "hideControls": false, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "templating": { + "list": [ + { + "allValue": ".+", + "current": {}, + "datasource": "prometheus", + "hide": 0, + "includeAll": true, + "label": "Container Group", + "multi": true, + "name": "containergroup", + "options": [], + "query": "label_values(container_group)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query" + }, + { + "auto": true, + "auto_count": 50, + "auto_min": "50s", + "current": { + "text": "auto", + "value": "$__auto_interval" + }, + "datasource": null, + "hide": 0, + "includeAll": false, + "label": "Interval", + "multi": false, + "name": "interval", + "options": [ + { + "text": "auto", + "value": "$__auto_interval", + "selected": true + }, + { + "text": "30s", + "value": "30s", + "selected": false + }, + { + "text": "1m", + "value": "1m", + "selected": false + }, + { + "text": "2m", + "value": "2m", + "selected": false + }, + { + "text": "3m", + "value": "3m", + "selected": false + }, + { + "text": "5m", + "value": "5m", + "selected": false + }, + { + "text": "7m", + "value": "7m", + "selected": false + }, + { + "text": "10m", + "value": "10m", + "selected": false + }, + { + "text": "30m", + "value": "30m", + "selected": false + }, + { + "text": "1h", + "value": "1h", + "selected": false + }, + { + "text": "6h", + "value": "6h", + "selected": false + }, + { + "text": "12h", + "value": "12h", + "selected": false + }, + { + "text": "1d", + "value": "1d", + "selected": false + }, + { + "text": "7d", + "value": "7d", + "selected": false + }, + { + "text": 
"14d", + "value": "14d", + "selected": false + }, + { + "text": "30d", + "value": "30d", + "selected": false + } + ], + "query": "30s,1m,2m,3m,5m,7m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "type": "interval" + }, + { + "allValue": null, + "current": {}, + "datasource": "prometheus", + "hide": 0, + "includeAll": false, + "label": "Node", + "multi": true, + "name": "server", + "options": [], + "query": "label_values(node_boot_time, instance)", + "refresh": 1, + "regex": "/([^:]+):.*/", + "sort": 0, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query" + } + ] + }, + "annotations": { + "list": [] + }, + "refresh": "5m", + "schemaVersion": 13, + "version": 57, + "links": [], + "gnetId": 893, + "rows": [ + { + "title": "Dashboard Row", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "decimals": 0, + "editable": true, + "error": false, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "", + "id": 24, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "30%", + "prefix": "", + "prefixFontSize": "20%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "time() - node_boot_time{instance=~\"$server:.*\"}", + "hide": false, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 1800 + } + ], + "thresholds": "", + 
"title": "Uptime", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 31, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "count(rate(container_last_seen{name=~\".+\"}[$interval]))", + "intervalFactor": 2, + "refId": "A", + "step": 1800 + } + ], + "thresholds": "", + "title": "Containers", + "type": "singlestat", + "valueFontSize": "120%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "prometheus", + "decimals": 1, + "editable": true, + "error": false, + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 26, + "interval": null, + "links": [], + "mappingType": 1, + 
"mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "min((node_filesystem_size{fstype=~\"xfs|ext4\",instance=~\"$server:.*\"} - node_filesystem_free{fstype=~\"xfs|ext4\",instance=~\"$server:.*\"} )/ node_filesystem_size{fstype=~\"xfs|ext4\",instance=~\"$server:.*\"})", + "hide": false, + "intervalFactor": 2, + "refId": "A", + "step": 1800 + } + ], + "thresholds": "0.75, 0.90", + "title": "Disk space", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "prometheus", + "decimals": 0, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 25, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + 
"lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "((node_memory_MemTotal{instance=~\"$server:.*\"} - node_memory_MemAvailable{instance=~\"$server:.*\"}) / node_memory_MemTotal{instance=~\"$server:.*\"}) * 100", + "intervalFactor": 2, + "refId": "A", + "step": 1800 + } + ], + "thresholds": "70, 90", + "title": "Memory", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "prometheus", + "decimals": 0, + "editable": true, + "error": false, + "format": "decbytes", + "gauge": { + "maxValue": 500000000, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 30, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "(node_memory_SwapTotal{instance=~'$server:.*'} - node_memory_SwapFree{instance=~'$server:.*'})", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 1800 + } + ], + "thresholds": "400000000", + "title": "Swap", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + 
"colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "decimals": 0, + "editable": true, + "error": false, + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 27, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(50, 189, 31, 0.18)", + "full": false, + "lineColor": "rgb(69, 193, 31)", + "show": true + }, + "targets": [ + { + "expr": "node_load1{instance=~\"$server:.*\"} / count by(job, instance)(count by(job, instance, cpu)(node_cpu{instance=~\"$server:.*\"}))", + "intervalFactor": 2, + "refId": "A", + "step": 1800 + } + ], + "thresholds": "0.8,0.9", + "title": "Load", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "showTitle": false, + "titleSize": "h6", + "height": 150, + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "New row", + "panels": [ + { + "aliasColors": { + "SENT": "#BF1B00" + }, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 
1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_receive_bytes_total{id=\"/\"}[$interval])) by (id)", + "intervalFactor": 2, + "legendFormat": "RECEIVED", + "refId": "A", + "step": 600 + }, + { + "expr": "- sum(rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])) by (id)", + "hide": false, + "intervalFactor": 2, + "legendFormat": "SENT", + "refId": "B", + "step": 600 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Traffic", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "{id=\"/\",instance=\"cadvisor:8080\",job=\"prometheus\"}": "#BA43A9" + }, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 2, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_cpu_system_seconds_total[1m]))", + "hide": true, + "intervalFactor": 2, + "legendFormat": "a", + "refId": "B", + "step": 120 + }, + { + "expr": "sum(rate(container_cpu_system_seconds_total{name=~\".+\"}[1m]))", + "hide": true, + "interval": "", + 
"intervalFactor": 2, + "legendFormat": "nur container", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(rate(container_cpu_system_seconds_total{id=\"/\"}[1m]))", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "nur docker host", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "expr": "sum(rate(process_cpu_seconds_total[$interval])) * 100", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "host", + "metric": "", + "refId": "C", + "step": 600 + }, + { + "expr": "sum(rate(container_cpu_system_seconds_total{name=~\".+\"}[1m])) + sum(rate(container_cpu_system_seconds_total{id=\"/\"}[1m])) + sum(rate(process_cpu_seconds_total[1m]))", + "hide": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 120 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 1.25 + ], + "type": "gt" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Panel Title alert", + "noDataState": "keep_state", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": {}, + "bars": false, + "datasource": "prometheus", + "decimals": 0, + "editable": true, + "error": false, + "fill": 1, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": 
false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_load1{instance=~\"$server:.*\"} / count by(job, instance)(count by(job, instance, cpu)(node_cpu{instance=~\"$server:.*\"}))", + "intervalFactor": 2, + "refId": "A", + "step": 600 + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 1.25 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Load", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1.50", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 850000000000 + ], + "type": "gt" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Free/Used Disk Space alert", + "noDataState": "keep_state", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": { + "Belegete Festplatte": "#BF1B00", + "Free Disk Space": "#7EB26D", + "Used Disk Space": "#7EB26D", + "{}": "#BF1B00" + }, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, 
+ "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Used Disk Space", + "yaxis": 1 + } + ], + "span": 2, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_filesystem_size{fstype=\"aufs\"} - node_filesystem_free{fstype=\"aufs\"}", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Used Disk Space", + "refId": "A", + "step": 600 + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 850000000000 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Used Disk Space", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": 1000000000000, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 10000000000 + ], + "type": "gt" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Available Memory alert", + "noDataState": "keep_state", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": { + "Available Memory": "#7EB26D", + "Unavailable Memory": "#7EB26D" + }, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 
1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 2, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "container_memory_rss{name=~\".+\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "D", + "step": 20 + }, + { + "expr": "sum(container_memory_rss{name=~\".+\"})", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "A", + "step": 20 + }, + { + "expr": "container_memory_usage_bytes{name=~\".+\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "B", + "step": 20 + }, + { + "expr": "container_memory_rss{id=\"/\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "C", + "step": 20 + }, + { + "expr": "sum(container_memory_rss)", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "E", + "step": 20 + }, + { + "expr": "node_memory_Buffers", + "hide": true, + "intervalFactor": 2, + "legendFormat": "node_memory_Dirty", + "refId": "N", + "step": 30 + }, + { + "expr": "node_memory_MemFree", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "F", + "step": 20 + }, + { + "expr": "node_memory_MemAvailable", + "hide": true, + "intervalFactor": 2, + "legendFormat": "Available Memory", + "refId": "H", + "step": 20 + }, + { + "expr": "node_memory_MemTotal - node_memory_MemAvailable", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Unavailable Memory", + "refId": "G", + "step": 600 + }, + { + "expr": "node_memory_Inactive", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "I", + "step": 30 + }, + { + "expr": "node_memory_KernelStack", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "J", + "step": 30 + }, + { + "expr": "node_memory_Active", + "hide": true, + 
"intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "K", + "step": 30 + }, + { + "expr": "node_memory_MemTotal - (node_memory_Active + node_memory_MemFree + node_memory_Inactive)", + "hide": true, + "intervalFactor": 2, + "legendFormat": "Unknown", + "refId": "L", + "step": 40 + }, + { + "expr": "node_memory_MemFree + node_memory_Inactive ", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "M", + "step": 30 + }, + { + "expr": "container_memory_rss{name=~\".+\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "O", + "step": 30 + }, + { + "expr": "node_memory_Inactive + node_memory_MemFree + node_memory_MemAvailable", + "hide": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "P", + "step": 40 + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 10000000000 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Available Memory", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": 16000000000, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "IN on /sda": "#7EB26D", + "OUT on /sda": "#890F02" + }, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + 
"span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "-sum(rate(node_disk_bytes_read[$interval])) by (device)", + "hide": false, + "intervalFactor": 2, + "legendFormat": "OUT on /{{device}}", + "metric": "node_disk_bytes_read", + "refId": "A", + "step": 600 + }, + { + "expr": "sum(rate(node_disk_bytes_written[$interval])) by (device)", + "intervalFactor": 2, + "legendFormat": "IN on /{{device}}", + "metric": "", + "refId": "B", + "step": 600 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk I/O", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "showTitle": false, + "titleSize": "h6", + "height": 202, + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "New row", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_receive_bytes_total{name=~\".+\"}[$interval])) by (name)", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A", + "step": 240 + }, + { + 
"expr": "- rate(container_network_transmit_bytes_total{name=~\".+\"}[$interval])", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Received Network Traffic per Container", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_transmit_bytes_total{name=~\".+\"}[$interval])) by (name)", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])", + "hide": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Sent Network Traffic per Container", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + 
"transparent": false, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 10, + "max": 8, + "min": 0, + "show": false + } + ] + } + ], + "showTitle": false, + "titleSize": "h6", + "height": 251, + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "Row", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 5, + "grid": {}, + "id": 1, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_cpu_usage_seconds_total{name=~\".+\"}[$interval])) by (name) * 100", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "metric": "", + "refId": "F", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage per Container", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "showTitle": false, + "titleSize": "h6", + "height": 247, + "repeat": null, + 
"repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "Dashboard Row", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 3, + "grid": {}, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_rss{name=~\".+\"}) by (name)", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A", + "step": 240 + }, + { + "expr": "container_memory_usage_bytes{name=~\".+\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage per Container", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 3, + "grid": {}, + "id": 34, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": 
"null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_swap{name=~\".+\"}) by (name)", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A", + "step": 240 + }, + { + "expr": "container_memory_usage_bytes{name=~\".+\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory Swap per Container", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": false, + "titleSize": "h6", + "height": 250, + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "Dashboard Row", + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "editable": true, + "error": false, + "fontSize": "100%", + "id": 37, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 4, + "styles": [ + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + "10000000", + " 25000000" + ], + "type": "number", + "unit": "decbytes" + } + ], + "targets": [ + { + "expr": "sum(container_spec_memory_limit_bytes{name=~\".+\"} - container_memory_usage_bytes{name=~\".+\"}) by (name) ", + "hide": true, + 
"intervalFactor": 2, + "legendFormat": "{{name}}", + "metric": "", + "refId": "A", + "step": 240 + }, + { + "expr": "sum(container_spec_memory_limit_bytes{name=~\".+\"}) by (name) ", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "B", + "step": 240 + }, + { + "expr": "container_memory_usage_bytes{name=~\".+\"}", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "C", + "step": 240 + } + ], + "title": "Usage memory", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "editable": true, + "error": false, + "fontSize": "100%", + "id": 35, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 4, + "styles": [ + { + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + "80", + "90" + ], + "type": "number", + "unit": "percent" + } + ], + "targets": [ + { + "expr": "sum(100 - ((container_spec_memory_limit_bytes{name=~\".+\"} - container_memory_usage_bytes{name=~\".+\"}) * 100 / container_spec_memory_limit_bytes{name=~\".+\"}) ) by (name) ", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "metric": "", + "refId": "A", + "step": 240 + }, + { + "expr": "sum(container_spec_memory_limit_bytes{name=~\".+\"}) by (name) ", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "B", + "step": 240 + }, + { + "expr": "container_memory_usage_bytes{name=~\".+\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "C", + "step": 240 + } + ], + "title": "Remaining memory", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "editable": true, + "error": false, + "fontSize": "100%", + "id": 
36, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 4, + "styles": [ + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + "10000000", + " 25000000" + ], + "type": "number", + "unit": "decbytes" + } + ], + "targets": [ + { + "expr": "sum(container_spec_memory_limit_bytes{name=~\".+\"} - container_memory_usage_bytes{name=~\".+\"}) by (name) ", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "metric": "", + "refId": "A", + "step": 240 + }, + { + "expr": "sum(container_spec_memory_limit_bytes{name=~\".+\"}) by (name) ", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "B", + "step": 240 + }, + { + "expr": "container_memory_usage_bytes{name=~\".+\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "C", + "step": 240 + } + ], + "title": "Limit memory", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "showTitle": false, + "titleSize": "h6", + "height": 361, + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + } + ] +} \ No newline at end of file diff --git a/grafana/dashboards/node_exporter_metrics.json b/grafana/dashboards/node_exporter_metrics.json new file mode 100644 index 0000000..9afa66e --- /dev/null +++ b/grafana/dashboards/node_exporter_metrics.json @@ -0,0 +1,1488 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "3.1.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": 
"Prometheus", + "version": "1.0.0" + } + ], + "id": null, + "title": "Node Exporter Server Metrics", + "description": "Dashboard to view multiple servers", + "tags": [ + "prometheus" + ], + "style": "dark", + "timezone": "browser", + "editable": true, + "hideControls": false, + "sharedCrosshair": false, + "rows": [ + { + "collapse": false, + "editable": true, + "height": "25px", + "panels": [ + { + "content": "", + "editable": true, + "error": false, + "id": 11, + "minSpan": 2, + "mode": "html", + "repeat": "node", + "span": 12, + "style": {}, + "title": "$node", + "type": "text" + } + ], + "title": "Title" + }, + { + "collapse": false, + "editable": true, + "height": "25px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "repeat": "node", + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "count(node_cpu{instance=~\"$node\", mode=\"system\"})", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 14400, + "target": "" + } + ], + "thresholds": "", + "title": "CPU Cores", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": 
"250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "prometheus", + "decimals": 3, + "editable": true, + "error": false, + "fill": 10, + "grid": { + "threshold1": 0, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)", + "thresholdLine": false + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": true, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (mode)(irate(node_cpu{mode=\"system\",instance=~'$node'}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{mode}}", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='user',instance=~'$node'}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "user", + "refId": "B", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='nice',instance=~'$node'}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "nice", + "refId": "C", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='iowait',instance=~'$node'}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "iowait", + "refId": "E", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='steal',instance=~'$node'}[5m]))", + "intervalFactor": 2, + "legendFormat": "steal", + "refId": "H", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='idle',instance=~'$node'}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "idle", + "refId": "D", + "step": 1200 
+ }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='irq',instance=~'$node'}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "irq", + "refId": "F", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='softirq',instance=~'$node'}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "softirq", + "refId": "G", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='guest',instance=~'$node'}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "guest", + "refId": "I", + "step": 1200 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "msResolution": false, + "shared": true, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "short", + "label": "%", + "logBase": 1, + "max": 100, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "CPU" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": { + "Slab": "#E5A8E2", + "Swap": "#E24D42" + }, + "bars": false, + "datasource": "prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [ + { + "alias": "/Apps|Buffers|Cached|Free|Slab|SwapCached|PageTables|VmallocUsed/", + "fill": 5, + "stack": true + }, + { + "alias": "Swap", + "fill": 5, + "stack": true + } + ], + 
"span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "( node_memory_MemTotal{instance=~'$node'} - node_memory_MemFree{instance=~'$node'} - node_memory_Buffers{instance=~'$node'} - node_memory_Cached{instance=~'$node'} - node_memory_SwapCached{instance=~'$node'} - node_memory_Slab{instance=~'$node'} - node_memory_PageTables{instance=~'$node'} - node_memory_VmallocUsed{instance=~'$node'} )", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Apps", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + }, + { + "expr": "node_memory_Buffers{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Buffers", + "refId": "B", + "step": 1200 + }, + { + "expr": "node_memory_Cached{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Cached", + "refId": "D", + "step": 1200 + }, + { + "expr": "node_memory_MemFree{instance=~'$node'}", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Free", + "refId": "E", + "step": 1200 + }, + { + "expr": "node_memory_Slab{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Slab", + "refId": "F", + "step": 1200 + }, + { + "expr": "node_memory_SwapCached{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "SwapCached", + "refId": "G", + "step": 1200 + }, + { + "expr": "node_memory_PageTables{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "PageTables", + "refId": "H", + "step": 1200 + }, + { + "expr": "node_memory_VmallocUsed{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "VmallocUsed", + "metric": "", + "refId": "I", + "step": 1200 + }, + { + "expr": "(node_memory_SwapTotal{instance=~'$node'} - node_memory_SwapFree{instance=~'$node'})", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Swap", + "metric": "", + "refId": "C", + "step": 1200 + }, + { + "expr": 
"node_memory_Committed_AS{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Committed", + "metric": "", + "refId": "J", + "step": 1200 + }, + { + "expr": "node_memory_Mapped{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Mapped", + "refId": "K", + "step": 1200 + }, + { + "expr": "node_memory_Active{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Active", + "metric": "", + "refId": "L", + "step": 1200 + }, + { + "expr": "node_memory_Inactive{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Inactive", + "metric": "", + "refId": "M", + "step": 1200 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "tooltip": { + "msResolution": false, + "shared": true, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bytes", + "label": "GB", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "Memory" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"node_load1{instance=~\"$node\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "load", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Load", + "tooltip": { + "msResolution": false, + "shared": true, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "Load" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "prometheus", + "decimals": 3, + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100.0 - 100 * (node_filesystem_avail{instance=~'$node',device !~'tmpfs',device!~'by-uuid'} / node_filesystem_size{instance=~'$node',device !~'tmpfs',device!~'by-uuid'})", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{mountpoint}}", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Space Used", + "tooltip": { + "msResolution": true, + "shared": true, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + 
{ + "format": "percent", + "logBase": 1, + "max": 100, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "Disk Used" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_io_time_ms{instance=~\"$node\"}[5m])/10", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Utilization per Device", + "tooltip": { + "msResolution": false, + "shared": false, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "percent", + "logBase": 1, + "max": 100, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "Disk Utilization" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 
0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [ + { + "alias": "/.*_read$/", + "transform": "negative-Y" + } + ], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_reads_completed{instance=~'$node'}[5m])", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}}_read", + "metric": "", + "refId": "A", + "step": 2400, + "target": "" + }, + { + "expr": "irate(node_disk_writes_completed{instance=~'$node'}[5m])", + "intervalFactor": 2, + "legendFormat": "{{device}}_write", + "metric": "", + "refId": "B", + "step": 1200 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IOs per Device", + "tooltip": { + "msResolution": false, + "shared": false, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "short", + "label": "IO/second read (-) / write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "Disk IOs per device" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": 
false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [ + { + "alias": "/.*_read/", + "transform": "negative-Y" + } + ], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_sectors_read{instance=~'$node'}[5m]) * 512", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}}_read", + "refId": "B", + "step": 2400 + }, + { + "expr": "irate(node_disk_sectors_written{instance=~'$node'}[5m]) * 512", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}}_write", + "metric": "", + "refId": "A", + "step": 2400, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Throughput per Device", + "tooltip": { + "msResolution": false, + "shared": false, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bytes/second read (-) / write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "Disk Throughput per device" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + 
"points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_context_switches{instance=~\"$node\"}[5m])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "context switches", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Context Switches", + "tooltip": { + "msResolution": false, + "shared": true, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "Network Traffic" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [ + { + "alias": "/.*_in/", + "transform": "negative-Y" + } + ], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_bytes{instance=~'$node'}[5m])*8", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{device}}_in", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + }, + { + "expr": 
"irate(node_network_transmit_bytes{instance=~'$node'}[5m])*8", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{device}}_out", + "refId": "B", + "step": 1200 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Traffic", + "tooltip": { + "msResolution": false, + "shared": true, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "bits", + "label": "bits in (-) / bits out (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_netstat_Tcp_CurrEstab{instance=~'$node'}", + "intervalFactor": 2, + "legendFormat": "established", + "refId": "A", + "step": 1200, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Netstat", + "tooltip": { + "msResolution": false, + "shared": true, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": 
"short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [ + { + "alias": "/.*Out.*/", + "transform": "negative-Y" + }, + { + "alias": "Udp_NoPorts", + "yaxis": 2 + } + ], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_netstat_Udp_InDatagrams{instance=~\"$node\"}[5m])", + "intervalFactor": 2, + "legendFormat": "Udp_InDatagrams", + "refId": "A", + "step": 1200, + "target": "" + }, + { + "expr": "irate(node_netstat_Udp_InErrors{instance=~\"$node\"}[5m])", + "intervalFactor": 2, + "legendFormat": "Udp_InErrors", + "refId": "B", + "step": 1200 + }, + { + "expr": "irate(node_netstat_Udp_OutDatagrams{instance=~\"$node\"}[5m])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Udp_OutDatagrams", + "refId": "C", + "step": 1200 + }, + { + "expr": "irate(node_netstat_Udp_NoPorts{instance=~\"$node\"}[5m])", + "intervalFactor": 2, + "legendFormat": "Udp_NoPorts", + "refId": "D", + "step": 1200 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "UDP Stats", + "tooltip": { + "msResolution": false, + "shared": true, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": 
"short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_nf_conntrack_entries_limit{instance=~\"$node\"} - node_nf_conntrack_entries{instance=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "free", + "refId": "A", + "step": 1200, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Conntrack", + "tooltip": { + "msResolution": false, + "shared": true, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "title": "New row" + } + ], + "time": { + "from": "now-7d", + "to": "now" + }, + "timepicker": { + "now": true, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] 
+ }, + "templating": { + "list": [ + { + "allFormat": "glob", + "current": { + "tags": [], + "text": null, + "value": [] + }, + "datasource": "prometheus", + "hide": 0, + "includeAll": false, + "label": "", + "multi": true, + "multiFormat": "regex values", + "name": "node", + "query": "label_values(node_boot_time, instance)", + "refresh": 1, + "sort": 1, + "type": "query", + "options": [] + } + ] + }, + "annotations": { + "list": [] + }, + "schemaVersion": 12, + "version": 30, + "links": [], + "gnetId": 405 +} \ No newline at end of file diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml index 4a96027..be293b5 100644 --- a/prometheus/prometheus.yml +++ b/prometheus/prometheus.yml @@ -32,3 +32,15 @@ scrape_configs: scrape_interval: "15s" static_configs: - targets: ['node_exporter:9100'] + + - job_name: 'cadvisor' + scrape_interval: 5s + static_configs: + - targets: ['cadvisor:8080'] + +alerting: + alertmanagers: + - scheme: http + static_configs: + - targets: + - "alertmanager:9093" diff --git a/undeploy_all.sh b/undeploy_all.sh index cd39b6e..f4a621c 100755 --- a/undeploy_all.sh +++ b/undeploy_all.sh @@ -13,16 +13,16 @@ do_cleanup () { fi - read -r -p "Do you want to delete all docker \"bridge\" networks? [y/N] " response - if [[ "$response" =~ ^([yY][eE][sS]|[yY])+$ ]] - then - for n in $(docker network ls | grep "bridge" | awk '/ / { print $1 }'); do - echo -e "attempting to delete network: $n" - docker network rm $n || echo "cannot remove: $n" - done - else - echo -e "'no' chosen" - fi +# read -r -p "Do you want to delete all docker \"bridge\" networks? [y/N] " response +# if [[ "$response" =~ ^([yY][eE][sS]|[yY])+$ ]] +# then +# for n in $(docker network ls | grep "bridge" | awk '/ / { print $1 }'); do +# echo -e "attempting to delete network: $n" +# docker network rm $n || echo "cannot remove: $n" +# done +# else +# echo -e "'no' chosen" +# fi read -r -p "Do you want to delete all docker dangling images? 
[y/N] " response if [[ "$response" =~ ^([yY][eE][sS]|[yY])+$ ]] From 2e3d4e8e61490408b50e4093b5ef417e1f584bfc Mon Sep 17 00:00:00 2001 From: Sebastian Blasiak Date: Fri, 15 Jun 2018 16:51:01 +0200 Subject: [PATCH 2/2] updated doc --- README.md | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4db26e5..b1fd32b 100644 --- a/README.md +++ b/README.md @@ -1 +1,29 @@ -# monitoring-grafana-influxdb-telegraf-prometheus +# What is in this repository? + +This repository contains an easily deployable monitoring solution that uses: + - Grafana (frontend for monitoring + alerts) + - Prometheus (monitoring solution pulling metrics from exporters) + - Node Exporter for Prometheus (metrics exporter-exposer for Prometheus) + - Telegraf (monitoring agent) + - InfluxDB (persistent time-series storage) + - cAdvisor (container monitoring) + - Alertmanager (alerting) + + +# How to use it? + +If you have docker and docker-compose installed, it takes roughly 1 minute to get everything up and running. +If not, it still takes the mentioned ~1 minute plus the time needed for the docker installation. + +## Here is how to install: + +* $ clone the repository +* $ cd to the cloned dir +* $ chmod +x ./deploy_all.sh; ./deploy_all.sh + + + Monitoring should be up and running at http://_**hostname**_:3001/ + + + +