adding some base for monitoring, before adding prometheus and the rest
This commit is contained in:
commit
9487fb10aa
73
.gitignore
vendored
Normal file
73
.gitignore
vendored
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
# swp vim edits
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
.os_release.sh
|
||||||
|
.configuration.cluster
|
||||||
|
.configuration.installs
|
||||||
|
.awscli-bundle/
|
||||||
|
|
||||||
|
# Python
|
||||||
|
*.pyc
|
||||||
|
**/.cache
|
||||||
|
**/.coverage
|
||||||
|
|
||||||
|
# Eclipse related files
|
||||||
|
.classpath
|
||||||
|
.cproject
|
||||||
|
.deps
|
||||||
|
.project
|
||||||
|
.settings
|
||||||
|
.metadata/
|
||||||
|
|
||||||
|
# MAC DS files
|
||||||
|
.DS_Store
|
||||||
|
|
||||||
|
# STS
|
||||||
|
.springBeans
|
||||||
|
# Maven and build related
|
||||||
|
target/
|
||||||
|
packages.mk
|
||||||
|
|
||||||
|
# IntelliJ IDEA related files
|
||||||
|
*.iml
|
||||||
|
*.iws
|
||||||
|
*.ipr
|
||||||
|
.idea/
|
||||||
|
|
||||||
|
.sonar-ide.properties
|
||||||
|
|
||||||
|
|
||||||
|
*.orig
|
||||||
|
*rebel*.xml
|
||||||
|
.idea/
|
||||||
|
|
||||||
|
/INSTALL
|
||||||
|
/Makefile
|
||||||
|
/aclocal.m4
|
||||||
|
/autom4te.cache/
|
||||||
|
/build/aux/
|
||||||
|
/config.log
|
||||||
|
/config.status
|
||||||
|
/configure
|
||||||
|
|
||||||
|
# sed related files
|
||||||
|
**/*.bak_remove
|
||||||
|
|
||||||
|
# terraform state files
|
||||||
|
**/.terraform
|
||||||
|
**/*.tfstate
|
||||||
|
**/*.tfstate.backup
|
||||||
|
**/.terraform.tfstate.lock.info
|
||||||
|
|
||||||
|
# Folders and files
|
||||||
|
certificates
|
||||||
|
terraform
|
||||||
|
configuration.cluster
|
||||||
|
configuration.installs
|
||||||
|
other
|
||||||
|
ec2
|
||||||
|
monitoring.backup
|
||||||
|
monitoring/monitoring_*_dir
|
||||||
|
**/*.bak_remove
|
||||||
|
**/*.wrapped
|
||||||
|
data
|
54
docker-compose.yml
Normal file
54
docker-compose.yml
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
version: '3'
|
||||||
|
|
||||||
|
networks:
|
||||||
|
public: {}
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
grafana_lib: {}
|
||||||
|
grafana_ds:
|
||||||
|
|
||||||
|
services:
|
||||||
|
influxdb:
|
||||||
|
image: influxdb:alpine
|
||||||
|
container_name: influxdb
|
||||||
|
ports:
|
||||||
|
- "8086:8086"
|
||||||
|
networks:
|
||||||
|
- public
|
||||||
|
volumes:
|
||||||
|
- ./data/influxdb:/var/lib/influxdb
|
||||||
|
environment:
|
||||||
|
INFLUXDB_REPORTING_DISABLED: "true"
|
||||||
|
INFLUXDB_DB: telegraf
|
||||||
|
INFLUXDB_USER: telegraf
|
||||||
|
INFLUXDB_USER_PASSWORD: nimda
|
||||||
|
|
||||||
|
grafana:
|
||||||
|
image: grafana/grafana:5.1.3
|
||||||
|
container_name: grafana
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
networks:
|
||||||
|
- public
|
||||||
|
volumes:
|
||||||
|
- grafana_lib:/var/lib/grafana
|
||||||
|
- grafana_ds:/var/lib/grafana/ds:rw
|
||||||
|
- ${PWD}/grafana/add_datasources.sh:/var/lib/grafana/ds/add_datasources.sh
|
||||||
|
environment:
|
||||||
|
GF_AUTH_ANONYMOUS_ENABLED: "true"
|
||||||
|
GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin"
|
||||||
|
INFLUXDB_URI: "http://influxdb:8086"
|
||||||
|
INFLUXDB_DB: telegraf
|
||||||
|
INFLUXDB_USER: telegraf
|
||||||
|
INFLUXDB_USER_PASSWORD: nimda
|
||||||
|
command: ["bash", "/var/lib/grafana/ds/add_datasources.sh"]
|
||||||
|
|
||||||
|
telegraf:
|
||||||
|
image: telegraf:latest
|
||||||
|
container_name: telegraf
|
||||||
|
network_mode: "host"
|
||||||
|
volumes:
|
||||||
|
- ./telegraf.conf:/etc/telegraf/telegraf.conf:ro
|
||||||
|
environment:
|
||||||
|
# real influx host
|
||||||
|
INFLUXDB_URI: "http://localhost:8086"
|
97
grafana/add_dashboards.sh
Normal file
97
grafana/add_dashboards.sh
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
#!/bin/bash
#
# Uploads every dashboard found in dashboards/*.json to Grafana through its
# HTTP API. The dashboards/ path is relative, so run this script from the
# directory that contains it.
#
# Environment:
#   GRAFANA_URL - base URL of the Grafana API, credentials included.
#                 Defaults to http://admin:admin@localhost:3000.
set -e

# ANSI colour escape sequences; they are expanded by `echo -e` at print time.
NC='\033[0m'

RED='\033[00;31m'
GREEN='\033[00;32m'
YELLOW='\033[00;33m'
BLUE='\033[00;34m'
PURPLE='\033[00;35m'
CYAN='\033[00;36m'
LIGHTGRAY='\033[00;37m'
MAGENTA='\033[00;35m'
LRED='\033[01;31m'
LGREEN='\033[01;32m'
LYELLOW='\033[01;33m'
LBLUE='\033[01;34m'
LPURPLE='\033[01;35m'
LCYAN='\033[01;36m'
WHITE='\033[01;37m'

# Keep the historical default, but let the caller point the script at another
# Grafana instance (or other credentials) without editing the file.
GRAFANA_URL=${GRAFANA_URL:-http://admin:admin@localhost:3000}
|
||||||
|
|
||||||
|
|
||||||
|
#######################################
# Send a JSON request to the Grafana HTTP API with curl.
# Globals:   GRAFANA_URL (read) - base URL the API path is appended to
# Arguments: $1 - HTTP verb (GET, POST, ...)
#            $2 - API path, e.g. /api/dashboards/db
#            $3 - optional inline request body (passed with -d)
#            $4 - optional file whose contents are sent as the body
# Outputs:   the command line being run, followed by curl's output, on stdout
# Returns:   0 on success, 1 when curl fails (--fail turns HTTP errors
#            into a non-zero exit status)
#######################################
grafana_api() {
  local verb=$1
  local url=$2
  local params=${3:-}
  local bodyfile=${4:-}
  local -a cmd

  # Build the command as an argv array rather than a string fed to eval, so
  # bodies containing quotes/spaces and paths with spaces reach curl intact
  # and are never re-interpreted by the shell.
  cmd=(
    curl -L -s --fail
    -H "Accept: application/json"
    -H "Content-Type: application/json"
    -X "${verb}" -k "${GRAFANA_URL}${url}"
  )
  if [[ -n "${params}" ]]; then
    cmd+=(-d "${params}")
  fi
  if [[ -n "${bodyfile}" ]]; then
    cmd+=(--data "@${bodyfile}")
  fi

  printf 'Running %s\n' "${cmd[*]}"
  "${cmd[@]}" || return 1
  return 0
}
|
||||||
|
|
||||||
|
#######################################
# Block until the Grafana API answers a request successfully.
# Polls /api/user/preferences through grafana_api and sleeps 15 seconds
# between attempts; there is no upper bound on the number of attempts.
# Globals:   BLUE, NC (read)
# Outputs:   progress messages (plus grafana_api's own output) on stdout
#######################################
wait_for_api() {
  echo -e "${BLUE}Waiting for Grafana to be available...${NC}"
  until grafana_api GET /api/user/preferences; do
    echo -e "${BLUE}Waiting still...${NC}"
    sleep 15
  done
}
|
||||||
|
|
||||||
|
#######################################
# Replace every literal ${DS_INFLUXDB} placeholder in a dashboard file with
# the given datasource name, editing the file in place. sed keeps the
# previous contents next to it as <file>.bak_remove.
# NOTE(review): only the ${DS_INFLUXDB} placeholder is substituted, whatever
# datasource name is passed - confirm whether Prometheus dashboards need
# their own placeholder handled.
# Arguments: $1 - path of the dashboard JSON file
#            $2 - datasource name to substitute
# Returns:   0 on success, 1 if sed fails
#######################################
replace_datasource() {
  local dashboard_file=$1
  local datasource_name=$2
  local escaped=""
  local ch
  local i

  # Escape the characters that are special in a sed replacement (backslash,
  # the "/" delimiter and "&") so the name is always inserted literally.
  for ((i = 0; i < ${#datasource_name}; i++)); do
    ch=${datasource_name:i:1}
    case "${ch}" in
      '\' | '/' | '&') escaped+="\\${ch}" ;;
      *) escaped+="${ch}" ;;
    esac
  done

  # Call sed directly (no eval) with quoted arguments so file names containing
  # spaces or shell metacharacters are handled safely.
  sed -i.bak_remove -e 's/${DS_INFLUXDB}/'"${escaped}"'/g' -- "${dashboard_file}" \
    || return 1
  return 0
}
|
||||||
|
|
||||||
|
#######################################
# Upload every dashboards/*.json file to Grafana.
# For each dashboard: detect the datasource it declares (DS_INFLUXDB or
# DS_PROMETHEUS input), substitute the datasource placeholder through
# replace_datasource, wrap the JSON as {"dashboard": ...} in
# <dashboard>.wrapped and POST that file to /api/dashboards/db.
# A failed upload is reported and the loop continues with the next file.
# Globals:   PURPLE, LCYAN, GREEN, RED, NC (read)
# Outputs:   progress messages on stdout
#######################################
install_dashboards() {
  local dashboard
  local datasource_name
  local wrapped

  for dashboard in dashboards/*.json; do
    # An unmatched glob leaves the literal pattern behind; skip anything that
    # is not a regular file before grepping it.
    [[ -f "${dashboard}" ]] || continue

    # Reset per dashboard so a file without a datasource marker does not
    # inherit the datasource detected for the previous file.
    datasource_name=""
    if grep -q -- '"name": "DS_INFLUXDB",' "${dashboard}"; then
      echo -e "${PURPLE}Dashboard ${dashboard} seems to be for InfluxDB datasource${NC}"
      datasource_name="influxdb"
    fi
    if grep -q -- '"name": "DS_PROMETHEUS",' "${dashboard}"; then
      echo -e "${PURPLE}Dashboard ${dashboard} seems to be for Prometheus datasource${NC}"
      datasource_name="prometheus"
    fi

    echo -e "${LCYAN}Installing dashboard ${dashboard}${NC}"
    # Only rewrite placeholders when a datasource was actually detected;
    # replace_datasource leaves a .bak_remove backup of the original file.
    if [[ -n "${datasource_name}" ]]; then
      replace_datasource "${dashboard}" "${datasource_name}"
    fi

    # Wrap the dashboard as {"dashboard": <json>} for the dashboards API.
    # Done with printf/cat instead of GNU-only `sed '1s/^/...\n/'`.
    wrapped="${dashboard}.wrapped"
    {
      printf '{"dashboard":\n'
      cat -- "${dashboard}"
      echo "}"
    } > "${wrapped}"

    if grafana_api POST /api/dashboards/db "" "${wrapped}"; then
      echo -e "\n** ${GREEN}installed ok **${NC}"
    else
      echo -e "\n** ${RED}installation of: ${PURPLE}\"${dashboard}\"${RED} failed **${NC}"
    fi
    # The .wrapped file is intentionally left in place (it is git-ignored).
  done
}
|
||||||
|
|
||||||
|
#######################################
# Script entry point: wait for the Grafana API to respond, then upload
# all dashboards.
#######################################
configure_grafana() {
  wait_for_api; install_dashboards
}
|
||||||
|
|
||||||
|
configure_grafana
|
||||||
|
|
31
grafana/add_datasources.sh
Normal file
31
grafana/add_datasources.sh
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
#!/bin/bash
#
# Registers the InfluxDB and Prometheus datasources in Grafana through its
# HTTP API.
#
# Environment (each falls back to the value that used to be hard-coded):
#   GRAFANA_URL            - Grafana base URL, credentials included
#   INFLUXDB_URI           - URL Grafana uses to reach InfluxDB
#   INFLUXDB_DB            - InfluxDB database name
#   INFLUXDB_USER          - InfluxDB user
#   INFLUXDB_USER_PASSWORD - InfluxDB password
#   PROMETHEUS_URI         - URL Grafana uses to reach Prometheus
#
# `set -e` is deliberately left disabled so the second datasource is still
# attempted when the first request fails.
#set -e

GRAFANA_URL=${GRAFANA_URL:-http://admin:admin@localhost:3000}

# ADD INFLUXDB DATASOURCE
# The here-document delimiter is unquoted, so the ${...} defaults below are
# expanded by the shell before the JSON is piped to curl on stdin (-d @-).
curl -s -v -H "Content-Type: application/json" \
  -XPOST "${GRAFANA_URL}/api/datasources" \
  -d @- <<EOF
{
  "name": "influxdb",
  "type": "influxdb",
  "access": "proxy",
  "url": "${INFLUXDB_URI:-http://influxdb:8086}",
  "database": "${INFLUXDB_DB:-telegraf}",
  "user":"${INFLUXDB_USER:-telegraf}",
  "password":"${INFLUXDB_USER_PASSWORD:-nimda}",
  "basicAuth":false
}
EOF

## ADD PROMETHEUS DATASOURCE
curl -s -v -H "Content-Type: application/json" \
  -XPOST "${GRAFANA_URL}/api/datasources" \
  -d @- <<EOF
{
  "name": "prometheus",
  "type": "prometheus",
  "access": "proxy",
  "url": "${PROMETHEUS_URI:-http://prometheus:9090}"
}
EOF
|
||||||
|
|
2122
grafana/dashboards/prometheus-system.json
Normal file
2122
grafana/dashboards/prometheus-system.json
Normal file
File diff suppressed because it is too large
Load Diff
8131
grafana/dashboards/telegraf-dashboard.json
Normal file
8131
grafana/dashboards/telegraf-dashboard.json
Normal file
File diff suppressed because it is too large
Load Diff
148
telegraf.conf
Normal file
148
telegraf.conf
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
[agent]
|
||||||
|
## Default data collection interval for all inputs
|
||||||
|
interval = "10s"
|
||||||
|
## Rounds collection interval to 'interval'
|
||||||
|
## ie, if interval="10s" then always collect on :00, :10, :20, etc.
|
||||||
|
round_interval = true
|
||||||
|
## Telegraf will send metrics to outputs in batches of at
|
||||||
|
## most metric_batch_size metrics.
|
||||||
|
metric_batch_size = 1000
|
||||||
|
## For failed writes, telegraf will cache metric_buffer_limit metrics for each
|
||||||
|
## output, and will flush this buffer on a successful write. Oldest metrics
|
||||||
|
## are dropped first when this buffer fills.
|
||||||
|
metric_buffer_limit = 10000
|
||||||
|
## Collection jitter is used to jitter the collection by a random amount.
|
||||||
|
## Each plugin will sleep for a random time within jitter before collecting.
|
||||||
|
## This can be used to avoid many plugins querying things like sysfs at the
|
||||||
|
## same time, which can have a measurable effect on the system.
|
||||||
|
collection_jitter = "0s"
|
||||||
|
## Default flushing interval for all outputs. You shouldn't set this below
|
||||||
|
## interval. Maximum flush_interval will be flush_interval + flush_jitter
|
||||||
|
flush_interval = "10s"
|
||||||
|
## Jitter the flush interval by a random amount. This is primarily to avoid
|
||||||
|
## large write spikes for users running a large number of telegraf instances.
|
||||||
|
## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
|
||||||
|
flush_jitter = "0s"
|
||||||
|
## By default, precision will be set to the same timestamp order as the
|
||||||
|
## collection interval, with the maximum being 1s.
|
||||||
|
## Precision will NOT be used for service inputs, such as logparser and statsd.
|
||||||
|
## Valid values are "ns", "us" (or "µs"), "ms", "s".
|
||||||
|
precision = ""
|
||||||
|
## Run telegraf in debug mode
|
||||||
|
debug = false
|
||||||
|
## Run telegraf in quiet mode
|
||||||
|
quiet = false
|
||||||
|
## Override default hostname, if empty use os.Hostname()
|
||||||
|
hostname = ""
|
||||||
|
## If set to true, do not set the "host" tag in the telegraf agent.
|
||||||
|
omit_hostname = false
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# OUTPUT PLUGINS #
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# Configuration for influxdb server to send metrics to
|
||||||
|
[[outputs.influxdb]]
|
||||||
|
|
||||||
|
## The full HTTP or UDP endpoint URL for your InfluxDB instance.
|
||||||
|
## Multiple urls can be specified as part of the same cluster,
|
||||||
|
## this means that only ONE of the urls will be written to each interval.
|
||||||
|
# urls = ["udp://localhost:8089"] # UDP endpoint example
|
||||||
|
|
||||||
|
urls = ["$INFLUXDB_URI"] # required
|
||||||
|
|
||||||
|
|
||||||
|
## The target database for metrics (telegraf will create it if not exists).
|
||||||
|
database = "telegraf" # required
|
||||||
|
## Retention policy to write to. Empty string writes to the default rp.
|
||||||
|
retention_policy = ""
|
||||||
|
## Write consistency (clusters only), can be: "any", "one", "quorum", "all"
|
||||||
|
write_consistency = "any"
|
||||||
|
## Write timeout (for the InfluxDB client), formatted as a string.
|
||||||
|
## If not provided, will default to 5s. 0s means no timeout (not recommended).
|
||||||
|
timeout = "5s"
|
||||||
|
username = "telegraf"
|
||||||
|
password = "nimda"
|
||||||
|
## Set the user agent for HTTP POSTs (can be useful for log differentiation)
|
||||||
|
user_agent = "telegraf-agent"
|
||||||
|
## Set UDP payload size, defaults to InfluxDB UDP Client default (512 bytes)
|
||||||
|
# udp_payload = 512
|
||||||
|
|
||||||
|
## Optional SSL Config
|
||||||
|
# ssl_ca = "/etc/telegraf/ca.pem"
|
||||||
|
# ssl_cert = "/etc/telegraf/cert.pem"
|
||||||
|
# ssl_key = "/etc/telegraf/key.pem"
|
||||||
|
## Use SSL but skip chain & host verification
|
||||||
|
# insecure_skip_verify = false
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# INPUT PLUGINS #
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# Read metrics about cpu usage
|
||||||
|
[[inputs.cpu]]
|
||||||
|
## Whether to report per-cpu stats or not
|
||||||
|
percpu = true
|
||||||
|
## Whether to report total system cpu stats or not
|
||||||
|
totalcpu = true
|
||||||
|
## Comment this line if you want the raw CPU time metrics
|
||||||
|
fielddrop = ["time_*"]
|
||||||
|
collect_cpu_time = true
|
||||||
|
report_active = false
|
||||||
|
|
||||||
|
# Read metrics about disk usage by mount point
|
||||||
|
[[inputs.disk]]
|
||||||
|
#
|
||||||
|
#
|
||||||
|
## By default, telegraf gathers stats for all mountpoints.
|
||||||
|
## Setting mountpoints will restrict the stats to the specified mountpoints.
|
||||||
|
# mount_points = ["/"]
|
||||||
|
|
||||||
|
## Ignore some mountpoints by filesystem type. For example (dev)tmpfs (usually
|
||||||
|
## present on /run, /var/run, /dev/shm or /dev).
|
||||||
|
ignore_fs = ["tmpfs", "devtmpfs"]
|
||||||
|
|
||||||
|
[[inputs.net]]
|
||||||
|
# no configuration
|
||||||
|
# Read metrics about memory usage
|
||||||
|
[[inputs.mem]]
|
||||||
|
# no configuration
|
||||||
|
|
||||||
|
# Read metrics about system load & uptime
|
||||||
|
[[inputs.system]]
|
||||||
|
# no configuration
|
||||||
|
[[inputs.processes]]
|
||||||
|
[[inputs.netstat]]
|
||||||
|
[[inputs.kernel]]
|
||||||
|
|
||||||
|
# Read flattened metrics from one or more JSON HTTP endpoints
|
||||||
|
#[[inputs.httpjson]]
|
||||||
|
# a name for the service being polled
|
||||||
|
#name_override = "monitoring-sebson"
|
||||||
|
|
||||||
|
# Your servers
|
||||||
|
#servers = [
|
||||||
|
# "http://localhost:4444/status"
|
||||||
|
#]
|
||||||
|
#response_timeout = "5s"
|
||||||
|
## HTTP method to use: GET or POST (case-sensitive)
|
||||||
|
#method = "GET"
|
||||||
|
[[inputs.interrupts]]
|
||||||
|
## To filter which IRQs to collect, make use of tagpass / tagdrop, i.e.
|
||||||
|
# [inputs.interrupts.tagdrop]
|
||||||
|
# irq = [ "NET_RX", "TASKLET" ]
|
||||||
|
# Collects conntrack stats from the configured directories and files.
|
||||||
|
[[inputs.conntrack]]
|
||||||
|
## The following defaults would work with multiple versions of conntrack.
|
||||||
|
## Note the nf_ and ip_ filename prefixes are mutually exclusive across
|
||||||
|
## kernel versions, as are the directory locations.
|
||||||
|
|
||||||
|
## Superset of filenames to look for within the conntrack dirs.
|
||||||
|
## Missing files will be ignored.
|
||||||
|
files = ["ip_conntrack_count","ip_conntrack_max",
|
||||||
|
"nf_conntrack_count","nf_conntrack_max"]
|
||||||
|
|
||||||
|
## Directories to search within for the conntrack files above.
|
||||||
|
## Missing directories will be ignored.
|
||||||
|
dirs = ["/proc/sys/net/ipv4/netfilter","/proc/sys/net/netfilter"]
|
Loading…
Reference in New Issue
Block a user