influx-templates/vsphere/vsphere.yml
2022-10-18 11:49:53 +00:00

554 lines
21 KiB
YAML

apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
name: distracted-yonath-5cb001
spec:
color: '#326BBA'
name: vsphere
---
apiVersion: influxdata.com/v2alpha1
kind: Bucket
metadata:
name: noshing-fermat-5cb005
spec:
associations:
- kind: Label
name: distracted-yonath-5cb001
name: vsphere
---
apiVersion: influxdata.com/v2alpha1
kind: Dashboard
metadata:
name: vsphere
spec:
associations:
- kind: Label
name: distracted-yonath-5cb001
charts:
- colors:
- hex: '#00C9FF'
name: laser
type: text
decimalPlaces: 2
height: 3
kind: Single_Stat
name: Uptime
queries:
- query: |-
from(bucket: "vsphere")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) => r["_measurement"] == "vsphere_host_sys")
|> filter(fn: (r) => r["_field"] == "uptime_latest")
|> skew()
|> yield(name: "skew")
suffix: ' Days'
width: 2
- axes:
- base: "10"
name: x
scale: linear
- base: "10"
name: y
scale: linear
suffix: ' kBs'
colors:
- hex: '#31C0F6'
name: Nineteen Eighty Four
type: scale
- hex: '#A500A5'
name: Nineteen Eighty Four
type: scale
- hex: '#FF7E27'
name: Nineteen Eighty Four
type: scale
geom: line
height: 2
kind: Xy
name: Network Usage
position: overlaid
queries:
- query: |-
from(bucket: "vsphere")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) => r["_measurement"] == "vsphere_host_net")
|> filter(fn: (r) => r["_field"] == "usage_average")
width: 5
xCol: _time
yCol: _value
yPos: 3
- colors:
- hex: '#00C9FF'
name: laser
type: text
decimalPlaces: 2
height: 3
kind: Single_Stat
name: CPU Usage
queries:
- query: |-
from(bucket: "vsphere")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) => r["_measurement"] == "vsphere_host_cpu")
|> filter(fn: (r) => r["_field"] == "usagemhz_average")
suffix: ' MHz'
width: 1
xPos: 2
- colors:
- hex: '#00C9FF'
name: laser
type: text
decimalPlaces: 1
height: 3
kind: Single_Stat
name: RAM Usage
queries:
- query: |-
from(bucket: "vsphere")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) => r["_measurement"] == "vsphere_host_mem")
|> filter(fn: (r) => r["_field"] == "totalCapacity_average")
|> aggregateWindow(every: v.windowPeriod, fn: max)
|> yield(name: "max")
suffix: ' MB'
width: 1
xPos: 3
- axes:
- base: "10"
name: x
scale: linear
- base: "10"
name: y
scale: linear
colors:
- hex: '#31C0F6'
name: Nineteen Eighty Four
type: scale
- hex: '#A500A5'
name: Nineteen Eighty Four
type: scale
- hex: '#FF7E27'
name: Nineteen Eighty Four
type: scale
geom: line
height: 3
kind: Xy
name: CPU Utilization Avg %
position: overlaid
queries:
- query: |-
from(bucket: "vsphere")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) => r["_measurement"] == "vsphere_host_cpu")
|> filter(fn: (r) => r["_field"] == "usage_average")
width: 4
xCol: _time
xPos: 4
yCol: _value
- axes:
- base: "10"
name: y
scale: linear
- base: "10"
name: x
scale: linear
colors:
- hex: '#31C0F6'
name: Nineteen Eighty Four
type: scale
- hex: '#A500A5'
name: Nineteen Eighty Four
type: scale
- hex: '#FF7E27'
name: Nineteen Eighty Four
type: scale
geom: line
height: 2
kind: Xy
name: Total Disk Latency
position: overlaid
queries:
- query: |-
from(bucket: "vsphere")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) => r["_measurement"] == "vsphere_host_disk")
|> filter(fn: (r) => r["_field"] == "totalReadLatency_average" or r["_field"] == "totalWriteLatency_average")
width: 4
xCol: _time
xPos: 5
yCol: _value
yPos: 3
- axes:
- base: "10"
name: x
scale: linear
- name: y
scale: linear
suffix: ' MB'
colors:
- hex: '#31C0F6'
name: Nineteen Eighty Four
type: scale
- hex: '#A500A5'
name: Nineteen Eighty Four
type: scale
- hex: '#FF7E27'
name: Nineteen Eighty Four
type: scale
geom: line
height: 3
kind: Xy
name: RAM Utilization
position: stacked
queries:
- query: |-
from(bucket: "vsphere")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) => r["_measurement"] == "vsphere_host_mem")
|> filter(fn: (r) => r["_field"] == "totalCapacity_average")
|> aggregateWindow(every: v.windowPeriod, fn: max)
|> yield(name: "max")
shade: true
width: 4
xCol: _time
xPos: 8
yCol: _value
- axes:
- base: "10"
name: x
scale: linear
- base: "10"
name: y
scale: linear
colors:
- hex: '#31C0F6'
name: Nineteen Eighty Four
type: scale
- hex: '#A500A5'
name: Nineteen Eighty Four
type: scale
- hex: '#FF7E27'
name: Nineteen Eighty Four
type: scale
geom: line
height: 2
kind: Xy
name: Storage Adapter Latency
position: overlaid
queries:
- query: |-
from(bucket: "vsphere")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) => r["_measurement"] == "vsphere_host_storageAdapter")
|> filter(fn: (r) => r["_field"] == "read_average" or r["_field"] == "write_average")
width: 3
xCol: _time
xPos: 9
yCol: _value
yPos: 3
---
apiVersion: influxdata.com/v2alpha1
kind: Telegraf
metadata:
name: vsphere
spec:
config: |
# Telegraf Configuration
#
# Telegraf is entirely plugin driven. All metrics are gathered from the
# declared inputs, and sent to the declared outputs.
#
# Plugins must be declared in here to be active.
# To deactivate a plugin, comment out the name and any variables.
#
# Use 'telegraf -config telegraf.conf -test' to see what metrics a config
# file would generate.
#
# Environment variables can be used anywhere in this config file, simply surround
# them with ${}. For strings the variable must be within quotes (ie, "${STR_VAR}"),
# for numbers and booleans they should be plain (ie, ${INT_VAR}, ${BOOL_VAR})
# Global tags can be specified here in key="value" format.
[global_tags]
# dc = "us-east-1" # will tag all metrics with dc=us-east-1
# rack = "1a"
## Environment variables can be used as tags, and throughout the config file
# user = "$USER"
# Configuration for telegraf agent
[agent]
## Default data collection interval for all inputs
interval = "10s"
## Rounds collection interval to 'interval'
## ie, if interval="10s" then always collect on :00, :10, :20, etc.
round_interval = true
## Telegraf will send metrics to outputs in batches of at most
## metric_batch_size metrics.
## This controls the size of writes that Telegraf sends to output plugins.
metric_batch_size = 1000
## Maximum number of unwritten metrics per output. Increasing this value
## allows for longer periods of output downtime without dropping metrics at the
## cost of higher maximum memory usage.
metric_buffer_limit = 10000
## Collection jitter is used to jitter the collection by a random amount.
## Each plugin will sleep for a random time within jitter before collecting.
## This can be used to avoid many plugins querying things like sysfs at the
## same time, which can have a measurable effect on the system.
collection_jitter = "0s"
## Default flushing interval for all outputs. Maximum flush_interval will be
## flush_interval + flush_jitter
flush_interval = "10s"
## Jitter the flush interval by a random amount. This is primarily to avoid
## large write spikes for users running a large number of telegraf instances.
## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
flush_jitter = "0s"
## By default or when set to "0s", precision will be set to the same
## timestamp order as the collection interval, with the maximum being 1s.
## ie, when interval = "10s", precision will be "1s"
## when interval = "250ms", precision will be "1ms"
## Precision will NOT be used for service inputs. It is up to each individual
## service input to set the timestamp at the appropriate precision.
## Valid time units are "ns", "us" (or "µs"), "ms", "s".
precision = ""
## Log at debug level.
# debug = false
## Log only error level messages.
# quiet = false
## Log target controls the destination for logs and can be one of "file",
## "stderr" or, on Windows, "eventlog". When set to "file", the output file
## is determined by the "logfile" setting.
# logtarget = "file"
## Name of the file to be logged to when using the "file" logtarget. If set to
## the empty string then logs are written to stderr.
# logfile = ""
## The logfile will be rotated after the time interval specified. When set
## to 0 no time based rotation is performed. Logs are rotated only when
## written to, if there is no log activity rotation may be delayed.
# logfile_rotation_interval = "0d"
## The logfile will be rotated when it becomes larger than the specified
## size. When set to 0 no size based rotation is performed.
# logfile_rotation_max_size = "0MB"
## Maximum number of rotated archives to keep, any older logs are deleted.
## If set to -1, no archives are removed.
# logfile_rotation_max_archives = 5
## Override default hostname, if empty use os.Hostname()
hostname = ""
## If set to true, do no set the "host" tag in the telegraf agent.
omit_hostname = false
###############################################################################
# OUTPUT PLUGINS #
###############################################################################
[[outputs.influxdb_v2]]
## The URLs of the InfluxDB cluster nodes.
##
## Multiple URLs can be specified for a single cluster, only ONE of the
## urls will be written to each interval.
## urls exp: http://127.0.0.1:9999
urls = ["$INFLUX_HOST"]
## Token for authentication.
token = "$INFLUX_TOKEN"
## Organization is the name of the organization you wish to write to; must exist.
organization = "$INFLUX_ORG"
## Destination bucket to write into.
bucket = "$INFLUX_BUCKET"
# Read metrics from one or many vCenters
[[inputs.vsphere]]
## List of vCenter URLs to be monitored. These three lines must be uncommented
## and edited for the plugin to work.
vcenters = [ "https://$VSPHERE_HOST/sdk" ]
username = "$vsphere-user"
password = "$vsphere-password"
## VMs
## Typical VM metrics (if omitted or empty, all metrics are collected)
# vm_include = [ "/*/vm/**"] # Inventory path to VMs to collect (by default all are collected)
# vm_exclude = [] # Inventory paths to exclude
vm_metric_include = [
"cpu.demand.average",
"cpu.idle.summation",
"cpu.latency.average",
"cpu.readiness.average",
"cpu.ready.summation",
"cpu.run.summation",
"cpu.usagemhz.average",
"cpu.used.summation",
"cpu.wait.summation",
"mem.active.average",
"mem.granted.average",
"mem.latency.average",
"mem.swapin.average",
"mem.swapinRate.average",
"mem.swapout.average",
"mem.swapoutRate.average",
"mem.usage.average",
"mem.vmmemctl.average",
"net.bytesRx.average",
"net.bytesTx.average",
"net.droppedRx.summation",
"net.droppedTx.summation",
"net.usage.average",
"power.power.average",
"virtualDisk.numberReadAveraged.average",
"virtualDisk.numberWriteAveraged.average",
"virtualDisk.read.average",
"virtualDisk.readOIO.latest",
"virtualDisk.throughput.usage.average",
"virtualDisk.totalReadLatency.average",
"virtualDisk.totalWriteLatency.average",
"virtualDisk.write.average",
"virtualDisk.writeOIO.latest",
"sys.uptime.latest",
]
# vm_metric_exclude = [] ## Nothing is excluded by default
# vm_instances = true ## true by default
## Hosts
## Typical host metrics (if omitted or empty, all metrics are collected)
# host_include = [ "/*/host/**"] # Inventory path to hosts to collect (by default all are collected)
# host_exclude [] # Inventory paths to exclude
host_metric_include = [
"cpu.coreUtilization.average",
"cpu.costop.summation",
"cpu.demand.average",
"cpu.idle.summation",
"cpu.latency.average",
"cpu.readiness.average",
"cpu.ready.summation",
"cpu.swapwait.summation",
"cpu.usage.average",
"cpu.usagemhz.average",
"cpu.used.summation",
"cpu.utilization.average",
"cpu.wait.summation",
"disk.deviceReadLatency.average",
"disk.deviceWriteLatency.average",
"disk.kernelReadLatency.average",
"disk.kernelWriteLatency.average",
"disk.numberReadAveraged.average",
"disk.numberWriteAveraged.average",
"disk.read.average",
"disk.totalReadLatency.average",
"disk.totalWriteLatency.average",
"disk.write.average",
"mem.active.average",
"mem.latency.average",
"mem.state.latest",
"mem.swapin.average",
"mem.swapinRate.average",
"mem.swapout.average",
"mem.swapoutRate.average",
"mem.totalCapacity.average",
"mem.usage.average",
"mem.vmmemctl.average",
"net.bytesRx.average",
"net.bytesTx.average",
"net.droppedRx.summation",
"net.droppedTx.summation",
"net.errorsRx.summation",
"net.errorsTx.summation",
"net.usage.average",
"power.power.average",
"storageAdapter.numberReadAveraged.average",
"storageAdapter.numberWriteAveraged.average",
"storageAdapter.read.average",
"storageAdapter.write.average",
"sys.uptime.latest",
]
## Collect IP addresses? Valid values are "ipv4" and "ipv6"
# ip_addresses = ["ipv6", "ipv4" ]
# host_metric_exclude = [] ## Nothing excluded by default
# host_instances = true ## true by default
## Clusters
# cluster_include = [ "/*/host/**"] # Inventory path to clusters to collect (by default all are collected)
# cluster_exclude = [] # Inventory paths to exclude
# cluster_metric_include = [] ## if omitted or empty, all metrics are collected
# cluster_metric_exclude = [] ## Nothing excluded by default
# cluster_instances = false ## false by default
## Datastores
# datastore_include = [ "/*/datastore/**"] # Inventory path to datastores to collect (by default all are collected)
# datastore_exclude = [] # Inventory paths to exclude
# datastore_metric_include = [] ## if omitted or empty, all metrics are collected
# datastore_metric_exclude = [] ## Nothing excluded by default
# datastore_instances = false ## false by default
## Datacenters
# datacenter_include = [ "/*/host/**"] # Inventory path to clusters to collect (by default all are collected)
# datacenter_exclude = [] # Inventory paths to exclude
datacenter_metric_include = [] ## if omitted or empty, all metrics are collected
datacenter_metric_exclude = [ "*" ] ## Datacenters are not collected by default.
# datacenter_instances = false ## false by default
## Plugin Settings
## separator character to use for measurement and field names (default: "_")
# separator = "_"
## number of objects to retreive per query for realtime resources (vms and hosts)
## set to 64 for vCenter 5.5 and 6.0 (default: 256)
# max_query_objects = 256
## number of metrics to retreive per query for non-realtime resources (clusters and datastores)
## set to 64 for vCenter 5.5 and 6.0 (default: 256)
# max_query_metrics = 256
## number of go routines to use for collection and discovery of objects and metrics
# collect_concurrency = 1
# discover_concurrency = 1
## the interval before (re)discovering objects subject to metrics collection (default: 300s)
# object_discovery_interval = "300s"
## timeout applies to any of the api request made to vcenter
# timeout = "60s"
## When set to true, all samples are sent as integers. This makes the output
## data types backwards compatible with Telegraf 1.9 or lower. Normally all
## samples from vCenter, with the exception of percentages, are integer
## values, but under some conditions, some averaging takes place internally in
## the plugin. Setting this flag to "false" will send values as floats to
## preserve the full precision when averaging takes place.
# use_int_samples = true
## Custom attributes from vCenter can be very useful for queries in order to slice the
## metrics along different dimension and for forming ad-hoc relationships. They are disabled
## by default, since they can add a considerable amount of tags to the resulting metrics. To
## enable, simply set custom_attribute_exlude to [] (empty set) and use custom_attribute_include
## to select the attributes you want to include.
## By default, since they can add a considerable amount of tags to the resulting metrics. To
## enable, simply set custom_attribute_exlude to [] (empty set) and use custom_attribute_include
## to select the attributes you want to include.
# custom_attribute_include = []
# custom_attribute_exclude = ["*"]
## Optional SSL Config
# ssl_ca = "/path/to/cafile"
# ssl_cert = "/path/to/certfile"
# ssl_key = "/path/to/keyfile"
## Use SSL but skip chain & host verification
insecure_skip_verify = true