554 lines
21 KiB
YAML
554 lines
21 KiB
YAML
# Label referenced by the `associations` entries of the Bucket and Dashboard
# documents in this template.
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: distracted-yonath-5cb001
spec:
  # Display name and colour of the label.
  name: vsphere
  color: "#326BBA"
|
|
---
|
|
# Bucket named "vsphere"; this is the bucket the dashboard's Flux queries read
# from (`from(bucket: "vsphere")`).
apiVersion: influxdata.com/v2alpha1
kind: Bucket
metadata:
  name: noshing-fermat-5cb005
spec:
  name: vsphere
  # Tag the bucket with the template's vsphere label.
  associations:
    - kind: Label
      name: distracted-yonath-5cb001
|
|
---
|
|
# vSphere host dashboard.
#
# Layout (12 columns wide):
#   row yPos 0: Uptime | CPU Usage | RAM Usage | CPU Utilization Avg % |
#               RAM Utilization
#   row yPos 3: Network Usage | Total Disk Latency | Storage Adapter Latency
#
# Every query reads from the "vsphere" bucket and is scoped to the dashboard's
# selected time range (v.timeRangeStart / v.timeRangeStop).
apiVersion: influxdata.com/v2alpha1
kind: Dashboard
metadata:
  name: vsphere
spec:
  associations:
    - kind: Label
      name: distracted-yonath-5cb001
  charts:
    # Host uptime in days.
    # vSphere reports sys.uptime.latest in seconds, so take the most recent
    # sample and divide by 86400. (The previous query applied skew(), which
    # yields the statistical skewness of the samples, not an uptime.)
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      decimalPlaces: 2
      height: 3
      kind: Single_Stat
      name: Uptime
      queries:
        - query: |-
            from(bucket: "vsphere")
            |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
            |> filter(fn: (r) => r["_measurement"] == "vsphere_host_sys")
            |> filter(fn: (r) => r["_field"] == "uptime_latest")
            |> last()
            |> map(fn: (r) => ({r with _value: float(v: r._value) / 86400.0}))
            |> yield(name: "uptime_days")
      suffix: ' Days'
      width: 2
    # Host network usage over time (vsphere_host_net / usage_average).
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
          suffix: ' kBs'
      colors:
        - hex: '#31C0F6'
          name: Nineteen Eighty Four
          type: scale
        - hex: '#A500A5'
          name: Nineteen Eighty Four
          type: scale
        - hex: '#FF7E27'
          name: Nineteen Eighty Four
          type: scale
      geom: line
      height: 2
      kind: Xy
      name: Network Usage
      position: overlaid
      queries:
        - query: |-
            from(bucket: "vsphere")
            |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
            |> filter(fn: (r) => r["_measurement"] == "vsphere_host_net")
            |> filter(fn: (r) => r["_field"] == "usage_average")
      width: 5
      xCol: _time
      yCol: _value
      yPos: 3
    # Host CPU usage in MHz (vsphere_host_cpu / usagemhz_average).
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      decimalPlaces: 2
      height: 3
      kind: Single_Stat
      name: CPU Usage
      queries:
        - query: |-
            from(bucket: "vsphere")
            |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
            |> filter(fn: (r) => r["_measurement"] == "vsphere_host_cpu")
            |> filter(fn: (r) => r["_field"] == "usagemhz_average")
      suffix: ' MHz'
      width: 1
      xPos: 2
    # Single-stat tile over vsphere_host_mem / totalCapacity_average, taking
    # the per-window maximum.
    # NOTE(review): the tile is titled "RAM Usage" but the field queried is
    # total capacity - confirm this is the intended metric.
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      decimalPlaces: 1
      height: 3
      kind: Single_Stat
      name: RAM Usage
      queries:
        - query: |-
            from(bucket: "vsphere")
            |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
            |> filter(fn: (r) => r["_measurement"] == "vsphere_host_mem")
            |> filter(fn: (r) => r["_field"] == "totalCapacity_average")
            |> aggregateWindow(every: v.windowPeriod, fn: max)
            |> yield(name: "max")
      suffix: ' MB'
      width: 1
      xPos: 3
    # Host CPU utilization over time (vsphere_host_cpu / usage_average).
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
      colors:
        - hex: '#31C0F6'
          name: Nineteen Eighty Four
          type: scale
        - hex: '#A500A5'
          name: Nineteen Eighty Four
          type: scale
        - hex: '#FF7E27'
          name: Nineteen Eighty Four
          type: scale
      geom: line
      height: 3
      kind: Xy
      name: CPU Utilization Avg %
      position: overlaid
      queries:
        - query: |-
            from(bucket: "vsphere")
            |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
            |> filter(fn: (r) => r["_measurement"] == "vsphere_host_cpu")
            |> filter(fn: (r) => r["_field"] == "usage_average")
      width: 4
      xCol: _time
      xPos: 4
      yCol: _value
    # Host disk read and write latency, overlaid on one graph
    # (vsphere_host_disk / totalReadLatency_average, totalWriteLatency_average).
    - axes:
        - base: "10"
          name: y
          scale: linear
        - base: "10"
          name: x
          scale: linear
      colors:
        - hex: '#31C0F6'
          name: Nineteen Eighty Four
          type: scale
        - hex: '#A500A5'
          name: Nineteen Eighty Four
          type: scale
        - hex: '#FF7E27'
          name: Nineteen Eighty Four
          type: scale
      geom: line
      height: 2
      kind: Xy
      name: Total Disk Latency
      position: overlaid
      queries:
        - query: |-
            from(bucket: "vsphere")
            |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
            |> filter(fn: (r) => r["_measurement"] == "vsphere_host_disk")
            |> filter(fn: (r) => r["_field"] == "totalReadLatency_average" or r["_field"] == "totalWriteLatency_average")
      width: 4
      xCol: _time
      xPos: 5
      yCol: _value
      yPos: 3
    # Stacked, shaded graph over vsphere_host_mem / totalCapacity_average
    # (per-window maximum). The y axis deliberately carries no `base` here,
    # unlike the other graphs; it is left as found.
    - axes:
        - base: "10"
          name: x
          scale: linear
        - name: y
          scale: linear
          suffix: ' MB'
      colors:
        - hex: '#31C0F6'
          name: Nineteen Eighty Four
          type: scale
        - hex: '#A500A5'
          name: Nineteen Eighty Four
          type: scale
        - hex: '#FF7E27'
          name: Nineteen Eighty Four
          type: scale
      geom: line
      height: 3
      kind: Xy
      name: RAM Utilization
      position: stacked
      queries:
        - query: |-
            from(bucket: "vsphere")
            |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
            |> filter(fn: (r) => r["_measurement"] == "vsphere_host_mem")
            |> filter(fn: (r) => r["_field"] == "totalCapacity_average")
            |> aggregateWindow(every: v.windowPeriod, fn: max)
            |> yield(name: "max")
      shade: true
      width: 4
      xCol: _time
      xPos: 8
      yCol: _value
    # Storage adapter read and write series, overlaid on one graph
    # (vsphere_host_storageAdapter / read_average, write_average).
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
      colors:
        - hex: '#31C0F6'
          name: Nineteen Eighty Four
          type: scale
        - hex: '#A500A5'
          name: Nineteen Eighty Four
          type: scale
        - hex: '#FF7E27'
          name: Nineteen Eighty Four
          type: scale
      geom: line
      height: 2
      kind: Xy
      name: Storage Adapter Latency
      position: overlaid
      queries:
        - query: |-
            from(bucket: "vsphere")
            |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
            |> filter(fn: (r) => r["_measurement"] == "vsphere_host_storageAdapter")
            |> filter(fn: (r) => r["_field"] == "read_average" or r["_field"] == "write_average")
      width: 3
      xCol: _time
      xPos: 9
      yCol: _value
      yPos: 3
|
|
|
|
---
|
|
|
|
# Telegraf configuration for the vSphere template.
#
# `spec.config` is a literal block holding a Telegraf TOML file with one
# output (influxdb_v2) and one input (vsphere). It expects these environment
# variables to be set for the Telegraf process:
#   INFLUX_HOST, INFLUX_TOKEN, INFLUX_ORG, INFLUX_BUCKET,
#   VSPHERE_HOST, VSPHERE_USER, VSPHERE_PASSWORD
# The dashboard in this template queries the bucket named "vsphere", so
# INFLUX_BUCKET should point at that bucket for the tiles to show data.
apiVersion: influxdata.com/v2alpha1
kind: Telegraf
metadata:
  name: vsphere
spec:
  config: |
    # Telegraf Configuration
    #
    # Telegraf is entirely plugin driven. All metrics are gathered from the
    # declared inputs, and sent to the declared outputs.
    #
    # Plugins must be declared in here to be active.
    # To deactivate a plugin, comment out the name and any variables.
    #
    # Use 'telegraf -config telegraf.conf -test' to see what metrics a config
    # file would generate.
    #
    # Environment variables can be used anywhere in this config file, simply surround
    # them with ${}. For strings the variable must be within quotes (ie, "${STR_VAR}"),
    # for numbers and booleans they should be plain (ie, ${INT_VAR}, ${BOOL_VAR})


    # Global tags can be specified here in key="value" format.
    [global_tags]
      # dc = "us-east-1" # will tag all metrics with dc=us-east-1
      # rack = "1a"
      ## Environment variables can be used as tags, and throughout the config file
      # user = "$USER"


    # Configuration for telegraf agent
    [agent]
      ## Default data collection interval for all inputs
      interval = "10s"
      ## Rounds collection interval to 'interval'
      ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
      round_interval = true

      ## Telegraf will send metrics to outputs in batches of at most
      ## metric_batch_size metrics.
      ## This controls the size of writes that Telegraf sends to output plugins.
      metric_batch_size = 1000

      ## Maximum number of unwritten metrics per output. Increasing this value
      ## allows for longer periods of output downtime without dropping metrics at the
      ## cost of higher maximum memory usage.
      metric_buffer_limit = 10000

      ## Collection jitter is used to jitter the collection by a random amount.
      ## Each plugin will sleep for a random time within jitter before collecting.
      ## This can be used to avoid many plugins querying things like sysfs at the
      ## same time, which can have a measurable effect on the system.
      collection_jitter = "0s"

      ## Default flushing interval for all outputs. Maximum flush_interval will be
      ## flush_interval + flush_jitter
      flush_interval = "10s"
      ## Jitter the flush interval by a random amount. This is primarily to avoid
      ## large write spikes for users running a large number of telegraf instances.
      ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
      flush_jitter = "0s"

      ## By default or when set to "0s", precision will be set to the same
      ## timestamp order as the collection interval, with the maximum being 1s.
      ## ie, when interval = "10s", precision will be "1s"
      ## when interval = "250ms", precision will be "1ms"
      ## Precision will NOT be used for service inputs. It is up to each individual
      ## service input to set the timestamp at the appropriate precision.
      ## Valid time units are "ns", "us" (or "µs"), "ms", "s".
      precision = ""

      ## Log at debug level.
      # debug = false
      ## Log only error level messages.
      # quiet = false

      ## Log target controls the destination for logs and can be one of "file",
      ## "stderr" or, on Windows, "eventlog". When set to "file", the output file
      ## is determined by the "logfile" setting.
      # logtarget = "file"

      ## Name of the file to be logged to when using the "file" logtarget. If set to
      ## the empty string then logs are written to stderr.
      # logfile = ""

      ## The logfile will be rotated after the time interval specified. When set
      ## to 0 no time based rotation is performed. Logs are rotated only when
      ## written to, if there is no log activity rotation may be delayed.
      # logfile_rotation_interval = "0d"

      ## The logfile will be rotated when it becomes larger than the specified
      ## size. When set to 0 no size based rotation is performed.
      # logfile_rotation_max_size = "0MB"

      ## Maximum number of rotated archives to keep, any older logs are deleted.
      ## If set to -1, no archives are removed.
      # logfile_rotation_max_archives = 5

      ## Override default hostname, if empty use os.Hostname()
      hostname = ""
      ## If set to true, do not set the "host" tag in the telegraf agent.
      omit_hostname = false


    ###############################################################################
    # OUTPUT PLUGINS #
    ###############################################################################

    [[outputs.influxdb_v2]]
      ## The URLs of the InfluxDB cluster nodes.
      ##
      ## Multiple URLs can be specified for a single cluster, only ONE of the
      ## urls will be written to each interval.
      ## urls exp: http://127.0.0.1:9999
      urls = ["$INFLUX_HOST"]

      ## Token for authentication.
      token = "$INFLUX_TOKEN"

      ## Organization is the name of the organization you wish to write to; must exist.
      organization = "$INFLUX_ORG"

      ## Destination bucket to write into.
      bucket = "$INFLUX_BUCKET"

    # Read metrics from one or many vCenters
    [[inputs.vsphere]]
      ## List of vCenter URLs to be monitored. These three lines must be uncommented
      ## and edited for the plugin to work.
      ## Credentials are read from the VSPHERE_USER and VSPHERE_PASSWORD environment
      ## variables. Variable names may only contain letters, digits and underscores,
      ## so hyphenated names such as "$vsphere-user" are never substituted.
      vcenters = [ "https://$VSPHERE_HOST/sdk" ]
      username = "$VSPHERE_USER"
      password = "$VSPHERE_PASSWORD"

      ## VMs
      ## Typical VM metrics (if omitted or empty, all metrics are collected)
      # vm_include = [ "/*/vm/**"] # Inventory path to VMs to collect (by default all are collected)
      # vm_exclude = [] # Inventory paths to exclude
      vm_metric_include = [
        "cpu.demand.average",
        "cpu.idle.summation",
        "cpu.latency.average",
        "cpu.readiness.average",
        "cpu.ready.summation",
        "cpu.run.summation",
        "cpu.usagemhz.average",
        "cpu.used.summation",
        "cpu.wait.summation",
        "mem.active.average",
        "mem.granted.average",
        "mem.latency.average",
        "mem.swapin.average",
        "mem.swapinRate.average",
        "mem.swapout.average",
        "mem.swapoutRate.average",
        "mem.usage.average",
        "mem.vmmemctl.average",
        "net.bytesRx.average",
        "net.bytesTx.average",
        "net.droppedRx.summation",
        "net.droppedTx.summation",
        "net.usage.average",
        "power.power.average",
        "virtualDisk.numberReadAveraged.average",
        "virtualDisk.numberWriteAveraged.average",
        "virtualDisk.read.average",
        "virtualDisk.readOIO.latest",
        "virtualDisk.throughput.usage.average",
        "virtualDisk.totalReadLatency.average",
        "virtualDisk.totalWriteLatency.average",
        "virtualDisk.write.average",
        "virtualDisk.writeOIO.latest",
        "sys.uptime.latest",
      ]
      # vm_metric_exclude = [] ## Nothing is excluded by default
      # vm_instances = true ## true by default

      ## Hosts
      ## Typical host metrics (if omitted or empty, all metrics are collected)
      # host_include = [ "/*/host/**"] # Inventory path to hosts to collect (by default all are collected)
      # host_exclude = [] # Inventory paths to exclude
      host_metric_include = [
        "cpu.coreUtilization.average",
        "cpu.costop.summation",
        "cpu.demand.average",
        "cpu.idle.summation",
        "cpu.latency.average",
        "cpu.readiness.average",
        "cpu.ready.summation",
        "cpu.swapwait.summation",
        "cpu.usage.average",
        "cpu.usagemhz.average",
        "cpu.used.summation",
        "cpu.utilization.average",
        "cpu.wait.summation",
        "disk.deviceReadLatency.average",
        "disk.deviceWriteLatency.average",
        "disk.kernelReadLatency.average",
        "disk.kernelWriteLatency.average",
        "disk.numberReadAveraged.average",
        "disk.numberWriteAveraged.average",
        "disk.read.average",
        "disk.totalReadLatency.average",
        "disk.totalWriteLatency.average",
        "disk.write.average",
        "mem.active.average",
        "mem.latency.average",
        "mem.state.latest",
        "mem.swapin.average",
        "mem.swapinRate.average",
        "mem.swapout.average",
        "mem.swapoutRate.average",
        "mem.totalCapacity.average",
        "mem.usage.average",
        "mem.vmmemctl.average",
        "net.bytesRx.average",
        "net.bytesTx.average",
        "net.droppedRx.summation",
        "net.droppedTx.summation",
        "net.errorsRx.summation",
        "net.errorsTx.summation",
        "net.usage.average",
        "power.power.average",
        "storageAdapter.numberReadAveraged.average",
        "storageAdapter.numberWriteAveraged.average",
        "storageAdapter.read.average",
        "storageAdapter.write.average",
        "sys.uptime.latest",
      ]
      ## Collect IP addresses? Valid values are "ipv4" and "ipv6"
      # ip_addresses = ["ipv6", "ipv4" ]

      # host_metric_exclude = [] ## Nothing excluded by default
      # host_instances = true ## true by default


      ## Clusters
      # cluster_include = [ "/*/host/**"] # Inventory path to clusters to collect (by default all are collected)
      # cluster_exclude = [] # Inventory paths to exclude
      # cluster_metric_include = [] ## if omitted or empty, all metrics are collected
      # cluster_metric_exclude = [] ## Nothing excluded by default
      # cluster_instances = false ## false by default

      ## Datastores
      # datastore_include = [ "/*/datastore/**"] # Inventory path to datastores to collect (by default all are collected)
      # datastore_exclude = [] # Inventory paths to exclude
      # datastore_metric_include = [] ## if omitted or empty, all metrics are collected
      # datastore_metric_exclude = [] ## Nothing excluded by default
      # datastore_instances = false ## false by default

      ## Datacenters
      # datacenter_include = [ "/*/host/**"] # Inventory path to datacenters to collect (by default all are collected)
      # datacenter_exclude = [] # Inventory paths to exclude
      datacenter_metric_include = [] ## if omitted or empty, all metrics are collected
      datacenter_metric_exclude = [ "*" ] ## Datacenters are not collected by default.
      # datacenter_instances = false ## false by default

      ## Plugin Settings
      ## separator character to use for measurement and field names (default: "_")
      # separator = "_"

      ## number of objects to retrieve per query for realtime resources (vms and hosts)
      ## set to 64 for vCenter 5.5 and 6.0 (default: 256)
      # max_query_objects = 256

      ## number of metrics to retrieve per query for non-realtime resources (clusters and datastores)
      ## set to 64 for vCenter 5.5 and 6.0 (default: 256)
      # max_query_metrics = 256

      ## number of go routines to use for collection and discovery of objects and metrics
      # collect_concurrency = 1
      # discover_concurrency = 1

      ## the interval before (re)discovering objects subject to metrics collection (default: 300s)
      # object_discovery_interval = "300s"

      ## timeout applies to any of the api request made to vcenter
      # timeout = "60s"

      ## When set to true, all samples are sent as integers. This makes the output
      ## data types backwards compatible with Telegraf 1.9 or lower. Normally all
      ## samples from vCenter, with the exception of percentages, are integer
      ## values, but under some conditions, some averaging takes place internally in
      ## the plugin. Setting this flag to "false" will send values as floats to
      ## preserve the full precision when averaging takes place.
      # use_int_samples = true

      ## Custom attributes from vCenter can be very useful for queries in order to slice the
      ## metrics along different dimension and for forming ad-hoc relationships. They are disabled
      ## by default, since they can add a considerable amount of tags to the resulting metrics. To
      ## enable, simply set custom_attribute_exclude to [] (empty set) and use custom_attribute_include
      ## to select the attributes you want to include.
      # custom_attribute_include = []
      # custom_attribute_exclude = ["*"]

      ## Optional SSL Config
      # ssl_ca = "/path/to/cafile"
      # ssl_cert = "/path/to/certfile"
      # ssl_key = "/path/to/keyfile"
      ## Use SSL but skip chain & host verification
      ## WARNING: certificate verification is disabled below. For production use,
      ## set this to false and point ssl_ca at the vCenter's CA certificate.
      insecure_skip_verify = true
|