Commit ff976953 authored by root's avatar root
Browse files

Merge branch 'master' of https://github.com/tribe29/checkmk

parents ac4d391e eb669a08
......@@ -4,17 +4,16 @@
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.
import calendar
import time
from typing import Any, Iterator, Mapping, NamedTuple, Optional, Sequence
StringTable = list[list[str]]
Service = tuple[Optional[str], Mapping[str, str]]
DiscoveryResult = Iterator[Service]
CheckResult = Iterator[tuple[int, str]]
from typing import Any, Mapping, NamedTuple, Optional, Sequence
from cmk.base.plugins.agent_based.agent_based_api.v1 import render, Result, Service, State
from cmk.base.plugins.agent_based.agent_based_api.v1.type_defs import (
CheckResult,
DiscoveryResult,
StringTable,
)
# Example outputs from agent:
# <<<heartbeat_crm>>>
......@@ -45,7 +44,7 @@ factory_settings["heartbeat_crm_default_levels"] = {
class _Cluster(NamedTuple):
last_updated: Optional[str]
last_updated: str
dc: Optional[str]
num_nodes: Optional[int]
num_resources: Optional[int]
......@@ -73,9 +72,9 @@ def _parse_for_error(first_line: str) -> Optional[str]:
def heartbeat_crm_parse_general(string_table: StringTable) -> _Cluster:
if (error := _parse_for_error(" ".join(string_table[0]))) is not None:
return _Cluster(None, None, None, None, error)
return _Cluster("", None, None, None, error)
last_updated = None
last_updated = ""
dc = None
num_nodes = None
num_resources = None
......@@ -221,11 +220,13 @@ def parse_heartbeat_crm(string_table: StringTable) -> Optional[Section]:
def discover_heartbeat_crm(section: Section) -> DiscoveryResult:
    """Discover the single, item-less heartbeat CRM cluster service.

    The node and resource counts found in ``section.cluster`` are stored as
    discovered parameters. The current DC is stored as well when the
    ``naildown_dc`` option of the discovery rule set is enabled.
    """
    params = host_extra_conf_merged(host_name(), inventory_heartbeat_crm_rules)

    discovered = {
        "num_nodes": section.cluster.num_nodes,
        "num_resources": section.cluster.num_resources,
    }
    if params.get("naildown_dc", False):
        discovered["dc"] = section.cluster.dc

    # Only the Service object is yielded; the legacy ``(None, {...})`` tuple
    # form has been dropped so the service is not reported twice.
    yield Service(parameters=discovered)
def check_heartbeat_crm(_no_item, params: Mapping[str, Any], section: Section) -> CheckResult:
    """Check the general state of the heartbeat CRM cluster.

    Yields a single CRIT result and stops if the section carries an error
    message or if the "last updated" timestamp is more than
    ``params["max_age"]`` seconds in the past. Otherwise yields results for
    the DC, the number of nodes and the number of resources, each compared
    with the expected value from ``params``. When
    ``params["show_failed_actions"]`` is set, one WARN result is yielded per
    failed action.
    """
    last_updated, dc, num_nodes, num_resources, error = section.cluster

    if error is not None:
        yield Result(state=State.CRIT, summary=error)
        return

    # Check the freshness of the crm_mon output and terminate with CRITICAL
    # when the information is too old. The timestamp is interpreted as UTC
    # (calendar.timegm), matching time.time().
    dt = calendar.timegm(time.strptime(last_updated, "%a %b %d %H:%M:%S %Y"))
    now = time.time()
    delta = now - dt
    if delta > params["max_age"]:
        yield Result(
            state=State.CRIT,
            summary=f"Ignoring reported data (Status output too old: {render.timespan(delta)})",
        )
        return

    # Check for correct DC when enabled
    if params.get("dc") is None or dc == params["dc"]:
        yield Result(state=State.OK, summary=f"DC: {dc}")
    else:
        yield Result(state=State.CRIT, summary=f"DC: {dc} (Expected {params['dc']})")

    # Check for number of nodes when enabled
    if params["num_nodes"] is not None and num_nodes is not None:
        if num_nodes == params["num_nodes"]:
            yield Result(state=State.OK, summary="Nodes: %d" % (num_nodes,))
        else:
            yield Result(
                state=State.CRIT,
                summary="Nodes: %d (Expected %d)" % (num_nodes, params["num_nodes"]),
            )

    # Check for number of resources when enabled
    if params["num_resources"] is not None and num_resources is not None:
        if num_resources == params["num_resources"]:
            yield Result(state=State.OK, summary="Resources: %d" % (num_resources,))
        else:
            yield Result(
                state=State.CRIT,
                summary="Resources: %d (Expected %d)" % (num_resources, params["num_resources"]),
            )

    if not params.get("show_failed_actions"):
        return

    for action in section.resources.failed_actions:
        yield Result(state=State.WARN, summary=f"Failed: {action}")
check_info["heartbeat_crm"] = {
......@@ -340,7 +348,7 @@ def _join_lines(lines: Sequence[str]) -> Sequence[str]:
return joined
def inventory_heartbeat_crm_resources(section: Section) -> DiscoveryResult:
def discover_heartbeat_crm_resources(section: Section) -> DiscoveryResult:
# Full list of resources:
# Resource Group: group_slapmaster
# resource_virtip1 (ocf::heartbeat:IPaddr): Started mwp
......@@ -353,39 +361,42 @@ def inventory_heartbeat_crm_resources(section: Section) -> DiscoveryResult:
# In naildown mode only resources which are started somewhere can be
# inventorized
if settings.get("naildown_resources", False) and resources[0][2] != "Stopped":
yield name, {"expected_node": resources[0][3]}
yield Service(item=name, parameters={"expected_node": resources[0][3]})
else:
yield name, {}
yield Service(item=name)
def check_heartbeat_crm_resources(
    item: str,
    params: Mapping[str, Optional[str]],
    section: Section,
) -> CheckResult:
    """Check the state of one heartbeat CRM resource (or resource group).

    Yields nothing when ``item`` is not present in the section. Otherwise
    yields one OK result describing each resource of the item, followed by a
    CRIT result when a three-field resource is not in state "Started", or
    when ``params["expected_node"]`` is set and differs from the node of a
    resource that is neither a "Slave" nor a "Clone".
    """
    if (resources := section.resources.resources.get(item)) is None:
        return

    if not resources:
        yield Result(state=State.OK, summary="No resources found")

    for resource in resources:
        # Fields may be plain strings or lists of strings (e.g. node lists).
        # A conditional expression is used instead of ``and ... or ...`` so
        # that an empty list renders as "" rather than leaking the list
        # object into the outer join (which would raise TypeError).
        yield Result(
            state=State.OK,
            summary=" ".join(", ".join(p) if isinstance(p, list) else p for p in resource),
        )

        if len(resource) == 3 and resource[2] != "Started":
            yield Result(state=State.CRIT, summary='Resource is in state "%s"' % (resource[2],))
        elif (
            (target_node := params.get("expected_node"))
            and target_node != resource[3]
            and resource[1] != "Slave"
            and resource[1] != "Clone"
        ):
            yield Result(state=State.CRIT, summary="Expected node: %s" % (target_node,))
# Legacy registration of the "heartbeat_crm.resources" sub-check. Only the
# renamed discovery function is referenced; the stale duplicate
# "inventory_function" entry pointing at the old function name is removed.
check_info["heartbeat_crm.resources"] = {
    "check_function": check_heartbeat_crm_resources,
    "inventory_function": discover_heartbeat_crm_resources,
    "service_description": "Heartbeat CRM %s",
    "group": "heartbeat_crm_resources",
}
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2019 tribe29 GmbH - License: GNU General Public License v2
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.
from typing import NewType, Optional
from .agent_based_api.v1 import Attributes, register
from .agent_based_api.v1.type_defs import InventoryResult, StringTable
Version = NewType("Version", str)


def parse_citrix_controller(string_table: StringTable) -> Optional[Version]:
    """Extract the controller version from the agent section.

    Returns the token following the first ``ControllerVersion`` key that
    carries a value, or ``None`` when no such line exists.
    """
    for line in string_table:
        # Require a value token: an empty line or a bare "ControllerVersion"
        # line would otherwise raise IndexError.
        if len(line) >= 2 and line[0] == "ControllerVersion":
            return Version(line[1])
    return None
# Register the "citrix_controller" agent section with
# parse_citrix_controller as its parse function.
register.agent_section(
name="citrix_controller",
parse_function=parse_citrix_controller,
)
def inventory_citrix_controller(section: Version) -> InventoryResult:
    """Yield the inventory attributes holding the Citrix controller version.

    The parsed section is stored as ``controller_version`` under
    software > applications > citrix > controller.
    """
    node_path = ["software", "applications", "citrix", "controller"]
    attributes = {"controller_version": section}
    yield Attributes(path=node_path, inventory_attributes=attributes)
# Register the "citrix_controller" inventory plugin with
# inventory_citrix_controller as its inventory function.
register.inventory_plugin(
name="citrix_controller",
inventory_function=inventory_citrix_controller,
)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2019 tribe29 GmbH - License: GNU General Public License v2
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.
from typing import Mapping
from .agent_based_api.v1 import Attributes, register
from .agent_based_api.v1.type_defs import InventoryResult, StringTable
Section = Mapping[str, str]
def parse_citrix_state(string_table: StringTable) -> Section:
    """Map the first token of every line to its remaining tokens joined by spaces.

    A key without any further tokens maps to the empty string; when a key
    occurs more than once, the last occurrence wins.
    """
    parsed = {}
    for key, *rest in string_table:
        parsed[key] = " ".join(rest)
    return parsed
# Register the "citrix_state" agent section with parse_citrix_state as its
# parse function.
register.agent_section(
name="citrix_state",
parse_function=parse_citrix_state,
)
def inventory_citrix_state(section: Section) -> InventoryResult:
    """Yield the inventory attributes of the Citrix VM.

    Copies the desktop group name, catalog and agent version from the parsed
    section into software > applications > citrix > vm. Keys missing from the
    section are left out of the attributes.
    """
    # (inventory attribute name, key in the parsed section)
    key_pairs = (
        ("desktop_group_name", "DesktopGroupName"),
        ("catalog", "Catalog"),
        ("agent_version", "AgentVersion"),
    )

    attributes = {}
    for inventory_key, section_key in key_pairs:
        value = section.get(section_key)
        if value is not None:
            attributes[inventory_key] = value

    yield Attributes(
        path=["software", "applications", "citrix", "vm"],
        inventory_attributes=attributes,
    )
# Register the "citrix_state" inventory plugin with inventory_citrix_state
# as its inventory function.
register.inventory_plugin(
name="citrix_state",
inventory_function=inventory_citrix_state,
)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2019 tribe29 GmbH - License: GNU General Public License v2
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.
def inv_citrix_controller(info):
    """Store the Citrix controller version in the inventory tree.

    ``info`` is iterated as a sequence of token lists. For every line whose
    first token is ``ControllerVersion`` the second token is written to the
    ``controller_version`` key of the software.applications.citrix.controller
    node.
    """
    node = inv_tree("software.applications.citrix.controller.")
    for line in info:
        # Skip empty lines and a bare "ControllerVersion" without a value,
        # which would otherwise raise IndexError.
        if len(line) > 1 and line[0] == "ControllerVersion":
            node["controller_version"] = line[1]
# Register inv_citrix_controller as the "inv_function" of the
# "citrix_controller" entry in inv_info.
inv_info["citrix_controller"] = {
"inv_function": inv_citrix_controller,
}
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2019 tribe29 GmbH - License: GNU General Public License v2
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.
def inv_citrix_state(info):
    """Store Citrix VM state details in the inventory tree.

    ``info`` is iterated as a sequence of token lists. The desktop group name
    and the catalog are stored as the space-joined remaining tokens of their
    line; the agent version is stored as the single token following its key.
    All values go to the software.applications.citrix.vm node.
    """
    node = inv_tree("software.applications.citrix.vm.")
    for line in info:
        if not line:
            # An empty line has no key; indexing it would raise IndexError.
            continue

        key, values = line[0], line[1:]
        if key == "DesktopGroupName":
            node["desktop_group_name"] = " ".join(values)
        elif key == "Catalog":
            node["catalog"] = " ".join(values)
        elif key == "AgentVersion" and values:
            # Only store the version when a value token is present.
            node["agent_version"] = values[0]
# Register inv_citrix_state as the "inv_function" of the "citrix_state"
# entry in inv_info.
inv_info["citrix_state"] = {
"inv_function": inv_citrix_state,
}
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2019 tribe29 GmbH - License: GNU General Public License v2
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.
# yapf: disable
# type: ignore
# Fixture header: the check this data set belongs to and the point in time
# the test is evaluated at. The frozen time lies about 31 days after the
# "Last updated" timestamp in `info`, which is what the expected
# "Status output too old: 31 d" result in `checks` relies on.
# NOTE(review): presumably both names are read by a generic test runner - confirm.
checkname = 'heartbeat_crm'
freeze_time = '2019-04-11 12:38:36'
# Agent output of the section, one list of whitespace-separated tokens per
# line. It contains the cluster summary, the full list of resources and two
# failed resource actions.
# NOTE(review): the leading u'_' tokens presumably stand for indentation in
# the original agent output - confirm against the agent plugin.
info = [[u'Stack:', u'corosync'],
[u'Current',
u'DC:',
u'hrssc61i02',
u'(version',
u'1.1.19+20180928.0d2680780-1.8-1.1.19+20180928.0d2680780)',
u'-',
u'partition',
u'with',
u'quorum'],
[u'Last', u'updated:', u'Mon', u'Mar', u'11', u'14:17:33', u'2019'],
[u'Last',
u'change:',
u'Thu',
u'Feb',
u'28',
u'17:40:07',
u'2019',
u'by',
u'hacluster',
u'via',
u'cibadmin',
u'on',
u'hrssc61i01'],
[u'2', u'nodes', u'configured'],
[u'10', u'resources', u'configured'],
[u'Online:', u'[', u'hrssc61i01', u'hrssc61i02', u']'],
[u'Full', u'list', u'of', u'resources:'],
[u'Resource', u'Group:', u'grp_IFG_ASCS22'],
[u'_',
u'rsc_ip_IFG_ASCS22',
u'(ocf::heartbeat:IPaddr2):',
u'Started',
u'hrssc61i01'],
[u'_',
u'rsc_sap_IFG_ASCS22',
u'(ocf::heartbeat:SAPInstance):',
u'Started',
u'hrssc61i01'],
[u'Resource', u'Group:', u'grp_IFG_ERS23'],
[u'_',
u'rsc_ip_IFG_ERS23',
u'(ocf::heartbeat:IPaddr2):',
u'Started',
u'hrssc61i02'],
[u'_',
u'rsc_sap_IFG_ERS23',
u'(ocf::heartbeat:SAPInstance):',
u'Started',
u'hrssc61i02'],
[u'Clone', u'Set:', u'clone_nfs_sapmnt_IFG', u'[nfs_sapmnt_IFG]'],
[u'_', u'Started:', u'[', u'hrssc61i01', u'hrssc61i02', u']'],
[u'Clone', u'Set:', u'clone_nfs_usr_sap_IFG', u'[nfs_usr_sap_IFG]'],
[u'_', u'Started:', u'[', u'hrssc61i01', u'hrssc61i02', u']'],
[u'st-vmware', u'(stonith:fence_vmware_rest):', u'Started', u'hrssc61i02'],
[u'st-vmware2', u'(stonith:fence_vmware_rest):', u'Started', u'hrssc61i01'],
[u'Failed', u'Resource', u'Actions:'],
[u'*',
u'st-vmware_monitor_20000',
u'on',
u'hrssc61i02',
u"'unknown",
u"error'",
u'(1):',
u'call=43,',
u'status=Error,',
u"exitreason='',"],
[u'_',
u"last-rc-change='Mon",
u'Mar',
u'4',
u'09:29:54',
u"2019',",
u'queued=0ms,',
u'exec=11096ms'],
[u'*',
u'st-vmware2_monitor_20000',
u'on',
u'hrssc61i01',
u"'unknown",
u"error'",
u'(1):',
u'call=43,',
u'status=Error,',
u"exitreason='',"],
[u'_',
u"last-rc-change='Mon",
u'Mar',
u'4',
u'09:29:54',
u"2019',",
u'queued=0ms,',
u'exec=11088ms']]
# Expected discovery output, keyed by sub-check name ('' is the main check).
# Each entry is an (item, discovered parameters) pair: the main check has no
# item and records the node/resource counts found in `info`; the resources
# sub-check has one entry per resource name with empty parameters.
discovery = {'': [(None, {'num_nodes': 2, 'num_resources': 10})],
'resources': [(u'clone_nfs_sapmnt_IFG', {}),
(u'clone_nfs_usr_sap_IFG', {}),
(u'grp_IFG_ASCS22', {}),
(u'grp_IFG_ERS23', {}),
(u'st-vmware', {}),
(u'st-vmware2', {})]}
# Expected check output, keyed by sub-check name ('' is the main check).
# Each entry is an (item, params, expected results) triple; every expected
# result is a (state, text, list) triple whose list is empty throughout.
# NOTE(review): the third element is presumably the perfdata list - confirm.
# The main check only reports the stale-data result because the frozen time
# is far beyond max_age after the "Last updated" timestamp.
checks = {'': [(None,
{'max_age': 60, 'num_nodes': 2, 'num_resources': 10},
[(2, 'Ignoring reported data (Status output too old: 31 d)', [])])],
'resources': [(u'clone_nfs_sapmnt_IFG',
{},
[(0,
u"clone_nfs_sapmnt_IFG Clone Started hrssc61i01, hrssc61i02",
[])]),
(u'clone_nfs_usr_sap_IFG',
{},
[(0,
u"clone_nfs_usr_sap_IFG Clone Started hrssc61i01, hrssc61i02",
[])]),
(u'grp_IFG_ASCS22',
{},
[(0,
u'rsc_ip_IFG_ASCS22 (ocf::heartbeat:IPaddr2): Started hrssc61i01',
[]),
(0,
u'rsc_sap_IFG_ASCS22 (ocf::heartbeat:SAPInstance): Started hrssc61i01',
[])
]),
(u'grp_IFG_ERS23',
{},
[(0,
u'rsc_ip_IFG_ERS23 (ocf::heartbeat:IPaddr2): Started hrssc61i02',
[]),
(0,
u'rsc_sap_IFG_ERS23 (ocf::heartbeat:SAPInstance): Started hrssc61i02',
[])]),
(u'st-vmware',
{},
[(0,
u'st-vmware (stonith:fence_vmware_rest): Started hrssc61i02',
[])]),
(u'st-vmware2',
{},
[(0,
u'st-vmware2 (stonith:fence_vmware_rest): Started hrssc61i01',
[])])]}
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright (C) 2019 tribe29 GmbH - License: GNU General Public License v2
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.
# yapf: disable
# type: ignore
# Fixture header: the check this data set belongs to and the point in time
# the test is evaluated at. The frozen time is 24 seconds after the
# "Last updated" timestamp in `info`, i.e. within both max_age values used
# in `checks` below.
# NOTE(review): presumably both names are read by a generic test runner - confirm.
checkname = 'heartbeat_crm'
freeze_time = '2020-09-08 10:36:36'
# Agent output of the section, one list of whitespace-separated tokens per
# line. In this output variant the summary lines carry a leading '_*' token
# and the resource count line reads "resource instances configured".
info = [
['Cluster', 'Summary:'], ['_*', 'Stack:', 'corosync'],
[
'_*', 'Current', 'DC:', 'ha02', '(version',
'2.0.3-5.el8_2.1-4b1f869f0f)', '-', 'partition', 'with', 'quorum'
], ['_*', 'Last', 'updated:', 'Tue', 'Sep', '8', '10:36:12', '2020'],
[
'_*', 'Last', 'change:', 'Mon', 'Sep', '7', '22:33:23', '2020', 'by',
'root', 'via', 'cibadmin', 'on', 'ha01'
], ['_*', '2', 'nodes', 'configured'],
['_*', '3', 'resource', 'instances', 'configured'], ['Node', 'List:'],
['_*', 'Online:', '[', 'ha01', 'ha02', ']'],
['Full', 'List', 'of', 'Resources:'],
['_*', 'vip', '(ocf::heartbeat:IPaddr):', 'Started', 'ha01'],
['_*', 'Clone', 'Set:', 'splunk-clone', '[splunk]:'],
['_', '*', 'Started:', '[', 'ha01', 'ha02', ']']
]
# Expected discovery output, keyed by sub-check name ('' is the main check).
# The main check records the node/resource counts from `info`; no services
# are expected for the resources sub-check.
discovery = {
'': [(None, {
'num_nodes': 2,
'num_resources': 3
})],
'resources': []
}
# Expected check output for the main check as (item, params, expected
# results) triples: once with the node/resource comparison disabled (None),
# where only the DC is reported, and once with matching expected counts.
checks = {
'': [
(
None, {
'max_age': 600,
'num_nodes': None,
'num_resources': None
}, [(0, 'DC: ha02', [])]
),
(
None, {
'max_age': 60,
'num_nodes': 2,
'num_resources': 3
}, [
(0, 'DC: ha02', []), (0, 'Nodes: 2', []),
(0, 'Resources: 3', [])
]
)
]
}
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2019 tribe29 GmbH - License: GNU General Public License v2
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.
# yapf: disable
# type: ignore
# Fixture header: the check this data set belongs to and the point in time
# the test is evaluated at.
# NOTE(review): presumably both names are read by a generic test runner - confirm.
checkname = 'heartbeat_crm'
freeze_time = '2019-04-11 12:38:36'
info = [[u'Stack:', u'corosync'],
[u'Current',
u'DC:',
u'cluster',
u'(version',
u'1.1.16-12.el7_4.8-94ff4df)',
u'-',
u'partition',
u'with',
u'quorum'],
[u'Last', u'updated:', u'Tue', u'Oct', u'26', u'13:58:47', u'2019'],
[u'Last',
u'change:',
u'Sat',
u'Oct',
u'24',
u'10:54:28',
u'2019',
u'by',
u'root',
u'via',
u'cibadmin',
u'on',
u'cluster'],
[u'2', u'nodes', u'configured'],
[u'6', u'resources', u'configured'],
[u'Online:', u'[', u'cluster1', u'cluster2', u']'],
[u'Full', u'list', u'of', u'resources:'],
[u'Resource', u'Group:', u'mysqldb1'],
[u'_', u'mysqldb1_lvm', u'(ocf::heartbeat:LVM):Started', u'cluster1'],
[u'_', u'mysqldb1_fs', u'(ocf::heartbeat:Filesystem):Started', u'cluster1'],
[u'_', u'mysqldb1_ip', u'(ocf::heartbeat:IPaddr2):Started', u'cluster1'],
[u'_', u'mysqldb1_mysql', u'(service:mysqldb1):Started', u'cluster1'],
[u'cluster1_fence(stonith:fence_ipmilan):', u'Started', u'cluster2'],
[u'cluster2_fence(stonith:fence_ipmilan):', u'Started', u'cluster1'],
[u'Failed', u'Actions:'],