From c5a613f6bf93c3b7b74817f5c71e7cc1e9bdfc7a Mon Sep 17 00:00:00 2001
From: Alban VIDAL
Date: Mon, 30 Sep 2019 20:13:08 +0200
Subject: [PATCH] Add check: check_memory

---
Review notes: the check_memory blob below was revised during review, so
the hash on its "index" line no longer matches the new file content.

 conf/etc/nagios/nrpe.d/check_nrpe.cfg |   1 +
 conf/usr/local/bin/check_memory       | 348 ++++++++++++++++++++++++++
 2 files changed, 349 insertions(+)
 create mode 100755 conf/usr/local/bin/check_memory

diff --git a/conf/etc/nagios/nrpe.d/check_nrpe.cfg b/conf/etc/nagios/nrpe.d/check_nrpe.cfg
index c8e6a3c..cfe05c3 100644
--- a/conf/etc/nagios/nrpe.d/check_nrpe.cfg
+++ b/conf/etc/nagios/nrpe.d/check_nrpe.cfg
@@ -16,6 +16,7 @@
 
 command[check_available_conntrack] = /usr/local/bin/check_available_conntrack
 command[check_cpu] = /usr/local/bin/check_cpu
+command[check_memory] = /usr/local/bin/check_memory
 command[check_systemd] = /usr/local/bin/check_systemd
 
 # with sudo
diff --git a/conf/usr/local/bin/check_memory b/conf/usr/local/bin/check_memory
new file mode 100755
index 0000000..3c4b226
--- /dev/null
+++ b/conf/usr/local/bin/check_memory
@@ -0,0 +1,348 @@
+#!/usr/bin/env python3
+
+# Copyright © 2019 Aurélien Grimal
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+#####
+
+# Nagios/NRPE check reporting memory and swap usage read from /proc/meminfo.
+#
+# It is assumed that the /proc/meminfo file uses kB as unit. If not, this program will not work.
+#
+# Thresholds are integer percentages (0-100). Values found in the configuration
+# file override the ones given on the command line.
+
+# Usage examples :
+# 1) check_memory
+# 2) check_memory --mem-warn 55 --mem-crit 80 --swap-warn 50 --swap-crit 75
+
+#####
+
+
+import argparse
+import re
+import sys
+import traceback
+
+result = {'rc': 0, 'text': [], 'perfdata': [], 'params': {}}
+default_config_file = '/etc/env_check_nrpe'
+units = ['B', 'kB', 'MB', 'GB', 'TB']
+
+try:
+
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "--mem-warn",
+        help = "Threshold percent for memory warning (default 70)",
+        type = int,
+        default = 70
+    )
+    parser.add_argument(
+        "--mem-crit",
+        help = "Threshold percent for memory critical (default 90)",
+        type = int,
+        default = 90
+    )
+    parser.add_argument(
+        "--swap-warn",
+        help = "Threshold percent for swap warning (default 10)",
+        type = int,
+        default = 10,
+    )
+    parser.add_argument(
+        "--swap-crit",
+        help = "Threshold percent for swap critical (default 50)",
+        type = int,
+        default = 50
+    )
+    parser.add_argument(
+        "--without-swap",
+        help = "Deactivate swap monitoring",
+        action = "store_true"
+    )
+    parser.add_argument(
+        "--with-mem-locked",
+        help = "Activate memory locked monitoring",
+        action = "store_true"
+    )
+    parser.add_argument(
+        "--dont-use-available",
+        help = "Warn against used memory instead of not available memory",
+        action = "store_true"
+    )
+    # NOTE(review): --revert is parsed (and readable from the configuration
+    # file) but nothing below uses it yet, so it currently has no effect.
+    parser.add_argument(
+        "--revert",
+        help = "Show output with free memory instead of used memory",
+        action = "store_true"
+    )
+    parser.add_argument(
+        "--unit",
+        help = "Unit for perfdata (default MB)",
+        choices = units,
+        default = 'MB'
+    )
+    parser.add_argument(
+        "--config-file",
+        help = "Configuration file with bash-style variables declared\nCHECK_MEMORY_MEM_WARN=integer ([0-100])\nCHECK_MEMORY_MEM_CRIT=integer ([0-100])\nCHECK_MEMORY_SWAP_WARN=integer ([0-100])\nCHECK_MEMORY_SWAP_CRIT=integer ([0-100])\nCHECK_MEMORY_WITHOUT_SWAP=boolean ([0|1])\nCHECK_MEMORY_WITH_MEM_LOCKED=boolean ([0|1])\nCHECK_MEMORY_DONT_USE_AVAILABLE=boolean ([0|1])\nCHECK_MEMORY_REVERT=boolean ([0|1])\nCHECK_MEMORY_UNIT=['B','kB','MB','GB','TB']",
+        nargs = 1
+    )
+
+    args = parser.parse_args()
+
+    # Define the config file to use
+    if args.config_file is not None:
+        config_file = args.config_file[0]
+    else:
+        config_file = default_config_file
+
+    true_strings = ['1', 'true', 'True', 'yes', 'y', 'Yes']
+    false_strings = ['0', 'false', 'False', 'no', 'n', 'No']
+
+    def check_boolean(string):
+        # Convert a configuration value to a boolean, ValueError if it is not one
+        if string in true_strings:
+            return True
+        if string in false_strings:
+            return False
+        raise ValueError('I can\'t make a boolean out of that : ' + string)
+
+    def check_unit(string):
+        # Validate a configuration value as a perfdata unit, ValueError otherwise
+        if string in units:
+            return string
+        raise ValueError('What is this unit ? ' + string)
+
+    # Configuration file variable -> (args attribute, converter)
+    config_keys = {
+        'CHECK_MEMORY_MEM_WARN': ('mem_warn', int),
+        'CHECK_MEMORY_MEM_CRIT': ('mem_crit', int),
+        'CHECK_MEMORY_SWAP_WARN': ('swap_warn', int),
+        'CHECK_MEMORY_SWAP_CRIT': ('swap_crit', int),
+        'CHECK_MEMORY_WITHOUT_SWAP': ('without_swap', check_boolean),
+        'CHECK_MEMORY_WITH_MEM_LOCKED': ('with_mem_locked', check_boolean),
+        'CHECK_MEMORY_DONT_USE_AVAILABLE': ('dont_use_available', check_boolean),
+        'CHECK_MEMORY_REVERT': ('revert', check_boolean),
+        'CHECK_MEMORY_UNIT': ('unit', check_unit),
+    }
+
+    # Try to open it; values found in the file override the command line
+    try:
+        with open(config_file, 'r') as config:
+            for line in config:
+                key, separator, value = line.strip().partition('=')
+                if not separator or key not in config_keys:
+                    continue
+                attribute, convert = config_keys[key]
+                # Bash-style values may be quoted
+                setattr(args, attribute, convert(value.strip('"\'')))
+    except IOError:
+        # A missing default file is fine, a missing explicit one is an error
+        if args.config_file is not None:
+            print("ERROR: the file '" + config_file + "' does not exist !")
+            sys.exit(2)
+    except ValueError as e:
+        print("ERROR: reading the file '" + config_file + "',", e)
+        sys.exit(2)
+
+    error = False
+
+    # Test mem_warn/mem_crit and swap_warn/swap_crit values
+    for name in ('mem', 'swap'):
+        warn = getattr(args, name + '_warn')
+        crit = getattr(args, name + '_crit')
+        for level, threshold in (('warn', warn), ('crit', crit)):
+            option = '--' + name + '-' + level
+            if threshold < 0:
+                print("ERROR: " + option + " can't be negative")
+                error = True
+            elif threshold > 100:
+                print("ERROR: " + option + " value exceeds 100")
+                error = True
+        if crit < warn:
+            print("ERROR: --" + name + "-crit value is less than --" + name + "-warn value")
+            error = True
+
+    if error:
+        sys.exit(2)
+
+    # Define which params are read in the file
+    params = [
+        'MemTotal',
+        'MemFree',
+        'MemAvailable',
+        'Buffers',
+        'Cached'
+    ]
+
+    if args.with_mem_locked:
+        params.append('Mlocked')
+
+    if not args.without_swap:
+        params.append('SwapTotal')
+        params.append('SwapFree')
+
+    # Default value to -1 and compile regex
+    for param in params:
+        result['params'][param] = {
+            'value': -1,
+            'regex': re.compile(r'^' + param + r':\s+([0-9]+)\s+kB\s*$')
+        }
+
+    # Parse each line of the file
+    with open('/proc/meminfo', 'r') as meminfo:
+        for line in meminfo:
+            for param_dict in result['params'].values():
+                re_result = param_dict['regex'].match(line)
+                if re_result is not None:
+                    param_dict['value'] = int(re_result.group(1))
+
+    # Detect if param is missing from file
+    for param, param_dict in result['params'].items():
+        if param_dict['value'] == -1:
+            print('ERROR: Missing parameter "' + param + '" in file /proc/meminfo')
+            error = True
+
+    # Exit if missing param
+    if error:
+        sys.exit(2)
+
+    # Analyze memory
+    mem_total = result['params']['MemTotal']['value']
+    result['params']['MemUsed'] = {
+        'value': mem_total - result['params']['MemFree']['value']
+    }
+    result['params']['MemNotAvailable'] = {
+        'value': mem_total - result['params']['MemAvailable']['value']
+    }
+
+    if args.dont_use_available:
+        mem_used_percent = int(result['params']['MemUsed']['value'] / mem_total * 100)
+    else:
+        mem_used_percent = int(result['params']['MemNotAvailable']['value'] / mem_total * 100)
+
+    if mem_used_percent > args.mem_crit:
+        result['rc'] = 2
+    elif mem_used_percent > args.mem_warn:
+        result['rc'] = 1
+    result['text'].append("MEMORY_USAGE=" + str(mem_used_percent) + "%")
+
+    # Analyze swap
+    if not args.without_swap:
+        swap_total = result['params']['SwapTotal']['value']
+        # Always define SwapUsed (0 on hosts without swap) so that the
+        # perfdata section below can rely on it
+        result['params']['SwapUsed'] = {
+            'value': swap_total - result['params']['SwapFree']['value']
+        }
+        if swap_total != 0:
+            swap_used_percent = int(result['params']['SwapUsed']['value'] / swap_total * 100)
+            # max() so that a swap warning never downgrades a memory critical
+            if swap_used_percent > args.swap_crit:
+                result['rc'] = max(result['rc'], 2)
+            elif swap_used_percent > args.swap_warn:
+                result['rc'] = max(result['rc'], 1)
+            result['text'].append("SWAP_USAGE=" + str(swap_used_percent) + "%")
+
+    #
+    # PERFDATA
+    #
+
+    # Convert units (values read from /proc/meminfo are in kB)
+    for param_dict in result['params'].values():
+        if args.unit == 'B':
+            param_dict['value'] = param_dict['value'] * 1024
+        elif args.unit == 'MB':
+            param_dict['value'] = int(param_dict['value'] / 1024)
+        elif args.unit == 'GB':
+            param_dict['value'] = round(param_dict['value'] / 1024**2, 1)
+        elif args.unit == 'TB':
+            param_dict['value'] = round(param_dict['value'] / 1024**3, 2)
+
+    def percent_of(total, percent):
+        # Perfdata threshold: percent % of total, expressed in the unit of total
+        threshold = total * percent / 100
+        if isinstance(total, float):
+            # GB and TB values are fractional, do not truncate them
+            return round(threshold, 2)
+        return int(threshold)
+
+    # MemUsed
+    value = result['params']['MemUsed']['value']
+    total_value = result['params']['MemTotal']['value']
+    string = 'mem_used=' + str(value) + args.unit + ';'
+    if args.dont_use_available:
+        string += str(percent_of(total_value, args.mem_warn)) + ';' + \
+            str(percent_of(total_value, args.mem_crit)) + ';'
+    else:
+        string += ';;'
+    string += '0;' + str(total_value)
+    result['perfdata'].append(string)
+
+    # MemNotAvailable
+    value = result['params']['MemNotAvailable']['value']
+    string = 'mem_not_avail=' + str(value) + args.unit + ';'
+    if not args.dont_use_available:
+        string += str(percent_of(total_value, args.mem_warn)) + ';' + \
+            str(percent_of(total_value, args.mem_crit)) + ';'
+    else:
+        string += ';;'
+    string += '0;' + str(total_value)
+    result['perfdata'].append(string)
+
+    # Buffers + Cached
+    value = result['params']['Buffers']['value'] + result['params']['Cached']['value']
+    if isinstance(value, float):
+        # Hide floating point noise from the sum of two rounded values
+        value = round(value, 2)
+    string = 'buffers_cached=' + str(value) + args.unit + ';;;0'
+    result['perfdata'].append(string)
+
+    # Mlocked
+    if args.with_mem_locked:
+        value = result['params']['Mlocked']['value']
+        string = 'mem_locked=' + str(value) + args.unit + ';;;0'
+        result['perfdata'].append(string)
+
+    # SwapUsed
+    if not args.without_swap:
+        value = result['params']['SwapUsed']['value']
+        total_value = result['params']['SwapTotal']['value']
+        string = 'swap_used=' + str(value) + args.unit + ';' + \
+            str(percent_of(total_value, args.swap_warn)) + ';' + \
+            str(percent_of(total_value, args.swap_crit)) + ';' + \
+            '0;' + str(total_value)
+        result['perfdata'].append(string)
+
+    #
+    # OUTPUT AND EXIT
+    #
+
+    if result['rc'] == 0:
+        print("OK -", " ".join(result['text']), end='')
+    elif result['rc'] == 1:
+        print("WARNING:", " - ".join(result['text']), end='')
+    else:
+        print("CRITICAL:", " - ".join(result['text']), end='')
+
+    print(" |", " ".join(result['perfdata']))
+    sys.exit(result['rc'])
+
+except Exception:
+    print("CRITICAL:", traceback.format_exc())
+    print("\n".join(result['text']))
+    sys.exit(2)