Add check: check_memory

This commit is contained in:
Alban VIDAL 2019-09-30 20:13:08 +02:00
parent 706fa685ec
commit c5a613f6bf
2 changed files with 344 additions and 0 deletions

View File

@ -16,6 +16,7 @@
command[check_available_conntrack] = /usr/local/bin/check_available_conntrack
command[check_cpu] = /usr/local/bin/check_cpu
command[check_memory] = /usr/local/bin/check_memory
command[check_systemd] = /usr/local/bin/check_systemd
# with sudo

343
conf/usr/local/bin/check_memory Executable file
View File

@ -0,0 +1,343 @@
#!/usr/bin/env python3
# Copyright © 2019 Aurélien Grimal
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#####
# It is assumed that the /proc/meminfo file uses kB as its unit. If not, this program will not work.
# Usage examples (thresholds are integer percentages):
# 1) check_memory
# 2) check_memory --mem-warn 55 --mem-crit 80 --swap-warn 50 --swap-crit 75
#####
import argparse
import re
import sys
import traceback

# Shared state of the current run. 'text' is also what the last-resort
# exception handler in main() prints after the traceback.
result = {'rc': 0, 'text': [], 'perfdata': [], 'params': {}}
default_config_file = '/etc/env_check_nrpe'

units = ['B', 'kB', 'MB', 'GB', 'TB']
true_strings = ['1', 'true', 'True', 'yes', 'y', 'Yes']
false_strings = ['0', 'false', 'False', 'no', 'n', 'No']

# One "<Name>:   <number> kB" line of /proc/meminfo.
meminfo_line = re.compile(r'^([^:\s]+):\s+([0-9]+)\s+kB\s*$')


def check_boolean(string):
    """Convert a config-file boolean string to a bool.

    Raises ValueError when the string is neither in true_strings nor in
    false_strings.
    """
    if string in true_strings:
        return True
    if string in false_strings:
        return False
    raise ValueError('I can\'t make a boolean out of that :', string)


def check_unit(string):
    """Return string if it is a supported perfdata unit, else raise ValueError."""
    if string in units:
        return string
    raise ValueError('What is this unit ?', string)


# Config-file variable prefix -> (argparse destination, value converter).
config_options = {
    'CHECK_MEMORY_MEM_WARN=': ('mem_warn', int),
    'CHECK_MEMORY_MEM_CRIT=': ('mem_crit', int),
    'CHECK_MEMORY_SWAP_WARN=': ('swap_warn', int),
    'CHECK_MEMORY_SWAP_CRIT=': ('swap_crit', int),
    'CHECK_MEMORY_WITHOUT_SWAP=': ('without_swap', check_boolean),
    'CHECK_MEMORY_WITH_MEM_LOCKED=': ('with_mem_locked', check_boolean),
    'CHECK_MEMORY_DONT_USE_AVAILABLE=': ('dont_use_available', check_boolean),
    'CHECK_MEMORY_REVERT=': ('revert', check_boolean),
    'CHECK_MEMORY_UNIT=': ('unit', check_unit),
}


def build_parser():
    """Build the command-line parser; all thresholds are integer percentages."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--mem-warn",
        help="Threshold percent for memory warning (default 70)",
        type=int,
        default=70,
    )
    parser.add_argument(
        "--mem-crit",
        help="Threshold percent for memory critical (default 90)",
        type=int,
        default=90,
    )
    parser.add_argument(
        "--swap-warn",
        help="Threshold percent for swap warning (default 10)",
        type=int,
        default=10,
    )
    parser.add_argument(
        "--swap-crit",
        help="Threshold percent for swap critical (default 50)",
        type=int,
        default=50,
    )
    parser.add_argument(
        "--without-swap",
        help="Deactivate swap monitoring",
        action="store_true",
    )
    parser.add_argument(
        "--with-mem-locked",
        help="Activate memory locked monitoring",
        action="store_true",
    )
    parser.add_argument(
        "--dont-use-available",
        help="Warn against used memory instead of not available memory",
        action="store_true",
    )
    # NOTE: --revert is parsed (and can be set from the config file) but
    # nothing in this script reads it; it is kept so existing command lines
    # and config files keep working.
    parser.add_argument(
        "--revert",
        help="Show output with free memory instead of used memory",
        action="store_true",
    )
    parser.add_argument(
        "--unit",
        help="Unit for perfdata (default MB)",
        choices=units,
        default='MB',
    )
    parser.add_argument(
        "--config-file",
        help="Configuration file with bash-style variables declared\nCHECK_MEMORY_MEM_WARN=integer ([0-100])\nCHECK_MEMORY_MEM_CRIT=integer ([0-100])\nCHECK_MEMORY_SWAP_WARN=integer ([0-100])\nCHECK_MEMORY_SWAP_CRIT=integer ([0-100])\nCHECK_MEMORY_WITHOUT_SWAP=boolean ([0|1])\nCHECK_MEMORY_WITH_MEM_LOCKED=boolean ([0|1])\nCHECK_MEMORY_DONT_USE_AVAILABLE=boolean ([0|1])\nCHECK_MEMORY_REVERT=boolean ([0|1])\nCHECK_MEMORY_UNIT=['B','kB','MB','GB','TB']",
        nargs=1,
    )
    return parser


def apply_config_line(args, line):
    """Apply one config-file line to args if it sets a known variable.

    Lines that do not start with a known "NAME=" prefix are ignored.
    Raises ValueError when the value cannot be converted.
    """
    line = line.rstrip()
    for prefix, (dest, convert) in config_options.items():
        if line.startswith(prefix):
            # Strip the whole "NAME=" prefix, '=' included.
            setattr(args, dest, convert(line[len(prefix):]))
            return


def load_config(args, config_file):
    """Override args with the variables found in config_file.

    Raises IOError if the file cannot be opened and ValueError on a bad value.
    """
    with open(config_file, 'r') as config:
        for line in config:
            apply_config_line(args, line)


def threshold_errors(args):
    """Return the list of error messages for out-of-range thresholds."""
    errors = []
    for kind in ('mem', 'swap'):
        warn = getattr(args, kind + '_warn')
        crit = getattr(args, kind + '_crit')
        for level, value in (('warn', warn), ('crit', crit)):
            option = '--' + kind + '-' + level
            if value < 0:
                errors.append("ERROR: " + option + " can't be negative")
            elif value > 100:
                errors.append("ERROR: " + option + " value exceeds 100")
        if crit < warn:
            errors.append("ERROR: --" + kind + "-crit value is less than --"
                          + kind + "-warn value")
    return errors


def required_params(args):
    """Return the /proc/meminfo field names needed for the selected options."""
    names = ['MemTotal', 'MemFree', 'MemAvailable', 'Buffers', 'Cached']
    # Mlocked and the swap fields are only required when actually monitored.
    if args.with_mem_locked:
        names.append('Mlocked')
    if not args.without_swap:
        names.append('SwapTotal')
        names.append('SwapFree')
    return names


def parse_meminfo(lines, names):
    """Extract the requested fields (in kB) from /proc/meminfo lines.

    Returns a dict name -> int; names that were not found are absent.
    """
    wanted = set(names)
    values = {}
    for line in lines:
        match = meminfo_line.match(line)
        if match is not None and match.group(1) in wanted:
            values[match.group(1)] = int(match.group(2))
    return values


def convert_kb(value_kb, unit):
    """Convert a kB amount to unit, using the rounding of the perfdata output."""
    if unit == 'B':
        return int(value_kb * 1024)
    if unit == 'MB':
        return int(value_kb / 1024)
    if unit == 'GB':
        return round(value_kb / 1024**2, 1)
    if unit == 'TB':
        return round(value_kb / 1024**3, 2)
    return int(value_kb)


def usage_percent(used, total):
    """Return used/total as a truncated integer percentage."""
    return int(used / total * 100)


def severity(percent, warn, crit):
    """Return the Nagios return code (0, 1 or 2) for a usage percentage."""
    if percent > crit:
        return 2
    if percent > warn:
        return 1
    return 0


def perf_entry(label, value_kb, unit, total_kb=None, warn=None, crit=None):
    """Format one perfdata item: label=value[unit];warn;crit;0[;max].

    warn and crit are percentages of total_kb; they are left empty when not
    given. The max field is only emitted when total_kb is given.
    """
    def limit(percent):
        if percent is None or total_kb is None:
            return ''
        # Thresholds are percentages: scale by /100 before converting.
        return str(convert_kb(total_kb * percent / 100, unit))

    entry = (label + '=' + str(convert_kb(value_kb, unit)) + unit + ';'
             + limit(warn) + ';' + limit(crit) + ';0')
    if total_kb is not None:
        entry += ';' + str(convert_kb(total_kb, unit))
    return entry


def analyze(args, kb):
    """Evaluate memory and swap usage from kB values of /proc/meminfo.

    Returns (rc, text, perfdata): the Nagios return code, the list of
    status fragments and the list of perfdata items.
    """
    unit = args.unit
    text = []
    perfdata = []

    # Memory
    mem_total = kb['MemTotal']
    mem_used = mem_total - kb['MemFree']
    mem_not_avail = mem_total - kb['MemAvailable']
    if args.dont_use_available:
        mem_percent = usage_percent(mem_used, mem_total)
    else:
        mem_percent = usage_percent(mem_not_avail, mem_total)
    rc = severity(mem_percent, args.mem_warn, args.mem_crit)
    text.append("MEMORY_USAGE=" + str(mem_percent) + "%")

    # Thresholds are attached only to the metric that is actually checked.
    if args.dont_use_available:
        perfdata.append(perf_entry('mem_used', mem_used, unit, mem_total,
                                   args.mem_warn, args.mem_crit))
        perfdata.append(perf_entry('mem_not_avail', mem_not_avail, unit,
                                   mem_total))
    else:
        perfdata.append(perf_entry('mem_used', mem_used, unit, mem_total))
        perfdata.append(perf_entry('mem_not_avail', mem_not_avail, unit,
                                   mem_total, args.mem_warn, args.mem_crit))

    # Sum in kB first so rounding is applied once.
    perfdata.append(perf_entry('buffers_cached',
                               kb['Buffers'] + kb['Cached'], unit))
    if args.with_mem_locked:
        perfdata.append(perf_entry('mem_locked', kb['Mlocked'], unit))

    # Swap
    if not args.without_swap:
        swap_total = kb['SwapTotal']
        swap_used = swap_total - kb['SwapFree']
        # A host without swap has nothing to evaluate, but still gets a
        # (zero) perfdata item so the series stays continuous.
        if swap_total != 0:
            swap_percent = usage_percent(swap_used, swap_total)
            # Never let a milder swap state downgrade the memory state.
            rc = max(rc, severity(swap_percent, args.swap_warn,
                                  args.swap_crit))
            text.append("SWAP_USAGE=" + str(swap_percent) + "%")
        perfdata.append(perf_entry('swap_used', swap_used, unit, swap_total,
                                   args.swap_warn, args.swap_crit))

    return rc, text, perfdata


def format_output(rc, text, perfdata):
    """Build the single plugin output line: status text, ' | ', perfdata."""
    if rc == 0:
        head = "OK - " + " ".join(text)
    elif rc == 1:
        head = "WARNING: " + " - ".join(text)
    else:
        head = "CRITICAL: " + " - ".join(text)
    return head + " | " + " ".join(perfdata)


def main(argv=None):
    """Run the check and exit with the Nagios return code.

    argv defaults to the process command line. Any unexpected exception is
    reported as CRITICAL (exit code 2) together with its traceback.
    """
    try:
        args = build_parser().parse_args(argv)

        # Define the config file to use
        if args.config_file is not None:
            config_file = args.config_file[0]
        else:
            config_file = default_config_file

        # Values from the config file override the command line.
        try:
            load_config(args, config_file)
        except IOError:
            # A missing default config file is fine; an explicit one is not.
            if args.config_file is not None:
                print("ERROR: the file '" + config_file + "' does not exist !")
                sys.exit(2)
        except ValueError as e:
            print("ERROR: reading the file '" + config_file + "',", e)
            sys.exit(2)

        errors = threshold_errors(args)
        if errors:
            for message in errors:
                print(message)
            sys.exit(2)

        names = required_params(args)
        with open('/proc/meminfo', 'r') as meminfo:
            kb = parse_meminfo(meminfo, names)
        result['params'] = kb

        missing = [name for name in names if name not in kb]
        if missing:
            for name in missing:
                print('ERROR: Missing parameter "' + name
                      + '" in file /proc/meminfo')
            sys.exit(2)

        rc, text, perfdata = analyze(args, kb)
        result['rc'] = rc
        result['text'] = text
        result['perfdata'] = perfdata

        print(format_output(rc, text, perfdata))
        sys.exit(rc)
    except Exception:
        print("CRITICAL:", traceback.format_exc())
        print("\n".join(result['text']))
        sys.exit(2)


if __name__ == "__main__":
    main()