New syntax allowing the use of code in parameters - reed-alert - Lightweight agentless alerting system for server HTML git clone git://bitreich.org/reed-alert/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/reed-alert/ DIR Log DIR Files DIR Refs DIR Tags DIR README DIR LICENSE --- DIR commit 3f03224030fd0b48e2de384f56c78834da14643b DIR parent 01a3f1cc34988cd7ad739d41186c4362ede1fdf8 HTML Author: Solene Rapenne <solene@perso.pw> Date: Thu, 11 Jan 2018 15:03:46 +0100 New syntax allowing the use of code in parameters Diffstat: M README | 44 ++++++++++++++++---------------- M config.lisp.sample | 16 ++++++++-------- M example.lisp | 56 +++++++++++++++++-------------- M functions.lisp | 31 +++++++++++++++++-------------- 4 files changed, 77 insertions(+), 70 deletions(-) --- DIR diff --git a/README b/README @@ -144,7 +144,7 @@ Check if the actual number of processes of the system exceeds a specific limit. > Set the limit that will trigger an alert when exceeded. :limit INTEGER -Example : `(=> alert number-of-processes (:limit 200))` +Example : `(=> alert number-of-processes :limit 200)` pid-running @@ -154,7 +154,7 @@ Check if the PID number found in a .pid file is alive. > Set the path of the pid file. If $USER doesn't have permission to open it, return "file not found". :path "STRING" -Example : `(=> alert pid-running (:path "/var/run/nginx.pid"))` +Example : `(=> alert pid-running :path "/var/run/nginx.pid")` disk-usage @@ -167,7 +167,7 @@ Check if the disk-usage of a chosen partition does exceed a specific limit. > Set the limit that will trigger an alert when exceeded. :limit INTEGER -Example : `(=> alert disk-usage (:path "/tmp" :limit 50))` +Example : `(=> alert disk-usage :path "/tmp" :limit 50)` file-exists @@ -177,7 +177,7 @@ Check if a file exists. > Set the path of the file to check. :path "STRING" -Example : `(=> alert file-exists (:path "/var/postgresql/standby"))` +Example : `(=> alert file-exists :path "/var/postgresql/standby")` file-updated @@ -190,7 +190,7 @@ Check if a file exists and has been updated since a defined time. > Set the limit in minutes since the last modification time before triggering an alert. :limit INTEGER -Example : `(=> alert file-updated (:path "/var/log/nginx/access.log" :limit 60))` +Example : `(=> alert file-updated :path "/var/log/nginx/access.log" :limit 60)` load-average-1 @@ -200,7 +200,7 @@ Check if the load average during the last minute exceeds a specific limit. > Set the limit not to exceed. :limit INTEGER -Example : `(=> alert load-average-1 (:limit 2))` +Example : `(=> alert load-average-1 :limit 2)` load-average-5 @@ -210,7 +210,7 @@ Check if the load average during the last five minutes exceeds a specific limit. > Set the limit not to exceed. :limit INTEGER -Example : `(=> alert load-average-5 (:limit 2))` +Example : `(=> alert load-average-5 :limit 2)` load-average-15 @@ -220,7 +220,7 @@ Check if the load average during the last fifteen minutes exceeds a specific lim > Set the limit not to exceed. :limit INTEGER -Example : `(=> alert load-average-15 (:limit 2))` +Example : `(=> alert load-average-15 :limit 2)` ping @@ -230,7 +230,7 @@ Check if a remote host answers the 2 ICMP ping. > Set the host to ping. Return an error if ping command returns non-zero. :host "STRING" (can be IP or hostname) -Example : `(=> alert ping (:host "8.8.8.8"))` +Example : `(=> alert ping :host "8.8.8.8")` command @@ -241,7 +241,7 @@ This may be the most useful probe because it let the user do any check needed. > Command to execute, accept commands with pipes. :command "STRING" -Example : `(=> alert command (:command "tail -n 10 /var/log/messages | grep -v CRITICAL"))` +Example : `(=> alert command :command "tail -n 10 /var/log/messages | grep -v CRITICAL")` service ------- @@ -250,7 +250,7 @@ Check if a service is started on the system. > Set the name of the service to test :name STRING -Example : `(=> alert service (:name "mysql-server"))` +Example : `(=> alert service :name "mysql-server")` file-less-than -------------- @@ -262,7 +262,7 @@ Check if a file has a size less than a specified limit. > Set the limit in bytes before triggering an alert. :limit INTEGER -Example : `(=> alert file-less-than (:path "/var/log/nginx.log" :limit 60))` +Example : `(=> alert file-less-than :path "/var/log/nginx.log" :limit 60)` The configuration file @@ -278,13 +278,13 @@ It's possible to write loops if you don't want to repeat code (loop for host in '("bitreich.org" "dataswamp.org" "floodgap.com") do - (=> mail ping (:host host))) + (=> mail ping :host host)) or another example (loop for service in '("smtpd" "nginx" "mysqld" "postgresql") do - (=> mail service (:name service))) + (=> mail service :name service)) and another example using rows from a file to check remote hosts @@ -292,7 +292,7 @@ and another example using rows from a file to check remote hosts (loop for line = (read-line stream nil) while line do - (=> mail ping (:host line)))) + (=> mail ping :host line))) Conditional @@ -310,9 +310,9 @@ router, probes requiring the router to work will trigger errors so we should skip them. (stop-if-error - (=> mail ping (:host "192.168.1.1" :desc "My local router")) - (=> mail ping (:host "89.89.89.89" :desc "My ISP DNS server")) - (=> mail ping (:host "kernel.org" :desc "Remote website"))) + (=> mail ping :host "192.168.1.1" :desc "My local router") + (=> mail ping :host "89.89.89.89" :desc "My ISP DNS server") + (=> mail ping :host "kernel.org" :desc "Remote website")) Note : stop-if-error is an alias for the **and** function. @@ -326,10 +326,10 @@ the detection and fixing it. You could want to receive a mail when things need to be fixed on spare time, but mail another people if things aren't fixed after some level. -(escalation - (=> mail-me disk-usage (:path "/" :limit 70)) - (=> sms-me disk-usage (:path "/" :limit 90)) - (=> buzzer disk-usage (:path "/" :limit 98))) + (escalation + (=> mail-me disk-usage :path "/" :limit 70) + (=> sms-me disk-usage :path "/" :limit 90) + (=> buzzer disk-usage :path "/" :limit 98)) In this example, we check the disk usage, I will get a mail through "mail-me" alert if the disk usage go get more than 70%. Once it goes DIR diff --git a/config.lisp.sample b/config.lisp.sample @@ -3,17 +3,17 @@ (alert mail "echo -n 'Problem with %function% %date% %params%' | mail -s alarm mail@isp.net") (alert sms "/home/user/sms.sh '%date% %function% %params% %hostname%") (alert available-variables "REMINDER : %function% %params% %date% %hostname% %desc% %level% %os% %newline% %result%") -(alert void "") +(alert empty "") ;; this is a comment ; this is also a comment -(=> mail disk-usage (:path "/" :limit 90)) +(=> mail disk-usage :path "/" :limit 90) -(=> mail service (:name "dovecot")) -(=> mail service (:name "httpd")) -(=> sms service (:name "smtpd")) -(=> mail number-of-processes (:limit 100)) +(=> mail service :name "dovecot") +(=> mail service :name "httpd") +(=> sms service :name "smtpd") +(=> mail number-of-processes :limit 100) -(=> mail ping (:host "bitreich.org" :desc "Ping Bitreich")) -(=> mail ping (:host "openbsd.org" :desc "Ping OpenBSD.org")) +(=> mail ping :host "bitreich.org" :desc "Ping Bitreich") +(=> mail ping :host "openbsd.org" :desc "Ping OpenBSD.org") DIR diff --git a/example.lisp b/example.lisp @@ -1,58 +1,62 @@ (load "functions.lisp") (alert dont-use-it "REMINDER %function% %params% %date% %hostname% %desc% %level% %os% %newline% _ %space% %result%") -(alert void "") +(alert empty "") (alert mail "") +(alert peroket "echo 'problem at %date% with %function% %params%'") (alert sms "echo -n '%date% %function% CRITICAL on %hostname%' | curl http://somewebservice") ;(alert mail "echo -n '%date% %hostname% had problem on %function% %newline% %params% values %result% %newline% ; %desc%' | mail -s '[Error] %function% - %hostname%' foo@bar.com") ;; check if used percent :path partition is more than :limit -(=> mail disk-usage (:path "/" :limit 90)) -(=> mail disk-usage (:path "/usr" :limit 85)) -(=> mail disk-usage (:path "/tmp" :limit 1)) ;; failure +(=> peroket disk-usage :path "/" :limit 90) +(=> peroket disk-usage :path "/usr" :limit 85) +(=> peroket disk-usage :path "/tmp" :limit 0) ;; failure ;; check if :path file exists -(=> mail file-exists (:path "/bsd.rd" :desc "OpenBSD kernel /bsd.rd")) -(=> void file-exists (:path "/non-existant-file")) ;; failure file not found +(=> mail file-exists :path "/bsd.rd" :desc "OpenBSD kernel /bsd.rd") +(=> empty file-exists :path "/non-existant-file") ;; failure file not found ;; check if :path file exists and has been updated since :limit minutes -(=> void file-updated (:path "/var/log/messages" :limit 400)) -(=> mail file-updated (:path "/bsd.rd" :limit 1 :desc "OpenBSD kernel")) ;; failure +(=> empty file-updated :path "/var/log/messages" :limit 400) +(=> mail file-updated :path "/bsd.rd" :limit 1 :desc "OpenBSD kernel") ;; failure ;; check if :path pid file process is running -(=> mail pid-running (:path "/var/run/xdm.pid" :desc "XDM pid")) -(=> mail pid-running (:path "/home/user/test.pid")) ;; failure +(=> mail pid-running :path "/var/run/xdm.pid" :desc "XDM pid") +(=> mail pid-running :path "/home/user/test.pid") ;; failure ;; check if number of processes on the system is more than :limit -(=> mail number-of-processes (:limit 200)) -(=> mail number-of-processes (:limit 1)) ;; failure +(=> mail number-of-processes :limit 200) +(=> mail number-of-processes :limit 1) ;; failure ;; check if service is running -(=> mail service (:name "httpd")) -(=> mail service (:name "ospfd")) ;; failure : not started -(=> mail service (:name "unknown")) ;; failure : not known +(=> mail service :name "httpd") +(=> mail service :name "ospfd") ;; failure : not started +(=> mail service :name "unknown") ;; failure : not known ;; check if load average on (1/5/15) minutes is more than :limit -(=> mail load-average-1 (:limit 4)) -;;(=> mail load-average-5 (:limit 2)) -;;(=> mail load-average-15 (:limit 1)) -(=> mail load-average-1 (:limit 0.2)) ;; should trigger error +(=> mail load-average-1 :limit 4) +;;(=> mail load-average-5 :limit 2) +;;(=> mail load-average-15 :limit 1) +(=> mail load-average-1 :limit 0.2) ;; should trigger error ;; check if :host host is reachable -;;(=> mail ping (:host "8.8.8.8" :desc "Google DNS")) -;;(=> void ping (:host "127.40.30.21" :desc "Certainly not used address")) ;; fail time out +;;(=> mail ping :host "8.8.8.8" :desc "Google DNS") +;;(=> empty ping :host "127.40.30.21" :desc "Certainly not used address") ;; fail time out +(loop for host in (list "8.8.8.8" "8.8.4.4" "127.0.0.1") + do + (=> empty ping :host host)) ;; check if :command command return 0 (success) or something else (error) -(=> void command (:command "echo hello")) ;; success -(=> void command (:command "ls /non-existent-file")) ;; fail +(=> empty command :command "echo hello") ;; success +(=> empty command :command "ls /non-existent-file") ;; fail ;; check if web page :url answer under :limit -(=> void command (:command "curl -m 10 http://google.fr/")) +(=> empty command :command "curl -m 10 http://google.fr/") ;; check if the web page :url contains the text regex :pattern -(=> void command (:command "curl http://google.fr/ | grep html")) -(=> void command (:command "curl http://google.fr/ | grep hello")) ;; error +(=> empty command :command "curl http://google.fr/ | grep html") +(=> empty command :command "curl http://google.fr/ | grep hello") ;; error (quit) DIR diff --git a/functions.lisp b/functions.lisp @@ -25,7 +25,9 @@ while pos))) (defmacro create-probe(name &body code) - `(progn (defun ,name(params) ,@code))) + `(progn + (defparameter ,name ',name) + (defun ,name(params) ,@code))) (defun get-file-size(path) (with-open-file (stream path) @@ -38,7 +40,9 @@ (list nil (format nil "return code = ~a" code))))) (defmacro alert(name string) - `(progn (push (list ',name ,string) + `(progn + (defparameter ,name ',name) + (push (list ',name ,string) *alerts*))) (defun trigger-alert(level function params result) @@ -67,17 +71,16 @@ `(progn (or ,@body))) -(defmacro =>(level fonction params) - `(progn - (format t "[~a~a ~20A~a] ~35A" *yellow* ',level ',fonction *white* (getf ',params :desc ',params)) - (let ((result (funcall ',fonction ',params))) - (if (not (listp result)) - (progn - (format t " => ~asuccess~a~%" *green* *white*) - t) - (progn - (format t " => ~aerror~a~%" *red* *white*) - (uiop:run-program (trigger-alert ',level ',fonction ',params (cadr result)) :output t) - nil))))) +(defun =>(level fonction &rest params) + (format t "[~a~a ~20A~a] ~35A" *yellow* level fonction *white* (getf params :desc params)) + (let ((result (funcall fonction params))) + (if (not (listp result)) + (progn + (format t " => ~asuccess~a~%" *green* *white*) + t) + (progn + (format t " => ~aerror~a~%" *red* *white*) + (uiop:run-program (trigger-alert level fonction params (cadr result)) :output t) + nil)))) (load "probes.lisp")