New syntax allowing the use of code in parameters - reed-alert - Lightweight agentless alerting system for server
HTML git clone git://bitreich.org/reed-alert/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/reed-alert/
DIR Log
DIR Files
DIR Refs
DIR Tags
DIR README
DIR LICENSE
---
DIR commit 3f03224030fd0b48e2de384f56c78834da14643b
DIR parent 01a3f1cc34988cd7ad739d41186c4362ede1fdf8
HTML Author: Solene Rapenne <solene@perso.pw>
Date: Thu, 11 Jan 2018 15:03:46 +0100
New syntax allowing the use of code in parameters
Diffstat:
M README | 44 ++++++++++++++++----------------
M config.lisp.sample | 16 ++++++++--------
M example.lisp | 56 +++++++++++++++++--------------
M functions.lisp | 31 +++++++++++++++++--------------
4 files changed, 77 insertions(+), 70 deletions(-)
---
DIR diff --git a/README b/README
@@ -144,7 +144,7 @@ Check if the actual number of processes of the system exceeds a specific limit.
> Set the limit that will trigger an alert when exceeded.
:limit INTEGER
-Example : `(=> alert number-of-processes (:limit 200))`
+Example : `(=> alert number-of-processes :limit 200)`
pid-running
@@ -154,7 +154,7 @@ Check if the PID number found in a .pid file is alive.
> Set the path of the pid file. If $USER doesn't have permission to open it, return "file not found".
:path "STRING"
-Example : `(=> alert pid-running (:path "/var/run/nginx.pid"))`
+Example : `(=> alert pid-running :path "/var/run/nginx.pid")`
disk-usage
@@ -167,7 +167,7 @@ Check if the disk-usage of a chosen partition does exceed a specific limit.
> Set the limit that will trigger an alert when exceeded.
:limit INTEGER
-Example : `(=> alert disk-usage (:path "/tmp" :limit 50))`
+Example : `(=> alert disk-usage :path "/tmp" :limit 50)`
file-exists
@@ -177,7 +177,7 @@ Check if a file exists.
> Set the path of the file to check.
:path "STRING"
-Example : `(=> alert file-exists (:path "/var/postgresql/standby"))`
+Example : `(=> alert file-exists :path "/var/postgresql/standby")`
file-updated
@@ -190,7 +190,7 @@ Check if a file exists and has been updated since a defined time.
> Set the limit in minutes since the last modification time before triggering an alert.
:limit INTEGER
-Example : `(=> alert file-updated (:path "/var/log/nginx/access.log" :limit 60))`
+Example : `(=> alert file-updated :path "/var/log/nginx/access.log" :limit 60)`
load-average-1
@@ -200,7 +200,7 @@ Check if the load average during the last minute exceeds a specific limit.
> Set the limit not to exceed.
:limit INTEGER
-Example : `(=> alert load-average-1 (:limit 2))`
+Example : `(=> alert load-average-1 :limit 2)`
load-average-5
@@ -210,7 +210,7 @@ Check if the load average during the last five minutes exceeds a specific limit.
> Set the limit not to exceed.
:limit INTEGER
-Example : `(=> alert load-average-5 (:limit 2))`
+Example : `(=> alert load-average-5 :limit 2)`
load-average-15
@@ -220,7 +220,7 @@ Check if the load average during the last fifteen minutes exceeds a specific lim
> Set the limit not to exceed.
:limit INTEGER
-Example : `(=> alert load-average-15 (:limit 2))`
+Example : `(=> alert load-average-15 :limit 2)`
ping
@@ -230,7 +230,7 @@ Check if a remote host answers the 2 ICMP ping.
> Set the host to ping. Return an error if ping command returns non-zero.
:host "STRING" (can be IP or hostname)
-Example : `(=> alert ping (:host "8.8.8.8"))`
+Example : `(=> alert ping :host "8.8.8.8")`
command
@@ -241,7 +241,7 @@ This may be the most useful probe because it lets the user do any check needed.
> Command to execute, accept commands with pipes.
:command "STRING"
-Example : `(=> alert command (:command "tail -n 10 /var/log/messages | grep -v CRITICAL"))`
+Example : `(=> alert command :command "tail -n 10 /var/log/messages | grep -v CRITICAL")`
service
-------
@@ -250,7 +250,7 @@ Check if a service is started on the system.
> Set the name of the service to test
:name STRING
-Example : `(=> alert service (:name "mysql-server"))`
+Example : `(=> alert service :name "mysql-server")`
file-less-than
--------------
@@ -262,7 +262,7 @@ Check if a file has a size less than a specified limit.
> Set the limit in bytes before triggering an alert.
:limit INTEGER
-Example : `(=> alert file-less-than (:path "/var/log/nginx.log" :limit 60))`
+Example : `(=> alert file-less-than :path "/var/log/nginx.log" :limit 60)`
The configuration file
@@ -278,13 +278,13 @@ It's possible to write loops if you don't want to repeat code
(loop for host in '("bitreich.org" "dataswamp.org" "floodgap.com")
do
- (=> mail ping (:host host)))
+ (=> mail ping :host host))
or another example
(loop for service in '("smtpd" "nginx" "mysqld" "postgresql")
do
- (=> mail service (:name service)))
+ (=> mail service :name service))
and another example using rows from a file to check remote hosts
@@ -292,7 +292,7 @@ and another example using rows from a file to check remote hosts
(loop for line = (read-line stream nil)
while line
do
- (=> mail ping (:host line))))
+ (=> mail ping :host line)))
Conditional
@@ -310,9 +310,9 @@ router, probes requiring the router to work will trigger errors so we
should skip them.
(stop-if-error
- (=> mail ping (:host "192.168.1.1" :desc "My local router"))
- (=> mail ping (:host "89.89.89.89" :desc "My ISP DNS server"))
- (=> mail ping (:host "kernel.org" :desc "Remote website")))
+ (=> mail ping :host "192.168.1.1" :desc "My local router")
+ (=> mail ping :host "89.89.89.89" :desc "My ISP DNS server")
+ (=> mail ping :host "kernel.org" :desc "Remote website"))
Note : stop-if-error is an alias for the **and** function.
@@ -326,10 +326,10 @@ the detection and fixing it. You could want to receive a mail when
things need to be fixed in spare time, but mail other people if
things aren't fixed past a certain level.
-(escalation
- (=> mail-me disk-usage (:path "/" :limit 70))
- (=> sms-me disk-usage (:path "/" :limit 90))
- (=> buzzer disk-usage (:path "/" :limit 98)))
+ (escalation
+ (=> mail-me disk-usage :path "/" :limit 70)
+ (=> sms-me disk-usage :path "/" :limit 90)
+ (=> buzzer disk-usage :path "/" :limit 98))
In this example, we check the disk usage, I will get a mail through
"mail-me" alert if the disk usage goes above 70%. Once it goes
DIR diff --git a/config.lisp.sample b/config.lisp.sample
@@ -3,17 +3,17 @@
(alert mail "echo -n 'Problem with %function% %date% %params%' | mail -s alarm mail@isp.net")
(alert sms "/home/user/sms.sh '%date% %function% %params% %hostname%")
(alert available-variables "REMINDER : %function% %params% %date% %hostname% %desc% %level% %os% %newline% %result%")
-(alert void "")
+(alert empty "")
;; this is a comment
; this is also a comment
-(=> mail disk-usage (:path "/" :limit 90))
+(=> mail disk-usage :path "/" :limit 90)
-(=> mail service (:name "dovecot"))
-(=> mail service (:name "httpd"))
-(=> sms service (:name "smtpd"))
-(=> mail number-of-processes (:limit 100))
+(=> mail service :name "dovecot")
+(=> mail service :name "httpd")
+(=> sms service :name "smtpd")
+(=> mail number-of-processes :limit 100)
-(=> mail ping (:host "bitreich.org" :desc "Ping Bitreich"))
-(=> mail ping (:host "openbsd.org" :desc "Ping OpenBSD.org"))
+(=> mail ping :host "bitreich.org" :desc "Ping Bitreich")
+(=> mail ping :host "openbsd.org" :desc "Ping OpenBSD.org")
DIR diff --git a/example.lisp b/example.lisp
@@ -1,58 +1,62 @@
(load "functions.lisp")
(alert dont-use-it "REMINDER %function% %params% %date% %hostname% %desc% %level% %os% %newline% _ %space% %result%")
-(alert void "")
+(alert empty "")
(alert mail "")
+(alert peroket "echo 'problem at %date% with %function% %params%'")
(alert sms "echo -n '%date% %function% CRITICAL on %hostname%' | curl http://somewebservice")
;(alert mail "echo -n '%date% %hostname% had problem on %function% %newline% %params% values %result% %newline%
; %desc%' | mail -s '[Error] %function% - %hostname%' foo@bar.com")
;; check if used percent :path partition is more than :limit
-(=> mail disk-usage (:path "/" :limit 90))
-(=> mail disk-usage (:path "/usr" :limit 85))
-(=> mail disk-usage (:path "/tmp" :limit 1)) ;; failure
+(=> peroket disk-usage :path "/" :limit 90)
+(=> peroket disk-usage :path "/usr" :limit 85)
+(=> peroket disk-usage :path "/tmp" :limit 0) ;; failure
;; check if :path file exists
-(=> mail file-exists (:path "/bsd.rd" :desc "OpenBSD kernel /bsd.rd"))
-(=> void file-exists (:path "/non-existant-file")) ;; failure file not found
+(=> mail file-exists :path "/bsd.rd" :desc "OpenBSD kernel /bsd.rd")
+(=> empty file-exists :path "/non-existant-file") ;; failure file not found
;; check if :path file exists and has been updated since :limit minutes
-(=> void file-updated (:path "/var/log/messages" :limit 400))
-(=> mail file-updated (:path "/bsd.rd" :limit 1 :desc "OpenBSD kernel")) ;; failure
+(=> empty file-updated :path "/var/log/messages" :limit 400)
+(=> mail file-updated :path "/bsd.rd" :limit 1 :desc "OpenBSD kernel") ;; failure
;; check if :path pid file process is running
-(=> mail pid-running (:path "/var/run/xdm.pid" :desc "XDM pid"))
-(=> mail pid-running (:path "/home/user/test.pid")) ;; failure
+(=> mail pid-running :path "/var/run/xdm.pid" :desc "XDM pid")
+(=> mail pid-running :path "/home/user/test.pid") ;; failure
;; check if number of processes on the system is more than :limit
-(=> mail number-of-processes (:limit 200))
-(=> mail number-of-processes (:limit 1)) ;; failure
+(=> mail number-of-processes :limit 200)
+(=> mail number-of-processes :limit 1) ;; failure
;; check if service is running
-(=> mail service (:name "httpd"))
-(=> mail service (:name "ospfd")) ;; failure : not started
-(=> mail service (:name "unknown")) ;; failure : not known
+(=> mail service :name "httpd")
+(=> mail service :name "ospfd") ;; failure : not started
+(=> mail service :name "unknown") ;; failure : not known
;; check if load average on (1/5/15) minutes is more than :limit
-(=> mail load-average-1 (:limit 4))
-;;(=> mail load-average-5 (:limit 2))
-;;(=> mail load-average-15 (:limit 1))
-(=> mail load-average-1 (:limit 0.2)) ;; should trigger error
+(=> mail load-average-1 :limit 4)
+;;(=> mail load-average-5 :limit 2)
+;;(=> mail load-average-15 :limit 1)
+(=> mail load-average-1 :limit 0.2) ;; should trigger error
;; check if :host host is reachable
-;;(=> mail ping (:host "8.8.8.8" :desc "Google DNS"))
-;;(=> void ping (:host "127.40.30.21" :desc "Certainly not used address")) ;; fail time out
+;;(=> mail ping :host "8.8.8.8" :desc "Google DNS")
+;;(=> empty ping :host "127.40.30.21" :desc "Certainly not used address") ;; fail time out
+(loop for host in (list "8.8.8.8" "8.8.4.4" "127.0.0.1")
+ do
+ (=> empty ping :host host))
;; check if :command command return 0 (success) or something else (error)
-(=> void command (:command "echo hello")) ;; success
-(=> void command (:command "ls /non-existent-file")) ;; fail
+(=> empty command :command "echo hello") ;; success
+(=> empty command :command "ls /non-existent-file") ;; fail
;; check if web page :url answer under :limit
-(=> void command (:command "curl -m 10 http://google.fr/"))
+(=> empty command :command "curl -m 10 http://google.fr/")
;; check if the web page :url contains the text regex :pattern
-(=> void command (:command "curl http://google.fr/ | grep html"))
-(=> void command (:command "curl http://google.fr/ | grep hello")) ;; error
+(=> empty command :command "curl http://google.fr/ | grep html")
+(=> empty command :command "curl http://google.fr/ | grep hello") ;; error
(quit)
DIR diff --git a/functions.lisp b/functions.lisp
@@ -25,7 +25,9 @@
while pos)))
(defmacro create-probe(name &body code)
- `(progn (defun ,name(params) ,@code)))
+ `(progn
+ (defparameter ,name ',name)
+ (defun ,name(params) ,@code)))
(defun get-file-size(path)
(with-open-file (stream path)
@@ -38,7 +40,9 @@
(list nil (format nil "return code = ~a" code)))))
(defmacro alert(name string)
- `(progn (push (list ',name ,string)
+ `(progn
+ (defparameter ,name ',name)
+ (push (list ',name ,string)
*alerts*)))
(defun trigger-alert(level function params result)
@@ -67,17 +71,16 @@
`(progn
(or ,@body)))
-(defmacro =>(level fonction params)
- `(progn
- (format t "[~a~a ~20A~a] ~35A" *yellow* ',level ',fonction *white* (getf ',params :desc ',params))
- (let ((result (funcall ',fonction ',params)))
- (if (not (listp result))
- (progn
- (format t " => ~asuccess~a~%" *green* *white*)
- t)
- (progn
- (format t " => ~aerror~a~%" *red* *white*)
- (uiop:run-program (trigger-alert ',level ',fonction ',params (cadr result)) :output t)
- nil)))))
+(defun =>(level fonction &rest params)
+ (format t "[~a~a ~20A~a] ~35A" *yellow* level fonction *white* (getf params :desc params))
+ (let ((result (funcall fonction params)))
+ (if (not (listp result))
+ (progn
+ (format t " => ~asuccess~a~%" *green* *white*)
+ t)
+ (progn
+ (format t " => ~aerror~a~%" *red* *white*)
+ (uiop:run-program (trigger-alert level fonction params (cadr result)) :output t)
+ nil))))
(load "probes.lisp")