init - reed-alert - Lightweight agentless alerting system for server
HTML git clone git://bitreich.org/reed-alert/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/reed-alert/
DIR Log
DIR Files
DIR Refs
DIR Tags
DIR README
DIR LICENSE
---
DIR commit 9410e05e37aefd2e1880178e655a8bd64173c645
HTML Author: solene rapenne <solene@dataswamp.org>
Date: Fri, 7 Oct 2016 12:25:49 +0200

init

Diffstat:
  A example.lisp | 50 +++++++++++++++++++++++++++++++
  A functions.lisp | 49 +++++++++++++++++++++++++++++++
  A probes.lisp | 92 +++++++++++++++++++++++++++++++

3 files changed, 191 insertions(+), 0 deletions(-)
---
DIR diff --git a/example.lisp b/example.lisp
@@ -0,0 +1,50 @@
+(defvar *alerts*
+  (list
+   '(dont-use-it ("REMINDER" function params date hostname description level os newline _ space result)) ; lists the template placeholders
+   '(void nil) ; empty template: the alert command is the empty string
+   '(mail nil) ; comes first, so ASSOC returns it and the mail template below is never used
+   '(sms ("echo -n '" date _ function " CRITICAL " hostname "' | curl http://somewebservice"))
+   '(mail ("echo -n '" date _ hostname " had problem on " function newline params _ " values " result newline
+           description "' | mail -s '[Error] " function " - " hostname "' foo@bar.com"))
+   '(with-plus ("echo -n '" + date + _ + hostname + " had problem on " + function + newline + params + newline +
+                description + "' | mail -s '[Error] " + function + " - " + hostname + "' foo@bar.com")))) ; each + expands to ""
+
+(load "functions.lisp")
+
+;; check if used percent :path partition is more than :limit
+(=> mail disk-usage (:path "/" :limit 90))
+(=> mail disk-usage (:path "/usr" :limit 85))
+(=> mail disk-usage (:path "/tmp" :limit 1)) ;; failure
+
+;; check if :path file exists
+(=> mail file-exists (:path "/bsd.rd" :desc "OpenBSD kernel /bsd.rd"))
+(=> void file-exists (:path "/non-existant-file")) ;; failure file not found
+
+;; check if :path file exists and has been updated since :limit minutes
+(=> void file-updated (:path "/var/log/messages" :limit 400))
+(=> mail file-updated (:path "/bsd.rd" :limit 1 :desc "OpenBSD kernel")) ;; failure
+
+;; check if :path pid file process is running
+(=> mail pid-running (:path "/var/run/xdm.pid" :desc "XDM pid"))
+(=> mail pid-running (:path "/home/user/test.pid")) ;; failure
+
+;; check if number of processes on the system is more than :limit
+(=> mail number-of-processes (:limit 200))
+(=> mail number-of-processes (:limit 1)) ;; failure
+
+;; check if load average on (1/5/15) minutes is more than :limit
+(=> mail load-average-1 (:limit 4))
+(=> mail load-average-5 (:limit 2))
+(=> mail load-average-15 (:limit 1))
+(=> mail load-average-1 (:limit 0.2)) ;; should trigger error
+
+;; check if :host host is reachable
+(=> mail ping (:host "8.8.8.8" :desc "Google DNS"))
+(=> void ping (:host "2.3.4.256" :desc "Not valid ipv4 address")) ;; fail error
+(=> void ping (:host "127.40.30.21" :desc "Certainly not used address")) ;; fail time out
+
+;; check if :command command returns 0 (success) or something else (error)
+(=> void command (:command "echo hello")) ;; success
+(=> void command (:command "ls /non-existent-file")) ;; fail
+
+(quit)
DIR diff --git a/functions.lisp b/functions.lisp
@@ -0,0 +1,49 @@
+(load "probes.lisp")
+
+(defun color(num1 num2) ; build the terminal escape sequence ESC[num1;num2m
+  (format nil "~a[~a;~am" #\Escape num1 num2))
+
+(defparameter *red* (color 1 31))
+(defparameter *white* (color 0 70))
+(defparameter *green* (color 1 32))
+(defparameter *yellow* (color 0 33))
+
+(defun trigger-alert(level function params result) ; returns the command string built from the LEVEL template in *alerts*
+  (format nil "~{~a~}" ; concatenate the expanded template items
+          (mapcar #'(lambda(x) ; NOTE: values are spliced in unquoted; => passes the string to uiop:run-program
+                      (if (symbolp x) ; symbols are placeholders, anything else is copied verbatim
+                          (case x
+                            (+ "") ; purely visual separator
+                            (result result)
+                            (hostname (machine-instance))
+                            (date (multiple-value-bind
+                                        (second minute hour day month year)
+                                      (get-decoded-time)
+                                    (format nil "~d/~2,'0d/~2,'0d ~2,'0d:~2,'0d:~2,'0d" year month day hour minute second))) ; zero-padded
+                            (os (software-type))
+                            (function function)
+                            (space " ")
+                            (_ " ")
+                            (params params)
+                            ((desc description) (getf params :desc "")) ; the templates in example.lisp spell it DESCRIPTION
+                            (newline #\Newline)
+                            (level level))
+                          x))
+                  (cadr (assoc level *alerts*))))) ; ASSOC returns the first entry named LEVEL
+
+(defmacro stop-if-error(&body body) ; evaluate BODY forms in order, stopping at the first one that returns NIL
+  `(progn
+     (and ,@body)))
+
+(defmacro =>(level fonction params) ; run probe FONCTION on PARAMS, print the outcome, run the LEVEL alert on failure
+  `(progn
+     (format t "[~a~a ~20A~a] ~35A" *yellow* ',level ',fonction *white* (getf ',params :desc ',params))
+     (let ((result (funcall ',fonction ',params)))
+       (if (not (listp result)) ; probes return T on success and a list (NIL detail) on failure
+           (progn
+             (format t " => ~asuccess~a~%" *green* *white*)
+             t)
+           (progn
+             (format t " => ~aerror~a~%" *red* *white*)
+             (uiop:run-program (trigger-alert ',level ',fonction ',params (cadr result)) :output t)
+             nil)))))
DIR diff --git a/probes.lisp b/probes.lisp
@@ -0,0 +1,92 @@
+(defmacro create-probe(name &body code) ; define function NAME of one argument PARAMS (a plist) whose body is CODE
+  `(progn (defun ,name(params) ,@code)))
+
+(defun command-return-code(command) ; T when COMMAND exits with code 0, otherwise (NIL "return code = N")
+  (let ((code (nth-value 2 (uiop:run-program command :ignore-error-status t))))
+    (if (= 0 code)
+        t
+        (list nil (format nil "return code = ~a" code)))))
+
+(create-probe
+ file-exists ; succeeds when PROBE-FILE finds :path
+ (let ((result (probe-file (getf params :path))))
+   (if result
+       t
+       (list nil "file not found"))))
+
+(create-probe
+ file-updated ; succeeds when :path exists and its age in whole minutes is below :limit
+ (if (probe-file (getf params :path))
+     (with-open-file (file (getf params :path))
+       (let* ((write-date (file-write-date file))
+              (now (get-universal-time))
+              (result (floor (- now write-date) 60))) ; age in minutes
+         (if (> (getf params :limit) result)
+             t
+             (list nil result))))
+     (list nil "file not found")))
+
+(create-probe
+ pid-running ; succeeds when `ps -p` exits 0 for the pid read from the first line of :path
+ (if (probe-file (getf params :path))
+     (let ((pid-number (with-open-file (stream (getf params :path)) (read-line stream nil "")))) ; empty file => "" instead of an EOF error
+       (command-return-code (list "ps" "-p" pid-number)))
+     (list nil "file not found")))
+
+(create-probe
+ disk-usage ; succeeds when the used percentage reported by df for :path is below :limit
+ (let* ((output (uiop:run-program (list "df" "-P" (getf params :path)) :output :lines)) (line (second output))) ; -P keeps each filesystem on one line
+   (let ((percent-character-pos (position #\% line)))
+     (let ((used-disk
+             (parse-integer ; the leading space kept by SUBSEQ is ignored by PARSE-INTEGER
+              (subseq line
+                      (position #\Space line :end percent-character-pos :from-end t)
+                      percent-character-pos))))
+       (if (< used-disk (getf params :limit))
+           t
+           (list nil used-disk)))))) ; report the measured value, not a literal string
+
+(defun system-load(time) ; TIME is the awk field number of the `uptime` output to read and parse
+  (read-from-string
+   (let ((command (concatenate 'string
+                               "uptime | awk '{ print $"
+                               (princ-to-string time)
+                               " }'")))
+     (uiop:run-program command :output :string)))) ; NOTE(review): the field position depends on the uptime output format
+
+(create-probe
+ load-average-1 ; succeeds when uptime field 10 is below :limit
+ (let ((load (system-load 10)))
+   (if (< load (getf params :limit))
+       t
+       (list nil load))))
+
+(create-probe
+ load-average-5 ; succeeds when uptime field 11 is below :limit
+ (let ((load (system-load 11)))
+   (if (< load (getf params :limit))
+       t
+       (list nil load))))
+
+(create-probe
+ load-average-15 ; succeeds when uptime field 12 is below :limit
+ (let ((load (system-load 12)))
+   (if (< load (getf params :limit))
+       t
+       (list nil load))))
+
+(create-probe
+ command ; succeeds when :command exits with code 0
+ (command-return-code (getf params :command)))
+
+(create-probe
+ ping ; succeeds when `ping -c2 :host` exits with code 0
+ (command-return-code (list "ping" "-c2" (getf params :host))))
+
+(create-probe
+ number-of-processes ; succeeds when the number of processes listed by `ps aux` is below :limit
+ (let* ((output (uiop:run-program (list "ps" "aux") :output :lines))
+        (result (1- (length output)))) ; do not count the header line printed by ps
+   (if (> (getf params :limit) result)
+       t
+       (list nil result))))