Commit
better logging, added reconnect
BBQigniter committed Mar 3, 2023
1 parent 3d4c587 commit 43b419d
Showing 9 changed files with 775 additions and 49 deletions.
3 changes: 2 additions & 1 deletion Dockerfile
@@ -2,13 +2,14 @@ FROM python:3.10.5-slim-buster

# install a few things we need for running this stuff or could be used for debugging
RUN apt-get update && apt-get install -y vim procps curl
-RUN pip install kubernetes psutil --disable-pip-version-check --no-cache-dir
+RUN pip install kubernetes psutil coloredlogs --disable-pip-version-check --no-cache-dir

RUN mkdir -p /opt/script/kube-vip-watcher/healthchecks
ADD kube-vip-watcher.py /opt/script/kube-vip-watcher/
RUN chmod +x /opt/script/kube-vip-watcher/kube-vip-watcher.py
ADD healthchecks/* /opt/script/kube-vip-watcher/healthchecks/
RUN chmod +x /opt/script/kube-vip-watcher/healthchecks/*.py
+ADD lib/* /opt/script/kube-vip-watcher/lib/

# Add the user UID:1000, GID:1000, home at /app
RUN groupadd -r kube-vip-watcher -g 1000 && \
102 changes: 97 additions & 5 deletions README.md
@@ -11,8 +11,9 @@ defined node and moves the VIP.
This can be particularly useful for pods whose services need kube-vip's `externalTrafficPolicy: Local` option configured, and
for better load-balancing to pods (exposed with multiple VIPs - ergo RR-DNS) running on different nodes.

> It can take up to about 20 seconds until traffic reaches a local pod after a failover. I assume this is related to default ARP-timeout settings.

> `kube-vip` itself must be running with the flag `svc_election` set to `true`.

Tested with kube-vip versions up to v0.5.10 - so far I've seen no real problems.
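
For a quick look at which services this applies to, the following minimal sketch uses the `kubernetes` Python module (the same library the watcher itself uses) to list all `LoadBalancer` services running with `externalTrafficPolicy: Local`. It only reads standard Service fields and assumes in-cluster credentials that may list services:

```python
#!/usr/bin/env python
# List all LoadBalancer services that use externalTrafficPolicy: Local -
# these are the services kube-vip-watcher is most useful for.
# A sketch: assumes it runs in-cluster with a service-account that may
# list services (use config.load_kube_config() outside the cluster).
from kubernetes import client, config

config.load_incluster_config()
v1 = client.CoreV1Api()

for svc in v1.list_service_for_all_namespaces().items:
    if (svc.spec.type == "LoadBalancer"
            and svc.spec.external_traffic_policy == "Local"):
        vips = [i.ip for i in (svc.status.load_balancer.ingress or [])]
        print(f"{svc.metadata.namespace}/{svc.metadata.name}: VIPs {vips}")
```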

## Workload examples

@@ -36,8 +37,99 @@ Create the partitions needed as configured in the corresponding yaml manifest-parts

A simple echoserver example.

-# Known Issues/Todos
+# Known Issues

* possibly a few test-cases are not covered
* better logging needed (e.g. if the service-account-token is incorrect/missing)
* implement the new service-account-token handling, see https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
* currently all logs are written to the console, so if you also send logs via syslog and scrape the pods' logs, log entries might be duplicated

# Libraries for Logging and Locking

One of my first "modules" - it works, but it can certainly be done better.

## Prerequisites

### cplogging.py
If you want to use it as a standalone plugin, the following Python modules are needed:

pip install coloredlogs
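
For reference, the smallest possible standalone use of `coloredlogs` looks roughly like this (a sketch, independent of `cplogging.py`; `%(hostname)s` is a field that `coloredlogs.install()` adds to log records):

```python
import logging
import coloredlogs

logger = logging.getLogger("demo")
# installs a colored console handler on the given logger
coloredlogs.install(
    level="DEBUG",
    logger=logger,
    fmt="%(asctime)s %(hostname)s %(name)s[%(process)d] %(levelname)s %(message)s",
)
logger.info("colored info message")
logger.debug("colored debug message")
```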

### lockJob.py
If you want to use it, you need `cplogging.py`. Besides that, this plugin also uses a
few settings from `settings.py`

## Examples
Have a look at `example.py`; it's well documented.

## settings.py
Used for defining default log-values. They can be overridden in your main program;
see `example.py` for more info.
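
The settings module itself is not shown in this commit excerpt; based on the overrides in `cp_logging_example.py` below, its shape is roughly the following (a sketch - the real default values may differ):

```python
# lib/settings.py - global defaults, meant to be overridden from the main program
import socket

global_log_level = "info"          # "info", "warning", "debug", "critical", "error"
global_log_file_path = "disabled"  # path to a log-file, or "disabled"
global_process_name = "kube-vip-watcher"
# format used for console, file and syslog handlers
global_log_format = "%(asctime)s " + socket.gethostname() + " " + global_process_name + \
    "[%(process)d]: MODULE: %(name)s LEVEL: %(levelname)s MESSAGE: %(message)s"
global_log_file_format = global_log_format
global_log_server_format = global_log_format
global_log_server_enable = False   # send logs to a syslog server via UDP?
global_log_server = ("localhost", 514)
```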

# Additional Notes

## Filebeat autodiscover-example

If you are using Filebeat for scraping the Kubernetes pods, you might add something
like this to your autodiscover section, if you set logging to JSON-format.

```yaml
...
autodiscover:
  providers:
    - type: kubernetes
      node: ${NODE_NAME}
      templates:
        ...
        # we scrape "ingress-nginx" logs and use the special available module
        - condition:
            equals:
              kubernetes.labels.app_kubernetes_io/name: "ingress-nginx"
          config:
            - module: nginx
              ingress_controller:
                input:
                  type: container
                  stream: stdout
                  paths:
                    - /var/log/containers/*${data.kubernetes.container.id}.log
              error:
                input:
                  type: container
                  stream: stderr
                  paths:
                    - /var/log/containers/*${data.kubernetes.container.id}.log
        # this will make filebeat scrape the "kube-vip-watcher"-logs and decode the JSON-message
        - condition:
            equals:
              kubernetes.container.name: "kube-vip-watcher"
          config:
            - type: container
              paths:
                - "/var/log/containers/*${data.kubernetes.container.id}.log"
              processors:
                - decode_json_fields:
                    fields: ["message"]
                    max_depth: 3
                    target: ""
                    overwrite_keys: true
        # fallback "condition" - scrape everything else as normal container
        - condition.and:
            ...
            # we explicitly set a rule for "ingress-nginx"
            - not.equals:
                kubernetes.labels.app_kubernetes_io/name: "ingress-nginx"
            # we explicitly set a rule for kube-vip-watcher
            - not.equals:
                kubernetes.container.name: "kube-vip-watcher"
          config:
            - type: container
              paths:
                - "/var/log/containers/*${data.kubernetes.container.id}.log"
...
```
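
The `decode_json_fields` processor above only works if each log line is a single JSON object. A stdlib-only sketch of such a formatter (the actual format produced by `cplogging.py` may differ):

```python
import json
import logging


class JsonFormatter(logging.Formatter):
    """Render each log record as one JSON object per line."""

    def format(self, record):
        return json.dumps({
            "timestamp": self.formatTime(record, "%Y-%m-%dT%H:%M:%S%z"),
            "module": record.name,
            "level": record.levelname,
            "message": record.getMessage(),
        })


handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter())
logging.getLogger("kube-vip-watcher").addHandler(handler)
```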

# Changelog

* v0.07 - initial release
* v0.08 - 2023-02-20
  - added better logging for easier debugging
  - fixed restarting of pods via adding reconnect-handling inside the script
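
The reconnect-handling mentioned in v0.08 lives in `kube-vip-watcher.py`, which is not shown in this excerpt; the general pattern with the `kubernetes` watch API looks roughly like this (a sketch, not the actual implementation; `handle_event` is a hypothetical stand-in for the watcher's logic):

```python
import logging
import time

from kubernetes import client, config, watch
from urllib3.exceptions import ProtocolError

logger = logging.getLogger("kube-vip-watcher")


def handle_event(event):
    # hypothetical handler - the real script inspects the service and moves VIPs
    obj = event["object"]
    logger.info("%s %s/%s", event["type"], obj.metadata.namespace, obj.metadata.name)


config.load_incluster_config()
v1 = client.CoreV1Api()

while True:
    try:
        # timeout_seconds makes the API server close the watch periodically,
        # so the loop falls through and reconnects with a fresh stream
        for event in watch.Watch().stream(v1.list_service_for_all_namespaces,
                                          timeout_seconds=60):
            handle_event(event)
    except (ProtocolError, client.exceptions.ApiException) as err:
        logger.warning("watch connection lost, reconnecting: %s", err)
        time.sleep(5)
```
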
82 changes: 82 additions & 0 deletions cp_logging_example.py
@@ -0,0 +1,82 @@
#!/usr/bin/env python

import time
import socket
import lib.settings
from lib.cplogging import Cplogging
from lib.lockJob import LockJob

# OVERRIDE GLOBAL SETTINGS from lib/settings.py
# log-level may be set to: "info" (default), "warning", "debug", "critical", "error"
lib.settings.global_log_level = "debug"
# where do you want a log-file - default "disabled"
lib.settings.global_log_file_path = "/home/administrator/PycharmProjects/log_n_lock/logs/example.log"
# if you override the global_filename you also have to override the log_format variables! else the override won't have any effect
lib.settings.global_process_name = "example.py"
# log_format for console, file and syslog
# - attributes: https://docs.python.org/2/library/logging.html#logrecord-attributes
# - hostname is a special attribute from the coloredlogs python module
# - %(asctime)s is configured in cplogging.py - see the lines with datefmt="%Y-%m-%dT%H:%M:%S.%F%z"
lib.settings.global_log_format = "%(asctime)s " + socket.gethostname() + " " + lib.settings.global_process_name + \
                                 "[%(process)d]: MODULE: %(name)s LEVEL: %(levelname)s MESSAGE: %(message)s"
lib.settings.global_log_file_format = "%(asctime)s " + socket.gethostname() + " " + lib.settings.global_process_name + \
                                      "[%(process)d]: MODULE: %(name)s LEVEL: %(levelname)s MESSAGE: %(message)s"
lib.settings.global_log_server_format = "%(asctime)s " + socket.gethostname() + " " + lib.settings.global_process_name + \
                                        "[%(process)d]: MODULE: %(name)s LEVEL: %(levelname)s MESSAGE: %(message)s"
# enable or disable sending to syslog
lib.settings.global_log_server_enable = True
# where should the syslogs be sent - IP/FQDN and port (UDP)
lib.settings.global_log_server = ("log-destination.example.com", 1514)


def main():
    # we create a separate logger for the main program - this shadows the corresponding variable,
    # so it writes to the correct logging-handler - in this case we use "main" as logger_name,
    # else it would default to __main__
    logger_name = "main"
    # other examples of how to create a log-handler:
    # logger = Cplogging("main", log_level=lib.settings.global_log_level, log_file_path=lib.settings.global_log_file_path)
    # logger = Cplogging(logger_name, log_file_path="/home/administrator/PycharmProjects/tests/log_n_lock/logs/example.log")
    # logger = Cplogging(logger_name, log_file_path="disabled")
    logger = Cplogging(logger_name)
    logger.info("info test")
    logger.debug("debug test")
    # this will explicitly use the "info" log-level
    lock1 = LockJob(
        process_name="example_part1",
        section="part1",
        log_level="info",
        # log_file_path="/home/administrator/PycharmProjects/tests/log_n_lock/logs/lockJob.log"
        # log_file_path="disabled"
    )
    lock1.create()

    logger.info("will sleep - while sleeping you can run \"watch -n1 'netstat -alp | grep example'\" on a console")
    time.sleep(5)
    # we destroy the lock as we do not need it anymore; the corresponding logger will be removed too
    lock1.destroy()
    logger.info("sleeping finished")
    # this will explicitly log the messages into a different log-file and set a higher log-level
    lock2 = LockJob(
        process_name="example_part2",
        section="part2",
        log_level="debug",
        # log_file_path="/home/administrator/PycharmProjects/tests/log_n_lock/logs/lockJob.log"
        log_file_path=lib.settings.global_log_file_path
    )
    # the lock will stay until the script is finished because we do not destroy it later, as we did with lock1
    lock2.create()
    logger.info("will sleep again")
    time.sleep(15)
    logger.info("sleeping finished")
    # enddef


if __name__ == '__main__':
    logger_name = "real_main"
    # this will explicitly disable logging to the globally defined log-file
    logger = Cplogging(logger_name, log_file_path="disabled")
    logger.debug("debug test starting main()")
    main()
    logger.debug("debug test finished main()")
    # endif
