feat: Implement Backend Manager for PLC S7-315 Streamer Watchdog Service
- Added backmanager.py to monitor backend health and restart if necessary. - Introduced backend_manager.status to track the state of the backend. - Updated system_state.json to reflect changes in active datasets. - Modified rotating_logger.py for improved log messages. - Enhanced main.spec to support separate executables for main application and backend manager. - Created backmanager.spec and backmanager_config.json for backend manager configuration. - Added build_all.bat for streamlined build process.
This commit is contained in:
parent
ee6918445e
commit
082f8b1790
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,9 @@
|
|||
{
|
||||
"timestamp": "2025-08-22T14:55:09.660532",
|
||||
"status": "healthy",
|
||||
"restart_count": 0,
|
||||
"last_restart": 0,
|
||||
"backend_pid": 33120,
|
||||
"manager_pid": 1488,
|
||||
"details": {}
|
||||
}
|
|
@ -0,0 +1,380 @@
|
|||
"""
|
||||
Backend Manager - PLC S7-315 Streamer Watchdog Service
|
||||
|
||||
This script monitors the backend health and automatically restarts it when needed.
|
||||
It runs as a separate process and ensures the backend is always available.
|
||||
|
||||
Key features:
|
||||
- Health monitoring every 30 seconds
|
||||
- Automatic restart of failed backends
|
||||
- Support for both development (main.py) and production (exe) environments
|
||||
- Robust process management and cleanup
|
||||
- Logging and status reporting
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import json
|
||||
import psutil
|
||||
import requests
|
||||
import subprocess
|
||||
import threading
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
|
||||
class BackendManager:
    """Watchdog that polls the backend health endpoint and relaunches it on failure.

    The manager never owns the backend process: it launches it through the
    Windows ``start`` command in a separate console window and afterwards only
    observes it through HTTP health checks and the PID stored in the lock file.

    Restart policy: up to ``max_restart_attempts`` consecutive relaunches are
    attempted; once the limit is hit the manager waits ``restart_cooldown``
    seconds before trying again.  The consecutive-attempt counter is cleared
    only by a successful health check.
    """

    # Interpreter preferred for development runs; sys.executable is used when
    # this path does not exist on the current machine.
    DEV_CONDA_PYTHON = r"C:\Users\migue\miniconda3\envs\snap7v12\python.exe"

    # File name of the packaged backend executable.
    BACKEND_EXE_NAME = "S7_Streamer_Logger.exe"

    # Seconds to wait after issuing the launch command before returning.
    STARTUP_WAIT_SECONDS = 10

    def __init__(self,
                 check_interval: int = 30,
                 health_timeout: float = 5.0,
                 restart_delay: int = 10,
                 max_restart_attempts: int = 3,
                 restart_cooldown: int = 300):
        """
        Initialize the backend manager

        Args:
            check_interval: Health check interval in seconds (default: 30)
            health_timeout: HTTP request timeout in seconds (default: 5.0)
            restart_delay: Delay before restart attempt in seconds (default: 10)
            max_restart_attempts: Maximum consecutive restart attempts (default: 3)
            restart_cooldown: Cooldown period after max attempts in seconds (default: 300)
        """
        self.check_interval = check_interval
        self.health_timeout = health_timeout
        self.restart_delay = restart_delay
        self.max_restart_attempts = max_restart_attempts
        self.restart_cooldown = restart_cooldown

        # Configuration
        self.backend_port = 5050
        self.health_endpoint = "/api/health"
        self.base_url = f"http://localhost:{self.backend_port}"
        self.lock_file = "plc_streamer.lock"
        self.status_file = "backend_manager.status"

        # State tracking
        self.restart_count = 0
        self.last_restart_time = 0
        self.backend_process = None
        self.running = True

        # Setup logging
        self.setup_logging()

        # Detect environment (PyInstaller sets sys.frozen on packaged builds)
        self.is_packaged = getattr(sys, 'frozen', False)

        self.log("[MAIN] Backend Manager initialized")
        self.log(f"[CONFIG] Check interval: {check_interval}s")
        self.log(f"[CONFIG] Environment: {'Packaged' if self.is_packaged else 'Development'}")
        self.log("[CONFIG] Process separation: Independent cmd windows")

    def setup_logging(self):
        """Configure root logging to a UTF-8 log file and to stdout."""
        log_format = '%(asctime)s [%(levelname)s] %(message)s'

        # File handler writes backend_manager.log with UTF-8 encoding
        file_handler = logging.FileHandler('backend_manager.log', encoding='utf-8')
        file_handler.setFormatter(logging.Formatter(log_format))

        # Console handler writes to stdout using the stream's own encoding
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setFormatter(logging.Formatter(log_format))

        # Both handlers already carry a formatter, so no `format=` is needed.
        logging.basicConfig(
            level=logging.INFO,
            handlers=[file_handler, console_handler]
        )
        self.logger = logging.getLogger(__name__)

    def log(self, message: str, level: str = "INFO"):
        """Log ``message``; ``level`` is "ERROR", "WARN" or anything else for INFO."""
        if level == "ERROR":
            self.logger.error(message)
        elif level == "WARN":
            self.logger.warning(message)
        else:
            self.logger.info(message)

    @staticmethod
    def _script_dir() -> str:
        """Return the absolute directory containing this script."""
        # abspath guards against a relative __file__, whose dirname would be ''.
        return os.path.dirname(os.path.abspath(__file__))

    def get_backend_command(self) -> list:
        """Return the argv list that launches the backend in the current environment.

        Packaged builds run the backend executable located next to the manager
        executable, falling back to the bare file name.  Development runs
        execute ``main.py`` with the preferred conda interpreter, falling back
        to the interpreter running the manager.
        """
        if self.is_packaged:
            bundled_exe = os.path.join(os.path.dirname(sys.executable), self.BACKEND_EXE_NAME)
            if os.path.exists(bundled_exe):
                return [bundled_exe]
            # Fallback to exe in current directory
            return [self.BACKEND_EXE_NAME]

        if os.path.exists(self.DEV_CONDA_PYTHON):
            interpreter = self.DEV_CONDA_PYTHON
        else:
            # Fallback to current python
            interpreter = sys.executable
        return [interpreter, os.path.join(self._script_dir(), "main.py")]

    def is_backend_alive(self) -> bool:
        """Return True when the health endpoint answers with a 2xx status."""
        try:
            response = requests.get(
                f"{self.base_url}{self.health_endpoint}",
                timeout=self.health_timeout
            )
        except requests.RequestException:
            # Base class of the connection and timeout errors raised by requests.
            return False
        except Exception as e:
            self.log(f"[ERROR] Unexpected error during health check: {e}", "ERROR")
            return False
        return 200 <= response.status_code < 300

    def get_backend_pid(self) -> Optional[int]:
        """Return the PID stored in the lock file, or None if missing or unreadable."""
        try:
            with open(self.lock_file, 'r') as f:
                return int(f.read().strip())
        except (ValueError, OSError):
            # Missing file, unreadable file, or content that is not an integer.
            return None

    def is_backend_process_running(self, pid: int) -> bool:
        """Return True if ``pid`` exists and its command line looks like the backend."""
        try:
            if not psutil.pid_exists(pid):
                return False

            proc = psutil.Process(pid)
            cmdline = " ".join(proc.cmdline()).lower()

            # Guard against a recycled PID that now belongs to an unrelated process.
            signatures = ["main.py", "s7_streamer_logger", "plc_streamer"]
            return any(sig in cmdline for sig in signatures)
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            return False

    def cleanup_zombie_process(self, pid: int) -> bool:
        """Terminate the stale backend process ``pid``.

        Returns:
            True if the process is gone (already absent, terminated or killed),
            False if it could not be terminated.
        """
        try:
            if not psutil.pid_exists(pid):
                return True

            proc = psutil.Process(pid)
            self.log(f"[STOP] Terminating zombie process {pid} ({proc.name()})")

            # Try graceful termination first
            proc.terminate()
            try:
                proc.wait(timeout=10)
            except psutil.TimeoutExpired:
                # Force kill
                self.log(f"[FORCE] Force killing process {pid}")
                proc.kill()
                proc.wait(timeout=5)
                self.log(f"[KILL] Process {pid} force killed")
                return True

            self.log(f"[OK] Process {pid} terminated gracefully")
            return True

        except psutil.NoSuchProcess:
            # It exited on its own between the checks above.
            return True
        except psutil.AccessDenied:
            # The process is still alive; do not report success.
            self.log(f"[ERROR] Access denied terminating process {pid}", "ERROR")
            return False
        except Exception as e:
            self.log(f"[ERROR] Error terminating process {pid}: {e}", "ERROR")
            return False

    def cleanup_lock_file(self):
        """Remove the lock file if it exists; removal errors are logged, not raised."""
        try:
            os.remove(self.lock_file)
        except FileNotFoundError:
            return
        except Exception as e:
            self.log(f"[ERROR] Error removing lock file: {e}", "ERROR")
            return
        self.log(f"[OK] Removed lock file: {self.lock_file}")

    def get_cmd_command(self) -> str:
        """Return the Windows ``start`` command that opens the backend in a new console."""
        window_title = "S7_Streamer_Logger" if self.is_packaged else "PLC_Backend"
        quoted_args = " ".join(f'"{part}"' for part in self.get_backend_command())
        return f'start "{window_title}" {quoted_args}'

    def start_backend(self) -> bool:
        """Launch the backend in a separate Windows cmd console.

        Returns:
            True once the launch command has been issued and the startup wait
            has elapsed, False if issuing it raised.  True does NOT mean the
            backend is healthy; that is established by the next health check.
        """
        try:
            cmd_command = self.get_cmd_command()
            self.log(f"[START] Starting backend in separate cmd window: {cmd_command}")

            # `start` is a cmd.exe builtin, so the command must go through the
            # shell.  The string is built only from local paths.
            self.backend_process = subprocess.Popen(
                cmd_command,
                cwd=None if self.is_packaged else self._script_dir(),
                shell=True
            )

            self.log(f"[START] Backend launch command executed with PID: {self.backend_process.pid}")

            # Wait a moment for the actual backend to start in its new window
            self.log(f"[WAIT] Waiting {self.STARTUP_WAIT_SECONDS} seconds for backend to initialize in separate window...")
            time.sleep(self.STARTUP_WAIT_SECONDS)

            # The subprocess.Popen PID is just the cmd launcher, not the actual backend.
            # Health is verified via HTTP instead of process tracking.
            self.log("[OK] Backend launch completed, will verify via health check")
            return True

        except Exception as e:
            self.log(f"[ERROR] Error starting backend: {e}", "ERROR")
            return False

    def handle_backend_failure(self) -> bool:
        """Clean up after a failed health check and try to relaunch the backend.

        Returns:
            True if a relaunch command was issued, False when in cooldown, when
            the consecutive-attempt limit was just reached, or when the launch
            itself failed.
        """
        current_time = time.time()

        # Check if we're in cooldown period
        elapsed = current_time - self.last_restart_time
        if elapsed < self.restart_cooldown:
            time_left = self.restart_cooldown - elapsed
            self.log(f"[WAIT] In cooldown period, {int(time_left)}s remaining")
            return False

        # Check restart attempt limit; hitting it starts the cooldown window
        if self.restart_count >= self.max_restart_attempts:
            self.log(f"[FAIL] Maximum restart attempts ({self.max_restart_attempts}) reached")
            self.restart_count = 0
            self.last_restart_time = current_time
            return False

        # Cleanup existing processes
        backend_pid = self.get_backend_pid()
        if backend_pid and self.is_backend_process_running(backend_pid):
            self.log(f"[STOP] Cleaning up zombie backend process: {backend_pid}")
            self.cleanup_zombie_process(backend_pid)

        self.cleanup_lock_file()

        # Wait before restart
        self.log(f"[WAIT] Waiting {self.restart_delay}s before restart attempt {self.restart_count + 1}")
        time.sleep(self.restart_delay)

        # Attempt restart
        self.restart_count += 1
        if self.start_backend():
            # Do NOT reset restart_count here: start_backend() only confirms
            # that the launcher ran.  The counter is cleared by run() after a
            # passing health check, so a backend that keeps dying on startup
            # eventually reaches the attempt limit and the cooldown.
            self.log(f"[OK] Backend restarted successfully (attempt {self.restart_count})")
            return True

        self.log(f"[FAIL] Backend restart failed (attempt {self.restart_count})", "ERROR")
        return False

    def update_status(self, status: str, details: Optional[Dict[str, Any]] = None):
        """Write the current manager state to the status file as JSON.

        Args:
            status: Short state label stored under the "status" key.
            details: Optional extra data stored under "details" (default: {}).
        """
        try:
            status_data = {
                "timestamp": datetime.now().isoformat(),
                "status": status,
                "restart_count": self.restart_count,
                "last_restart": self.last_restart_time,
                "backend_pid": self.get_backend_pid(),
                "manager_pid": os.getpid(),
                "details": details or {}
            }

            with open(self.status_file, 'w') as f:
                json.dump(status_data, f, indent=2)

        except Exception as e:
            # Status reporting is best effort and must never stop the watchdog.
            self.log(f"[ERROR] Error updating status file: {e}", "ERROR")

    def run(self):
        """Main monitoring loop; runs until ``self.running`` is cleared or Ctrl+C."""
        self.log(f"[START] Backend Manager started (PID: {os.getpid()})")
        self.update_status("starting")

        while self.running:
            try:
                # Check backend health
                if self.is_backend_alive():
                    self.log("[OK] Backend is healthy")
                    # Reset before writing the status so the file reports 0.
                    self.restart_count = 0
                    self.update_status("healthy")
                else:
                    self.log("[WARN] Backend health check failed", "WARN")
                    self.update_status("unhealthy")

                    # Attempt to handle the failure
                    if self.handle_backend_failure():
                        self.update_status("restarted")
                    else:
                        self.update_status("failed")

                # Wait for next check
                time.sleep(self.check_interval)

            except KeyboardInterrupt:
                self.log("[SHUTDOWN] Received interrupt signal")
                self.running = False
                break
            except Exception as e:
                self.log(f"[ERROR] Unexpected error in main loop: {e}", "ERROR")
                self.update_status("error", {"error": str(e)})
                time.sleep(self.check_interval)

        self.shutdown()

    def shutdown(self):
        """Record the shutdown in the log and status file; the backend is left running."""
        self.log("[SHUTDOWN] Backend Manager shutting down")
        self.update_status("shutting_down")

        # Don't terminate any backend processes - they run independently in their own cmd windows
        # The manager only monitors health, doesn't control the backend lifecycle directly
        self.log("[OK] Backend Manager stopped - backend continues running independently")
        self.update_status("stopped")
|
||||
|
||||
|
||||
def main():
    """Print the banner, run the watchdog, and return a process exit code.

    Returns:
        0 after a normal stop or a user interrupt, 1 if an unexpected
        exception escaped the manager.
    """
    for banner_line in ("Backend Manager - PLC S7-315 Streamer Watchdog", "=" * 50):
        print(banner_line)

    exit_code = 0
    try:
        watchdog = BackendManager()
        watchdog.run()
    except KeyboardInterrupt:
        # Ctrl+C outside the monitoring loop still counts as a clean exit.
        print("\n[SHUTDOWN] Backend Manager interrupted by user")
    except Exception as e:
        print(f"[ERROR] Critical error: {e}")
        exit_code = 1

    return exit_code


if __name__ == "__main__":
    sys.exit(main())
|
|
@ -76,9 +76,9 @@ class RotatingFileHandler(logging.Handler):
|
|||
oldest_file = log_files.pop(0)
|
||||
try:
|
||||
os.remove(oldest_file)
|
||||
print(f"🗑️ Removed old log file: {os.path.basename(oldest_file)}")
|
||||
print(f"[CLEANUP] Removed old log file: {os.path.basename(oldest_file)}")
|
||||
except OSError as e:
|
||||
print(f"⚠️ Could not remove {oldest_file}: {e}")
|
||||
print(f"[WARNING] Could not remove {oldest_file}: {e}")
|
||||
|
||||
def emit(self, record):
|
||||
"""Emitir un registro de log"""
|
||||
|
|
88
main.spec
88
main.spec
|
@ -4,7 +4,8 @@ import sys
|
|||
|
||||
block_cipher = None
|
||||
|
||||
a = Analysis(
|
||||
# Analysis for main application (backend)
|
||||
a_main = Analysis(
|
||||
['main.py'],
|
||||
pathex=[],
|
||||
binaries=[
|
||||
|
@ -80,6 +81,7 @@ a = Analysis(
|
|||
'utils.json_manager',
|
||||
'utils.symbol_loader',
|
||||
'utils.symbol_processor',
|
||||
'utils.instance_manager',
|
||||
],
|
||||
hookspath=[],
|
||||
hooksconfig={},
|
||||
|
@ -102,11 +104,57 @@ a = Analysis(
|
|||
noarchive=False,
|
||||
)
|
||||
|
||||
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
|
||||
# Analysis for backend manager (watchdog)
|
||||
a_manager = Analysis(
|
||||
['backmanager.py'],
|
||||
pathex=[],
|
||||
binaries=[],
|
||||
datas=[
|
||||
# Include utils for instance management
|
||||
('utils', 'utils'),
|
||||
],
|
||||
hiddenimports=[
|
||||
# System and monitoring dependencies
|
||||
'psutil',
|
||||
'psutil._pswindows',
|
||||
'psutil._psutil_windows',
|
||||
'requests',
|
||||
'subprocess',
|
||||
'logging',
|
||||
'json',
|
||||
|
||||
# Utils modules needed by manager
|
||||
'utils.instance_manager',
|
||||
],
|
||||
hookspath=[],
|
||||
hooksconfig={},
|
||||
runtime_hooks=[],
|
||||
excludes=[
|
||||
# Exclude heavy packages not needed by manager
|
||||
'matplotlib',
|
||||
'scipy',
|
||||
'IPython',
|
||||
'notebook',
|
||||
'jupyter',
|
||||
'flask',
|
||||
'snap7',
|
||||
'pandas',
|
||||
'numpy',
|
||||
],
|
||||
win_no_prefer_redirects=False,
|
||||
win_private_assemblies=False,
|
||||
cipher=block_cipher,
|
||||
noarchive=False,
|
||||
)
|
||||
|
||||
exe = EXE(
|
||||
pyz,
|
||||
a.scripts,
|
||||
# Build PYZ files
|
||||
pyz_main = PYZ(a_main.pure, a_main.zipped_data, cipher=block_cipher)
|
||||
pyz_manager = PYZ(a_manager.pure, a_manager.zipped_data, cipher=block_cipher)
|
||||
|
||||
# Build main backend executable
|
||||
exe_main = EXE(
|
||||
pyz_main,
|
||||
a_main.scripts,
|
||||
[],
|
||||
exclude_binaries=True,
|
||||
name='S7_Streamer_Logger',
|
||||
|
@ -121,7 +169,35 @@ exe = EXE(
|
|||
codesign_identity=None,
|
||||
entitlements_file=None,
|
||||
)
|
||||
coll = COLLECT(exe, a.binaries, a.zipfiles, a.datas, strip=False, upx=True, upx_exclude=[], name='main')
|
||||
|
||||
# Build backend manager executable
|
||||
exe_manager = EXE(
|
||||
pyz_manager,
|
||||
a_manager.scripts,
|
||||
[],
|
||||
exclude_binaries=True,
|
||||
name='Backend_Manager',
|
||||
debug=False,
|
||||
bootloader_ignore_signals=False,
|
||||
strip=False,
|
||||
upx=True,
|
||||
console=True,
|
||||
disable_windowed_traceback=False,
|
||||
argv_emulation=False,
|
||||
target_arch=None,
|
||||
codesign_identity=None,
|
||||
entitlements_file=None,
|
||||
)
|
||||
|
||||
# Collect all files together
|
||||
coll = COLLECT(
|
||||
exe_main, a_main.binaries, a_main.zipfiles, a_main.datas,
|
||||
exe_manager, a_manager.binaries, a_manager.zipfiles, a_manager.datas,
|
||||
strip=False,
|
||||
upx=True,
|
||||
upx_exclude=[],
|
||||
name='main'
|
||||
)
|
||||
|
||||
# Post-build: Copy config directory to the same level as the executable
|
||||
import shutil
|
||||
|
|
|
@ -4,11 +4,10 @@
|
|||
"should_stream": false,
|
||||
"active_datasets": [
|
||||
"DAR",
|
||||
"Test",
|
||||
"Fast"
|
||||
"Fast",
|
||||
"Test"
|
||||
]
|
||||
},
|
||||
"auto_recovery_enabled": true,
|
||||
"last_update": "2025-08-22T14:03:25.041057",
|
||||
"plotjuggler_path": "C:\\Program Files\\PlotJuggler\\plotjuggler.exe"
|
||||
"last_update": "2025-08-22T14:54:15.476402"
|
||||
}
|
Loading…
Reference in New Issue