From e517f40a5d51cc26095a3f9189cd254ada85fe15 Mon Sep 17 00:00:00 2001 From: Miguel Date: Fri, 15 Aug 2025 00:28:21 +0200 Subject: [PATCH] feat: Enhance application event logging, improve CSV handling, and add CSV validator utility --- application_events.json | 206 +++++++++++++++- config/data/plot_definitions.json | 2 +- frontend/src/components/ChartjsPlot.jsx | 67 +++++- main.py | 112 +++++++-- system_state.json | 8 +- utils/csv_validator.py | 298 ++++++++++++++++++++++++ 6 files changed, 661 insertions(+), 32 deletions(-) create mode 100644 utils/csv_validator.py diff --git a/application_events.json b/application_events.json index b8703b0..ad23ecb 100644 --- a/application_events.json +++ b/application_events.json @@ -3726,8 +3726,210 @@ "trigger_variable": null, "auto_started": true } + }, + { + "timestamp": "2025-08-15T00:22:03.893270", + "level": "info", + "event_type": "application_started", + "message": "Application initialization completed successfully", + "details": {} + }, + { + "timestamp": "2025-08-15T00:22:03.973434", + "level": "info", + "event_type": "dataset_activated", + "message": "Dataset activated: DAR", + "details": { + "dataset_id": "DAR", + "variables_count": 2, + "streaming_count": 2, + "prefix": "gateway_phoenix" + } + }, + { + "timestamp": "2025-08-15T00:22:03.978435", + "level": "info", + "event_type": "dataset_activated", + "message": "Dataset activated: Fast", + "details": { + "dataset_id": "Fast", + "variables_count": 2, + "streaming_count": 1, + "prefix": "fast" + } + }, + { + "timestamp": "2025-08-15T00:22:03.984513", + "level": "info", + "event_type": "csv_recording_started", + "message": "CSV recording started: 2 datasets activated", + "details": { + "activated_datasets": 2, + "total_datasets": 3 + } + }, + { + "timestamp": "2025-08-15T00:22:03.990721", + "level": "info", + "event_type": "udp_streaming_started", + "message": "UDP streaming to PlotJuggler started", + "details": { + "udp_host": "127.0.0.1", + "udp_port": 9870, + "datasets_available": 3 + } + }, + { + "timestamp": "2025-08-15T00:22:53.358065", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 20, + "trigger_variable": null, + "auto_started": true + } + }, + { + "timestamp": "2025-08-15T00:23:20.844836", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 20, + "trigger_variable": null, + "auto_started": true + } + }, + { + "timestamp": "2025-08-15T00:23:59.638322", + "level": "info", + "event_type": "udp_streaming_stopped", + "message": "UDP streaming to PlotJuggler stopped (CSV recording continues)", + "details": {} + }, + { + "timestamp": "2025-08-15T00:24:35.808418", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 20, + "trigger_variable": null, + "auto_started": true + } + }, + { + "timestamp": "2025-08-15T00:26:15.271771", + "level": "info", + "event_type": "application_started", + "message": "Application initialization completed successfully", + "details": {} + }, + { + "timestamp": "2025-08-15T00:26:15.365923", + "level": "info", + "event_type": "dataset_activated", + "message": "Dataset activated: DAR", + "details": { + "dataset_id": "DAR", + "variables_count": 2, + "streaming_count": 2, + "prefix": "gateway_phoenix" + } + }, + { + "timestamp": "2025-08-15T00:26:15.372925", + "level": "info", + "event_type": "dataset_activated", + "message": "Dataset activated: Fast", + "details": { + "dataset_id": "Fast", + "variables_count": 2, + "streaming_count": 1, + "prefix": "fast" + } + }, + { + "timestamp": "2025-08-15T00:26:15.377433", + "level": "info", + "event_type": "csv_recording_started", + "message": "CSV recording started: 2 datasets activated", + "details": { + "activated_datasets": 2, + "total_datasets": 3 + } + }, + { + "timestamp": "2025-08-15T00:26:31.108086", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 20, + "trigger_variable": null, + "auto_started": true + } + }, + { + "timestamp": "2025-08-15T00:27:24.396654", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 20, + "trigger_variable": null, + "auto_started": true + } + }, + { + "timestamp": "2025-08-15T00:27:40.211134", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 500, + "trigger_variable": null, + "auto_started": true + } } ], - "last_updated": "2025-08-15T00:17:19.296417", - "total_entries": 344 + "last_updated": "2025-08-15T00:27:40.211134", + "total_entries": 360 } \ No newline at end of file diff --git a/config/data/plot_definitions.json b/config/data/plot_definitions.json index b3bca0c..65b829e 100644 --- a/config/data/plot_definitions.json +++ b/config/data/plot_definitions.json @@ -7,7 +7,7 @@ "point_hover_radius": 4, "point_radius": 0, "stepped": false, - "time_window": 20, + "time_window": 500, "trigger_enabled": false, "trigger_on_true": true, "trigger_variable": null, diff --git a/frontend/src/components/ChartjsPlot.jsx b/frontend/src/components/ChartjsPlot.jsx index 50b7544..390c943 100644 --- a/frontend/src/components/ChartjsPlot.jsx +++ b/frontend/src/components/ChartjsPlot.jsx @@ -146,6 +146,7 @@ const ChartjsPlot = ({ session, height = '400px' }) => { const loadHistoricalData = useCallback(async (variables, timeWindow) => { try { console.log(`πŸ“Š Loading historical data for ${variables.length} variables (${timeWindow}s window)...`); + setIsLoadingHistorical(true); const response = await fetch('/api/plots/historical', { method: 'POST', @@ -160,15 +161,41 @@ const ChartjsPlot = ({ session, height = '400px' }) => { }); if (!response.ok) { - throw new Error(`HTTP ${response.status}`); + const errorData = await response.json(); + throw new Error(`HTTP ${response.status}: ${errorData.error || 'Unknown error'}`); } const data = await response.json(); - console.log(`πŸ“Š Historical data response:`, data); + console.log(`πŸ“Š Historical data loaded:`, { + totalPoints: data.total_points || 0, + variablesFound: data.variables_found || [], + variablesRequested: variables, + timeRange: data.time_range, + dataPreview: data.data?.slice(0, 3) // Show first 3 points + }); + + // Show summary in console for debugging + if (data.data && data.data.length > 0) { + const variableStats = {}; + data.data.forEach(point => { + if (!variableStats[point.variable]) { + variableStats[point.variable] = { count: 0, min: point.value, max: point.value }; + } + variableStats[point.variable].count++; + variableStats[point.variable].min = Math.min(variableStats[point.variable].min, point.value); + variableStats[point.variable].max = Math.max(variableStats[point.variable].max, point.value); + }); + console.log(`πŸ“Š Variable statistics:`, variableStats); + } + return data.data || []; } catch (error) { console.warn('⚠️ Failed to load historical data:', error); + // Set error state to show user + setError(`Historical data loading failed: ${error.message}`); return []; + } finally { + setIsLoadingHistorical(false); } }, []); @@ -1276,6 +1303,42 @@ const ChartjsPlot = ({ session, height = '400px' }) => { borderColor="gray.200" position="relative" > + {/* Historical data loading indicator */} + {isLoadingHistorical && ( + + πŸ“Š Loading historical data... + + )} + + {/* Data points counter */} + {dataPointsCount > 0 && ( + + πŸ“ˆ {dataPointsCount} points + + )} + 0] print(f"πŸ” DEBUG: Headers in {csv_file}: {headers}") # Check if any of our variables are in this file @@ -1943,22 +1958,69 @@ def get_historical_data(): ) continue - # Read the CSV file + # Read the CSV file with proper encoding and error handling print(f"πŸ” DEBUG: Reading CSV file with pandas...") - df = pd.read_csv(csv_file) - print(f"πŸ” DEBUG: CSV loaded, shape: {df.shape}") + df = None + for encoding in ['utf-8', 'utf-8-sig', 'utf-16', 'latin-1']: + try: + df = pd.read_csv(csv_file, encoding=encoding, on_bad_lines='skip') + print(f"πŸ” DEBUG: CSV loaded with {encoding}, shape: {df.shape}") + break + except (UnicodeDecodeError, UnicodeError, pd.errors.ParserError): + continue + except Exception as e: + print(f"πŸ” DEBUG: Error reading with {encoding}: {e}") + continue + + if df is None: + print(f"Warning: Could not read CSV file {csv_file} with any encoding") + continue + + # Clean column names from BOM and unicode artifacts + df.columns = [col.replace('\ufeff', '').replace('\x00', '').strip() for col in df.columns] - if "timestamp" not in df.columns: - print(f"πŸ” DEBUG: No timestamp column in {csv_file}, skipping") + # Convert timestamp to datetime with flexible parsing + print(f"πŸ” DEBUG: Converting timestamps...") + timestamp_col = None + for col in df.columns: + if 'timestamp' in col.lower(): + timestamp_col = col + break + + if timestamp_col is None: + print(f"πŸ” DEBUG: No timestamp column found in {csv_file}, skipping") + continue + + try: + # Try multiple timestamp formats + df[timestamp_col] = pd.to_datetime(df[timestamp_col], errors='coerce') + # Remove rows with invalid timestamps + df = df.dropna(subset=[timestamp_col]) + + if df.empty: + print(f"πŸ” DEBUG: No valid timestamps in {csv_file}, skipping") + continue + + # Normalize column name to 'timestamp' + if timestamp_col != 'timestamp': + df = df.rename(columns={timestamp_col: 'timestamp'}) + + print( + f"πŸ” DEBUG: Timestamp range: {df['timestamp'].min()} to {df['timestamp'].max()}" + ) + print(f"πŸ” DEBUG: Filter range: {start_time} to {end_time}") + except Exception as e: + print(f"πŸ” DEBUG: Timestamp conversion failed for {csv_file}: {e}") continue - # Convert timestamp to datetime - print(f"πŸ” DEBUG: Converting timestamps...") - df["timestamp"] = pd.to_datetime(df["timestamp"]) - print( - f"πŸ” DEBUG: Timestamp range: {df['timestamp'].min()} to {df['timestamp'].max()}" - ) - print(f"πŸ” DEBUG: Filter range: {start_time} to {end_time}") + # Recalculate matching variables after column cleaning + clean_headers = list(df.columns) + matching_vars = [var for var in variables if var in clean_headers] + print(f"πŸ” DEBUG: Matching variables after cleaning: {matching_vars}") + + if not matching_vars: + print(f"πŸ” DEBUG: No matching variables after cleaning in {csv_file}, skipping") + continue # Filter by time range mask = (df["timestamp"] >= start_time) & ( @@ -1976,24 +2038,27 @@ def get_historical_data(): for _, row in filtered_df.iterrows(): timestamp = row["timestamp"] for var in matching_vars: - if var in row: + if var in row and pd.notna(row[var]): try: # Convert value to appropriate type value = row[var] - if pd.isna(value): - continue # Handle boolean values if isinstance(value, str): - if value.lower() == "true": + value_lower = value.lower().strip() + if value_lower == "true": value = True - elif value.lower() == "false": + elif value_lower == "false": value = False else: try: value = float(value) except ValueError: continue + elif isinstance(value, (int, float)): + value = float(value) + else: + continue historical_data.append( { @@ -2004,6 +2069,7 @@ def get_historical_data(): ) except Exception as e: # Skip invalid values + print(f"πŸ” DEBUG: Skipping invalid value for {var}: {e}") continue except Exception as e: diff --git a/system_state.json b/system_state.json index 4eff48f..b36a226 100644 --- a/system_state.json +++ b/system_state.json @@ -1,13 +1,13 @@ { "last_state": { "should_connect": true, - "should_stream": true, + "should_stream": false, "active_datasets": [ - "DAR", "Test", - "Fast" + "Fast", + "DAR" ] }, "auto_recovery_enabled": true, - "last_update": "2025-08-15T00:17:13.675666" + "last_update": "2025-08-15T00:26:15.383017" } \ No newline at end of file diff --git a/utils/csv_validator.py b/utils/csv_validator.py new file mode 100644 index 0000000..f0175a0 --- /dev/null +++ b/utils/csv_validator.py @@ -0,0 +1,298 @@ +#!/usr/bin/env python3 +""" +CSV Validator and Cleaner Utility + +This utility helps diagnose and fix common CSV issues: +1. Encoding problems (BOM, UTF-16, etc.) +2. Inconsistent number of fields +3. Malformed timestamps +4. Invalid data types + +Usage: + python utils/csv_validator.py --scan records/ + python utils/csv_validator.py --fix records/15-08-2025/problematic_file.csv + python utils/csv_validator.py --validate records/15-08-2025/ +""" + +import os +import sys +import argparse +import pandas as pd +import glob +from pathlib import Path +from datetime import datetime + + +class CSVValidator: + """Validates and cleans CSV files for the PLC streaming system""" + + def __init__(self): + self.encodings_to_try = ['utf-8', 'utf-8-sig', 'utf-16', 'latin-1', 'cp1252'] + self.issues_found = [] + self.fixed_files = [] + + def detect_encoding(self, file_path): + """Detect the encoding of a CSV file""" + for encoding in self.encodings_to_try: + try: + with open(file_path, 'r', encoding=encoding) as f: + f.read(1024) # Read first 1KB + return encoding + except (UnicodeDecodeError, UnicodeError): + continue + return None + + def read_csv_headers(self, file_path): + """Read CSV headers with encoding detection""" + encoding = self.detect_encoding(file_path) + if not encoding: + return None, None, "Could not detect encoding" + + try: + with open(file_path, 'r', encoding=encoding) as f: + header_line = f.readline().strip() + if not header_line: + return None, encoding, "Empty header line" + + # Clean header from BOM and unicode artifacts + header_line = header_line.replace('\ufeff', '').replace('\x00', '') + headers = [h.strip().replace('\x00', '') for h in header_line.split(',')] + headers = [h for h in headers if h and len(h.strip()) > 0] + + return headers, encoding, None + except Exception as e: + return None, encoding, str(e) + + def validate_csv_structure(self, file_path): + """Validate CSV structure and detect issues""" + issues = [] + + # Check headers + headers, encoding, header_error = self.read_csv_headers(file_path) + if header_error: + issues.append({ + 'type': 'header_error', + 'message': header_error, + 'file': file_path + }) + return issues + + if not headers: + issues.append({ + 'type': 'no_headers', + 'message': 'No valid headers found', + 'file': file_path + }) + return issues + + # Check for encoding issues in headers + if any('\x00' in h or 'ΓΏΓΎ' in h or '' in h for h in headers): + issues.append({ + 'type': 'encoding_artifacts', + 'message': f'Headers contain encoding artifacts: {headers}', + 'file': file_path, + 'encoding': encoding + }) + + # Try to read full CSV with pandas + try: + df = None + for enc in self.encodings_to_try: + try: + df = pd.read_csv(file_path, encoding=enc, on_bad_lines='skip') + break + except (UnicodeDecodeError, UnicodeError, pd.errors.ParserError): + continue + + if df is None: + issues.append({ + 'type': 'read_error', + 'message': 'Could not read CSV with any encoding', + 'file': file_path + }) + return issues + + # Check for inconsistent columns + expected_cols = len(headers) + if len(df.columns) != expected_cols: + issues.append({ + 'type': 'column_mismatch', + 'message': f'Expected {expected_cols} columns, found {len(df.columns)}', + 'file': file_path, + 'expected_headers': headers, + 'actual_headers': list(df.columns) + }) + + # Check for timestamp column + timestamp_cols = [col for col in df.columns if 'timestamp' in col.lower()] + if not timestamp_cols: + issues.append({ + 'type': 'no_timestamp', + 'message': 'No timestamp column found', + 'file': file_path, + 'columns': list(df.columns) + }) + else: + # Validate timestamp format + timestamp_col = timestamp_cols[0] + try: + df[timestamp_col] = pd.to_datetime(df[timestamp_col], errors='coerce') + invalid_timestamps = df[timestamp_col].isna().sum() + if invalid_timestamps > 0: + issues.append({ + 'type': 'invalid_timestamps', + 'message': f'{invalid_timestamps} invalid timestamps found', + 'file': file_path, + 'timestamp_column': timestamp_col + }) + except Exception as e: + issues.append({ + 'type': 'timestamp_parse_error', + 'message': f'Cannot parse timestamps: {str(e)}', + 'file': file_path, + 'timestamp_column': timestamp_col + }) + + except Exception as e: + issues.append({ + 'type': 'general_error', + 'message': f'Error reading CSV: {str(e)}', + 'file': file_path + }) + + return issues + + def scan_directory(self, directory_path): + """Scan directory for CSV issues""" + print(f"πŸ” Scanning directory: {directory_path}") + + csv_files = glob.glob(os.path.join(directory_path, "**/*.csv"), recursive=True) + total_files = len(csv_files) + + print(f"πŸ“ Found {total_files} CSV files") + + all_issues = [] + for i, csv_file in enumerate(csv_files, 1): + print(f"πŸ“„ Checking {i}/{total_files}: {os.path.basename(csv_file)}") + + issues = self.validate_csv_structure(csv_file) + if issues: + all_issues.extend(issues) + print(f" ⚠️ {len(issues)} issues found") + else: + print(f" βœ… OK") + + return all_issues + + def fix_csv_file(self, file_path, backup=True): + """Fix a problematic CSV file""" + print(f"πŸ”§ Fixing CSV file: {file_path}") + + if backup: + backup_path = f"{file_path}.backup.{datetime.now().strftime('%Y%m%d_%H%M%S')}" + import shutil + shutil.copy2(file_path, backup_path) + print(f"πŸ“‹ Backup created: {backup_path}") + + # Detect encoding and read file + encoding = self.detect_encoding(file_path) + if not encoding: + print("❌ Could not detect encoding") + return False + + try: + # Read with detected encoding + df = pd.read_csv(file_path, encoding=encoding, on_bad_lines='skip') + + # Clean column names + df.columns = [col.replace('\ufeff', '').replace('\x00', '').strip() for col in df.columns] + + # Find timestamp column + timestamp_cols = [col for col in df.columns if 'timestamp' in col.lower()] + if timestamp_cols: + timestamp_col = timestamp_cols[0] + # Normalize to 'timestamp' + if timestamp_col != 'timestamp': + df = df.rename(columns={timestamp_col: 'timestamp'}) + + # Fix timestamps + df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce') + # Remove rows with invalid timestamps + df = df.dropna(subset=['timestamp']) + + # Write fixed file with UTF-8 encoding + df.to_csv(file_path, index=False, encoding='utf-8') + print(f"βœ… Fixed and saved with UTF-8 encoding") + + return True + + except Exception as e: + print(f"❌ Error fixing file: {e}") + return False + + def print_issue_summary(self, issues): + """Print a summary of issues found""" + if not issues: + print("\nπŸŽ‰ No issues found!") + return + + print(f"\nπŸ“Š Summary: {len(issues)} issues found") + + issue_types = {} + for issue in issues: + issue_type = issue['type'] + if issue_type not in issue_types: + issue_types[issue_type] = [] + issue_types[issue_type].append(issue) + + for issue_type, type_issues in issue_types.items(): + print(f"\nπŸ”Έ {issue_type.replace('_', ' ').title()}: {len(type_issues)} files") + for issue in type_issues[:5]: # Show first 5 + print(f" - {os.path.basename(issue['file'])}: {issue['message']}") + if len(type_issues) > 5: + print(f" ... and {len(type_issues) - 5} more") + + +def main(): + parser = argparse.ArgumentParser(description='CSV Validator and Cleaner for PLC Streaming System') + parser.add_argument('path', help='Path to CSV file or directory to process') + parser.add_argument('--scan', action='store_true', help='Scan directory for issues (default)') + parser.add_argument('--fix', action='store_true', help='Fix individual CSV file') + parser.add_argument('--fix-all', action='store_true', help='Fix all problematic files in directory') + parser.add_argument('--no-backup', action='store_true', help='Do not create backup when fixing') + + args = parser.parse_args() + + validator = CSVValidator() + + if args.fix and os.path.isfile(args.path): + # Fix single file + success = validator.fix_csv_file(args.path, backup=not args.no_backup) + sys.exit(0 if success else 1) + + elif os.path.isdir(args.path): + # Scan directory + issues = validator.scan_directory(args.path) + validator.print_issue_summary(issues) + + if args.fix_all and issues: + print(f"\nπŸ”§ Fixing {len(set(issue['file'] for issue in issues))} problematic files...") + + problematic_files = set(issue['file'] for issue in issues if issue['type'] != 'no_timestamp') + fixed_count = 0 + + for file_path in problematic_files: + if validator.fix_csv_file(file_path, backup=not args.no_backup): + fixed_count += 1 + + print(f"βœ… Fixed {fixed_count}/{len(problematic_files)} files") + + sys.exit(1 if issues else 0) + + else: + print(f"❌ Path not found: {args.path}") + sys.exit(1) + + +if __name__ == "__main__": + main()