diff --git a/application_events.json b/application_events.json index ad23ecb..4eea360 100644 --- a/application_events.json +++ b/application_events.json @@ -3928,8 +3928,311 @@ "trigger_variable": null, "auto_started": true } + }, + { + "timestamp": "2025-08-15T00:28:49.611975", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 500, + "trigger_variable": null, + "auto_started": true + } + }, + { + "timestamp": "2025-08-15T00:29:11.885234", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 500, + "trigger_variable": null, + "auto_started": true + } + }, + { + "timestamp": "2025-08-15T00:29:47.127915", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 500, + "trigger_variable": null, + "auto_started": true + } + }, + { + "timestamp": "2025-08-15T00:33:00.587011", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 500, + "trigger_variable": null, + "auto_started": true + } + }, + { + "timestamp": "2025-08-15T00:34:00.990295", + "level": "info", + "event_type": "application_started", + "message": "Application initialization completed successfully", + "details": {} + }, + { + "timestamp": "2025-08-15T00:34:01.069801", + "level": "info", + "event_type": "dataset_activated", + "message": "Dataset activated: DAR", + "details": { + "dataset_id": "DAR", + "variables_count": 2, + "streaming_count": 2, + "prefix": "gateway_phoenix" + } + }, + { + "timestamp": "2025-08-15T00:34:01.075470", + "level": "info", + "event_type": "dataset_activated", + "message": "Dataset activated: Fast", + "details": { + "dataset_id": "Fast", + "variables_count": 2, + "streaming_count": 1, + "prefix": "fast" + } + }, + { + "timestamp": "2025-08-15T00:34:01.083475", + "level": "info", + "event_type": "csv_recording_started", + "message": "CSV recording started: 2 datasets activated", + "details": { + "activated_datasets": 2, + "total_datasets": 3 + } + }, + { + "timestamp": "2025-08-15T00:35:06.741557", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 500, + "trigger_variable": null, + "auto_started": true + } + }, + { + "timestamp": "2025-08-15T00:35:28.675944", + "level": "info", + "event_type": "application_started", + "message": "Application initialization completed successfully", + "details": {} + }, + { + "timestamp": "2025-08-15T00:35:28.758707", + "level": "info", + "event_type": "dataset_activated", + "message": "Dataset activated: DAR", + "details": { + "dataset_id": "DAR", + "variables_count": 2, + "streaming_count": 2, + "prefix": "gateway_phoenix" + } + }, + { + "timestamp": "2025-08-15T00:35:28.764039", + "level": "info", + "event_type": "dataset_activated", + 
"message": "Dataset activated: Fast", + "details": { + "dataset_id": "Fast", + "variables_count": 2, + "streaming_count": 1, + "prefix": "fast" + } + }, + { + "timestamp": "2025-08-15T00:35:28.768130", + "level": "info", + "event_type": "csv_recording_started", + "message": "CSV recording started: 2 datasets activated", + "details": { + "activated_datasets": 2, + "total_datasets": 3 + } + }, + { + "timestamp": "2025-08-15T00:35:39.619619", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 500, + "trigger_variable": null, + "auto_started": true + } + }, + { + "timestamp": "2025-08-15T00:40:08.431291", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 500, + "trigger_variable": null, + "auto_started": true + } + }, + { + "timestamp": "2025-08-15T00:40:14.468124", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 500, + "trigger_variable": null, + "auto_started": true + } + }, + { + "timestamp": "2025-08-15T00:40:41.861858", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 500, + "trigger_variable": null, + "auto_started": true + } + }, + { + "timestamp": "2025-08-15T00:40:45.084903", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 500, + "trigger_variable": null, + "auto_started": true + } + }, + { + "timestamp": "2025-08-15T00:40:49.549459", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 500, + "trigger_variable": null, + "auto_started": true + } + }, + { + "timestamp": "2025-08-15T00:41:19.890329", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 100, + "trigger_variable": null, + "auto_started": true + } + }, + { + "timestamp": "2025-08-15T00:41:32.191310", + "level": "info", + "event_type": "plot_session_created", + "message": "Plot session 'UR29' created and started", + "details": { + "session_id": "plot_1", + "variables": [ + "UR29_Brix", + "UR29_ma", + "AUX Blink_1.0S" + ], + "time_window": 3600, + "trigger_variable": null, + "auto_started": true + } } ], - "last_updated": "2025-08-15T00:27:40.211134", - "total_entries": 360 + "last_updated": "2025-08-15T00:41:32.191310", + "total_entries": 381 } \ No newline at end of file diff --git a/config/data/plot_definitions.json b/config/data/plot_definitions.json index 65b829e..c0ada3e 100644 --- 
a/config/data/plot_definitions.json +++ b/config/data/plot_definitions.json @@ -2,12 +2,12 @@ "plots": [ { "id": "plot_1", - "line_tension": 0.1, + "line_tension": 0, "name": "UR29", "point_hover_radius": 4, "point_radius": 0, - "stepped": false, - "time_window": 500, + "stepped": true, + "time_window": 3600, "trigger_enabled": false, "trigger_on_true": true, "trigger_variable": null, diff --git a/frontend/src/components/ChartjsPlot.jsx b/frontend/src/components/ChartjsPlot.jsx index 390c943..d584638 100644 --- a/frontend/src/components/ChartjsPlot.jsx +++ b/frontend/src/components/ChartjsPlot.jsx @@ -67,6 +67,9 @@ const ChartjsPlot = ({ session, height = '400px' }) => { const [isRefreshing, setIsRefreshing] = useState(false); const [isLoadingHistorical, setIsLoadingHistorical] = useState(false); const resolvedConfigRef = useRef(null); + + // Data preservation system for zoom operations + const dataBackupRef = useRef(new Map()); const bgColor = useColorModeValue('white', 'gray.800'); const textColor = useColorModeValue('gray.600', 'gray.300'); @@ -580,17 +583,55 @@ const ChartjsPlot = ({ session, height = '400px' }) => { }, ...(zoomAvailable ? { zoom: { - // Solo habilitar zoom/pan en modo fullscreen + // Habilitar zoom/pan siempre, pero con configuraciΓ³n optimizada para streaming pan: { - enabled: !!session?.isFullscreen, + enabled: true, mode: 'x', - modifierKey: 'shift' + modifierKey: 'shift', + onPanComplete: function(context) { + // Preserve streaming state after pan + const chart = context.chart; + if (chart.options?.scales?.x?.realtime) { + const realtime = chart.options.scales.x.realtime; + // Ensure streaming continues after pan + if (!sessionDataRef.current.userPaused && !sessionDataRef.current.isPaused) { + realtime.pause = false; + } + } + } }, zoom: { - drag: { enabled: !!session?.isFullscreen }, - wheel: { enabled: !!session?.isFullscreen }, - pinch: { enabled: !!session?.isFullscreen }, - mode: 'x' + drag: { + enabled: true, + backgroundColor: 'rgba(128,128,128,0.3)' + }, + wheel: { + enabled: true, + speed: 0.1 + }, + pinch: { enabled: true }, + mode: 'x', + onZoomComplete: function(context) { + // Preserve streaming state and data after zoom + const chart = context.chart; + console.log('πŸ” Zoom completed, preserving streaming state...'); + + if (chart.options?.scales?.x?.realtime) { + const realtime = chart.options.scales.x.realtime; + // Don't auto-pause streaming after zoom + if (!sessionDataRef.current.userPaused && !sessionDataRef.current.isPaused) { + setTimeout(() => { + realtime.pause = false; + }, 10); + } + } + } + }, + limits: { + x: { + min: 'original', + max: 'original' + } } } } : {}) @@ -830,6 +871,18 @@ const ChartjsPlot = ({ session, height = '400px' }) => { // Update chart if (pointsAdded > 0) { chart.update('quiet'); + + // Backup data periodically when significant data is added + if (pointsAdded > 5 && dataBackupRef.current) { + const backup = chart.data.datasets.map(dataset => ({ + label: dataset.label, + data: [...(dataset.data || [])], + timestamp: Date.now() + })); + + dataBackupRef.current.set('current', backup); + console.log(`πŸ“¦ Data backed up: ${backup.reduce((total, ds) => total + ds.data.length, 0)} points`); + } } // Clear NaN insertion flag @@ -982,17 +1035,78 @@ const ChartjsPlot = ({ session, height = '400px' }) => { if (!chartRef.current) return; try { + console.log('πŸ”„ Resetting zoom...'); + + // Backup data before zoom reset + const backup = chartRef.current.data.datasets.map(dataset => ({ + label: dataset.label, + data: 
[...(dataset.data || [])], + timestamp: Date.now() + })); + + dataBackupRef.current.set('current', backup); + console.log(`πŸ“¦ Data backed up: ${backup.reduce((total, ds) => total + ds.data.length, 0)} points`); + + // Store current streaming state + const realtimeOptions = chartRef.current.options?.scales?.x?.realtime; + const wasPaused = realtimeOptions?.pause || false; + // Try to reset zoom using the zoom plugin if (chartRef.current.resetZoom) { - chartRef.current.resetZoom(); + chartRef.current.resetZoom('none'); // Use 'none' animation mode for faster reset } else if (window.Chart?.helpers?.getRelativePosition) { // Fallback: manually reset zoom by updating scale options const chart = chartRef.current; if (chart.options?.scales?.x?.realtime) { - // For realtime charts, just trigger an update + // For realtime charts, reset the scale manually + const now = Date.now(); + chart.options.scales.x.realtime.duration = chart.options.scales.x.realtime.duration || 60000; + chart.options.scales.x.min = now - chart.options.scales.x.realtime.duration; + chart.options.scales.x.max = now; chart.update('none'); } } + + // Restore data if it was lost during zoom reset + setTimeout(() => { + if (chartRef.current && dataBackupRef.current.has('current')) { + const backupData = dataBackupRef.current.get('current'); + const now = Date.now(); + + // Only restore if backup is recent (within 30 seconds) + if (now - backupData[0]?.timestamp <= 30000) { + let restored = false; + chartRef.current.data.datasets.forEach((dataset, index) => { + const backupDataset = backupData[index]; + if (backupDataset && dataset.label === backupDataset.label) { + // Only restore if current data is significantly smaller + if (!dataset.data || dataset.data.length < backupDataset.data.length * 0.3) { + dataset.data = [...backupDataset.data]; + restored = true; + } + } + }); + + if (restored) { + console.log('πŸ”„ Chart data restored from backup'); + chartRef.current.update('none'); + + const totalPoints = chartRef.current.data.datasets.reduce((total, dataset) => + total + (dataset.data?.length || 0), 0 + ); + setDataPointsCount(totalPoints); + } + } + + // Restore streaming state + if (realtimeOptions && !wasPaused) { + realtimeOptions.pause = false; + } + + console.log('βœ… Zoom reset with data preservation complete'); + } + }, 100); // Small delay to ensure zoom reset is complete + } catch (error) { console.warn('Failed to reset zoom:', error); } @@ -1339,6 +1453,35 @@ const ChartjsPlot = ({ session, height = '400px' }) => { )} + {/* Reset Zoom Button */} + {chartRef.current && ( + + + + )} + 0] + headers = [ + h + for h in headers + if h and len(h.replace("\x00", "").strip()) > 0 + ] print(f"πŸ” DEBUG: Headers in {csv_file}: {headers}") # Check if any of our variables are in this file @@ -1961,65 +1971,90 @@ def get_historical_data(): # Read the CSV file with proper encoding and error handling print(f"πŸ” DEBUG: Reading CSV file with pandas...") df = None - for encoding in ['utf-8', 'utf-8-sig', 'utf-16', 'latin-1']: + for encoding in ["utf-8", "utf-8-sig", "utf-16", "latin-1"]: try: - df = pd.read_csv(csv_file, encoding=encoding, on_bad_lines='skip') - print(f"πŸ” DEBUG: CSV loaded with {encoding}, shape: {df.shape}") + df = pd.read_csv( + csv_file, encoding=encoding, on_bad_lines="skip" + ) + print( + f"πŸ” DEBUG: CSV loaded with {encoding}, shape: {df.shape}" + ) break - except (UnicodeDecodeError, UnicodeError, pd.errors.ParserError): + except ( + UnicodeDecodeError, + UnicodeError, + pd.errors.ParserError, + ): 
continue except Exception as e: print(f"πŸ” DEBUG: Error reading with {encoding}: {e}") continue - + if df is None: - print(f"Warning: Could not read CSV file {csv_file} with any encoding") + print( + f"Warning: Could not read CSV file {csv_file} with any encoding" + ) continue - + # Clean column names from BOM and unicode artifacts - df.columns = [col.replace('\ufeff', '').replace('\x00', '').strip() for col in df.columns] + df.columns = [ + col.replace("\ufeff", "").replace("\x00", "").strip() + for col in df.columns + ] # Convert timestamp to datetime with flexible parsing print(f"πŸ” DEBUG: Converting timestamps...") timestamp_col = None for col in df.columns: - if 'timestamp' in col.lower(): + if "timestamp" in col.lower(): timestamp_col = col break - + if timestamp_col is None: - print(f"πŸ” DEBUG: No timestamp column found in {csv_file}, skipping") + print( + f"πŸ” DEBUG: No timestamp column found in {csv_file}, skipping" + ) continue - + try: # Try multiple timestamp formats - df[timestamp_col] = pd.to_datetime(df[timestamp_col], errors='coerce') + df[timestamp_col] = pd.to_datetime( + df[timestamp_col], errors="coerce" + ) # Remove rows with invalid timestamps df = df.dropna(subset=[timestamp_col]) - + if df.empty: - print(f"πŸ” DEBUG: No valid timestamps in {csv_file}, skipping") + print( + f"πŸ” DEBUG: No valid timestamps in {csv_file}, skipping" + ) continue - + # Normalize column name to 'timestamp' - if timestamp_col != 'timestamp': - df = df.rename(columns={timestamp_col: 'timestamp'}) - + if timestamp_col != "timestamp": + df = df.rename(columns={timestamp_col: "timestamp"}) + print( f"πŸ” DEBUG: Timestamp range: {df['timestamp'].min()} to {df['timestamp'].max()}" ) print(f"πŸ” DEBUG: Filter range: {start_time} to {end_time}") except Exception as e: - print(f"πŸ” DEBUG: Timestamp conversion failed for {csv_file}: {e}") + print( + f"πŸ” DEBUG: Timestamp conversion failed for {csv_file}: {e}" + ) continue # Recalculate matching variables after column cleaning clean_headers = list(df.columns) matching_vars = [var for var in variables if var in clean_headers] - print(f"πŸ” DEBUG: Matching variables after cleaning: {matching_vars}") + print( + f"πŸ” DEBUG: Matching variables after cleaning: {matching_vars}" + ) if not matching_vars: - print(f"πŸ” DEBUG: No matching variables after cleaning in {csv_file}, skipping") + print( + f"πŸ” DEBUG: No matching variables after cleaning in {csv_file}, skipping" + ) continue # Filter by time range @@ -2069,7 +2104,9 @@ def get_historical_data(): ) except Exception as e: # Skip invalid values - print(f"πŸ” DEBUG: Skipping invalid value for {var}: {e}") + print( + f"πŸ” DEBUG: Skipping invalid value for {var}: {e}" + ) continue except Exception as e: diff --git a/system_state.json b/system_state.json index b36a226..1491628 100644 --- a/system_state.json +++ b/system_state.json @@ -3,11 +3,11 @@ "should_connect": true, "should_stream": false, "active_datasets": [ - "Test", + "DAR", "Fast", - "DAR" + "Test" ] }, "auto_recovery_enabled": true, - "last_update": "2025-08-15T00:26:15.383017" + "last_update": "2025-08-15T00:35:28.773668" } \ No newline at end of file diff --git a/utils/csv_validator.py b/utils/csv_validator.py index f0175a0..4eceaed 100644 --- a/utils/csv_validator.py +++ b/utils/csv_validator.py @@ -25,228 +25,254 @@ from datetime import datetime class CSVValidator: """Validates and cleans CSV files for the PLC streaming system""" - + def __init__(self): - self.encodings_to_try = ['utf-8', 'utf-8-sig', 'utf-16', 
'latin-1', 'cp1252'] + self.encodings_to_try = ["utf-8", "utf-8-sig", "utf-16", "latin-1", "cp1252"] self.issues_found = [] self.fixed_files = [] - + def detect_encoding(self, file_path): """Detect the encoding of a CSV file""" for encoding in self.encodings_to_try: try: - with open(file_path, 'r', encoding=encoding) as f: + with open(file_path, "r", encoding=encoding) as f: f.read(1024) # Read first 1KB return encoding except (UnicodeDecodeError, UnicodeError): continue return None - + def read_csv_headers(self, file_path): """Read CSV headers with encoding detection""" encoding = self.detect_encoding(file_path) if not encoding: return None, None, "Could not detect encoding" - + try: - with open(file_path, 'r', encoding=encoding) as f: + with open(file_path, "r", encoding=encoding) as f: header_line = f.readline().strip() if not header_line: return None, encoding, "Empty header line" - + # Clean header from BOM and unicode artifacts - header_line = header_line.replace('\ufeff', '').replace('\x00', '') - headers = [h.strip().replace('\x00', '') for h in header_line.split(',')] + header_line = header_line.replace("\ufeff", "").replace("\x00", "") + headers = [ + h.strip().replace("\x00", "") for h in header_line.split(",") + ] headers = [h for h in headers if h and len(h.strip()) > 0] - + return headers, encoding, None except Exception as e: return None, encoding, str(e) - + def validate_csv_structure(self, file_path): """Validate CSV structure and detect issues""" issues = [] - + # Check headers headers, encoding, header_error = self.read_csv_headers(file_path) if header_error: - issues.append({ - 'type': 'header_error', - 'message': header_error, - 'file': file_path - }) + issues.append( + {"type": "header_error", "message": header_error, "file": file_path} + ) return issues - + if not headers: - issues.append({ - 'type': 'no_headers', - 'message': 'No valid headers found', - 'file': file_path - }) + issues.append( + { + "type": "no_headers", + "message": "No valid headers found", + "file": file_path, + } + ) return issues - + # Check for encoding issues in headers - if any('\x00' in h or 'ΓΏΓΎ' in h or '' in h for h in headers): - issues.append({ - 'type': 'encoding_artifacts', - 'message': f'Headers contain encoding artifacts: {headers}', - 'file': file_path, - 'encoding': encoding - }) - + if any("\x00" in h or "ΓΏΓΎ" in h or "" in h for h in headers): + issues.append( + { + "type": "encoding_artifacts", + "message": f"Headers contain encoding artifacts: {headers}", + "file": file_path, + "encoding": encoding, + } + ) + # Try to read full CSV with pandas try: df = None for enc in self.encodings_to_try: try: - df = pd.read_csv(file_path, encoding=enc, on_bad_lines='skip') + df = pd.read_csv(file_path, encoding=enc, on_bad_lines="skip") break except (UnicodeDecodeError, UnicodeError, pd.errors.ParserError): continue - + if df is None: - issues.append({ - 'type': 'read_error', - 'message': 'Could not read CSV with any encoding', - 'file': file_path - }) + issues.append( + { + "type": "read_error", + "message": "Could not read CSV with any encoding", + "file": file_path, + } + ) return issues - + # Check for inconsistent columns expected_cols = len(headers) if len(df.columns) != expected_cols: - issues.append({ - 'type': 'column_mismatch', - 'message': f'Expected {expected_cols} columns, found {len(df.columns)}', - 'file': file_path, - 'expected_headers': headers, - 'actual_headers': list(df.columns) - }) - + issues.append( + { + "type": "column_mismatch", + "message": f"Expected 
{expected_cols} columns, found {len(df.columns)}", + "file": file_path, + "expected_headers": headers, + "actual_headers": list(df.columns), + } + ) + # Check for timestamp column - timestamp_cols = [col for col in df.columns if 'timestamp' in col.lower()] + timestamp_cols = [col for col in df.columns if "timestamp" in col.lower()] if not timestamp_cols: - issues.append({ - 'type': 'no_timestamp', - 'message': 'No timestamp column found', - 'file': file_path, - 'columns': list(df.columns) - }) + issues.append( + { + "type": "no_timestamp", + "message": "No timestamp column found", + "file": file_path, + "columns": list(df.columns), + } + ) else: # Validate timestamp format timestamp_col = timestamp_cols[0] try: - df[timestamp_col] = pd.to_datetime(df[timestamp_col], errors='coerce') + df[timestamp_col] = pd.to_datetime( + df[timestamp_col], errors="coerce" + ) invalid_timestamps = df[timestamp_col].isna().sum() if invalid_timestamps > 0: - issues.append({ - 'type': 'invalid_timestamps', - 'message': f'{invalid_timestamps} invalid timestamps found', - 'file': file_path, - 'timestamp_column': timestamp_col - }) + issues.append( + { + "type": "invalid_timestamps", + "message": f"{invalid_timestamps} invalid timestamps found", + "file": file_path, + "timestamp_column": timestamp_col, + } + ) except Exception as e: - issues.append({ - 'type': 'timestamp_parse_error', - 'message': f'Cannot parse timestamps: {str(e)}', - 'file': file_path, - 'timestamp_column': timestamp_col - }) - + issues.append( + { + "type": "timestamp_parse_error", + "message": f"Cannot parse timestamps: {str(e)}", + "file": file_path, + "timestamp_column": timestamp_col, + } + ) + except Exception as e: - issues.append({ - 'type': 'general_error', - 'message': f'Error reading CSV: {str(e)}', - 'file': file_path - }) - + issues.append( + { + "type": "general_error", + "message": f"Error reading CSV: {str(e)}", + "file": file_path, + } + ) + return issues - + def scan_directory(self, directory_path): """Scan directory for CSV issues""" print(f"πŸ” Scanning directory: {directory_path}") - + csv_files = glob.glob(os.path.join(directory_path, "**/*.csv"), recursive=True) total_files = len(csv_files) - + print(f"πŸ“ Found {total_files} CSV files") - + all_issues = [] for i, csv_file in enumerate(csv_files, 1): print(f"πŸ“„ Checking {i}/{total_files}: {os.path.basename(csv_file)}") - + issues = self.validate_csv_structure(csv_file) if issues: all_issues.extend(issues) print(f" ⚠️ {len(issues)} issues found") else: print(f" βœ… OK") - + return all_issues - + def fix_csv_file(self, file_path, backup=True): """Fix a problematic CSV file""" print(f"πŸ”§ Fixing CSV file: {file_path}") - + if backup: - backup_path = f"{file_path}.backup.{datetime.now().strftime('%Y%m%d_%H%M%S')}" + backup_path = ( + f"{file_path}.backup.{datetime.now().strftime('%Y%m%d_%H%M%S')}" + ) import shutil + shutil.copy2(file_path, backup_path) print(f"πŸ“‹ Backup created: {backup_path}") - + # Detect encoding and read file encoding = self.detect_encoding(file_path) if not encoding: print("❌ Could not detect encoding") return False - + try: # Read with detected encoding - df = pd.read_csv(file_path, encoding=encoding, on_bad_lines='skip') - + df = pd.read_csv(file_path, encoding=encoding, on_bad_lines="skip") + # Clean column names - df.columns = [col.replace('\ufeff', '').replace('\x00', '').strip() for col in df.columns] - + df.columns = [ + col.replace("\ufeff", "").replace("\x00", "").strip() + for col in df.columns + ] + # Find timestamp column - 
timestamp_cols = [col for col in df.columns if 'timestamp' in col.lower()] + timestamp_cols = [col for col in df.columns if "timestamp" in col.lower()] if timestamp_cols: timestamp_col = timestamp_cols[0] # Normalize to 'timestamp' - if timestamp_col != 'timestamp': - df = df.rename(columns={timestamp_col: 'timestamp'}) - + if timestamp_col != "timestamp": + df = df.rename(columns={timestamp_col: "timestamp"}) + # Fix timestamps - df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce') + df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce") # Remove rows with invalid timestamps - df = df.dropna(subset=['timestamp']) - + df = df.dropna(subset=["timestamp"]) + # Write fixed file with UTF-8 encoding - df.to_csv(file_path, index=False, encoding='utf-8') + df.to_csv(file_path, index=False, encoding="utf-8") print(f"βœ… Fixed and saved with UTF-8 encoding") - + return True - + except Exception as e: print(f"❌ Error fixing file: {e}") return False - + def print_issue_summary(self, issues): """Print a summary of issues found""" if not issues: print("\nπŸŽ‰ No issues found!") return - + print(f"\nπŸ“Š Summary: {len(issues)} issues found") - + issue_types = {} for issue in issues: - issue_type = issue['type'] + issue_type = issue["type"] if issue_type not in issue_types: issue_types[issue_type] = [] issue_types[issue_type].append(issue) - + for issue_type, type_issues in issue_types.items(): - print(f"\nπŸ”Έ {issue_type.replace('_', ' ').title()}: {len(type_issues)} files") + print( + f"\nπŸ”Έ {issue_type.replace('_', ' ').title()}: {len(type_issues)} files" + ) for issue in type_issues[:5]: # Show first 5 print(f" - {os.path.basename(issue['file'])}: {issue['message']}") if len(type_issues) > 5: @@ -254,41 +280,53 @@ class CSVValidator: def main(): - parser = argparse.ArgumentParser(description='CSV Validator and Cleaner for PLC Streaming System') - parser.add_argument('path', help='Path to CSV file or directory to process') - parser.add_argument('--scan', action='store_true', help='Scan directory for issues (default)') - parser.add_argument('--fix', action='store_true', help='Fix individual CSV file') - parser.add_argument('--fix-all', action='store_true', help='Fix all problematic files in directory') - parser.add_argument('--no-backup', action='store_true', help='Do not create backup when fixing') - + parser = argparse.ArgumentParser( + description="CSV Validator and Cleaner for PLC Streaming System" + ) + parser.add_argument("path", help="Path to CSV file or directory to process") + parser.add_argument( + "--scan", action="store_true", help="Scan directory for issues (default)" + ) + parser.add_argument("--fix", action="store_true", help="Fix individual CSV file") + parser.add_argument( + "--fix-all", action="store_true", help="Fix all problematic files in directory" + ) + parser.add_argument( + "--no-backup", action="store_true", help="Do not create backup when fixing" + ) + args = parser.parse_args() - + validator = CSVValidator() - + if args.fix and os.path.isfile(args.path): # Fix single file success = validator.fix_csv_file(args.path, backup=not args.no_backup) sys.exit(0 if success else 1) - + elif os.path.isdir(args.path): # Scan directory issues = validator.scan_directory(args.path) validator.print_issue_summary(issues) - + if args.fix_all and issues: - print(f"\nπŸ”§ Fixing {len(set(issue['file'] for issue in issues))} problematic files...") - - problematic_files = set(issue['file'] for issue in issues if issue['type'] != 'no_timestamp') + print( + f"\nπŸ”§ 
Fixing {len(set(issue['file'] for issue in issues))} problematic files..." + ) + + problematic_files = set( + issue["file"] for issue in issues if issue["type"] != "no_timestamp" + ) fixed_count = 0 - + for file_path in problematic_files: if validator.fix_csv_file(file_path, backup=not args.no_backup): fixed_count += 1 - + print(f"βœ… Fixed {fixed_count}/{len(problematic_files)} files") - + sys.exit(1 if issues else 0) - + else: print(f"❌ Path not found: {args.path}") sys.exit(1)
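Beyond the main() CLI entry point, the reformatted CSVValidator can also be driven programmatically. The following is a minimal sketch, assuming the project root is on the import path and that "records" is the directory holding the recorded CSV files (both are assumptions, not established by this diff):

from utils.csv_validator import CSVValidator  # module path taken from utils/csv_validator.py

validator = CSVValidator()

# Recursively scan for encoding artifacts, header problems, and invalid timestamps.
issues = validator.scan_directory("records")  # "records" is a hypothetical data directory
validator.print_issue_summary(issues)

# Mirror the --fix-all branch of main(): repair every flagged file except those
# whose only issue is a missing timestamp column. fix_csv_file() writes a
# timestamped .backup copy beside each file before rewriting it as UTF-8.
problematic_files = {
    issue["file"] for issue in issues if issue["type"] != "no_timestamp"
}
for file_path in problematic_files:
    validator.fix_csv_file(file_path, backup=True)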