feat: Enhance event logging, improve CSV handling, and optimize Chart.js zoom functionality

Miguel 2025-08-15 00:42:14 +02:00
parent e517f40a5d
commit b864e81aa3
6 changed files with 693 additions and 172 deletions

View File

@@ -3928,8 +3928,311 @@
"trigger_variable": null,
"auto_started": true
}
},
{
"timestamp": "2025-08-15T00:28:49.611975",
"level": "info",
"event_type": "plot_session_created",
"message": "Plot session 'UR29' created and started",
"details": {
"session_id": "plot_1",
"variables": [
"UR29_Brix",
"UR29_ma",
"AUX Blink_1.0S"
],
"time_window": 500,
"trigger_variable": null,
"auto_started": true
}
},
{
"timestamp": "2025-08-15T00:29:11.885234",
"level": "info",
"event_type": "plot_session_created",
"message": "Plot session 'UR29' created and started",
"details": {
"session_id": "plot_1",
"variables": [
"UR29_Brix",
"UR29_ma",
"AUX Blink_1.0S"
],
"time_window": 500,
"trigger_variable": null,
"auto_started": true
}
},
{
"timestamp": "2025-08-15T00:29:47.127915",
"level": "info",
"event_type": "plot_session_created",
"message": "Plot session 'UR29' created and started",
"details": {
"session_id": "plot_1",
"variables": [
"UR29_Brix",
"UR29_ma",
"AUX Blink_1.0S"
],
"time_window": 500,
"trigger_variable": null,
"auto_started": true
}
},
{
"timestamp": "2025-08-15T00:33:00.587011",
"level": "info",
"event_type": "plot_session_created",
"message": "Plot session 'UR29' created and started",
"details": {
"session_id": "plot_1",
"variables": [
"UR29_Brix",
"UR29_ma",
"AUX Blink_1.0S"
],
"time_window": 500,
"trigger_variable": null,
"auto_started": true
}
},
{
"timestamp": "2025-08-15T00:34:00.990295",
"level": "info",
"event_type": "application_started",
"message": "Application initialization completed successfully",
"details": {}
},
{
"timestamp": "2025-08-15T00:34:01.069801",
"level": "info",
"event_type": "dataset_activated",
"message": "Dataset activated: DAR",
"details": {
"dataset_id": "DAR",
"variables_count": 2,
"streaming_count": 2,
"prefix": "gateway_phoenix"
}
},
{
"timestamp": "2025-08-15T00:34:01.075470",
"level": "info",
"event_type": "dataset_activated",
"message": "Dataset activated: Fast",
"details": {
"dataset_id": "Fast",
"variables_count": 2,
"streaming_count": 1,
"prefix": "fast"
}
},
{
"timestamp": "2025-08-15T00:34:01.083475",
"level": "info",
"event_type": "csv_recording_started",
"message": "CSV recording started: 2 datasets activated",
"details": {
"activated_datasets": 2,
"total_datasets": 3
}
},
{
"timestamp": "2025-08-15T00:35:06.741557",
"level": "info",
"event_type": "plot_session_created",
"message": "Plot session 'UR29' created and started",
"details": {
"session_id": "plot_1",
"variables": [
"UR29_Brix",
"UR29_ma",
"AUX Blink_1.0S"
],
"time_window": 500,
"trigger_variable": null,
"auto_started": true
}
},
{
"timestamp": "2025-08-15T00:35:28.675944",
"level": "info",
"event_type": "application_started",
"message": "Application initialization completed successfully",
"details": {}
},
{
"timestamp": "2025-08-15T00:35:28.758707",
"level": "info",
"event_type": "dataset_activated",
"message": "Dataset activated: DAR",
"details": {
"dataset_id": "DAR",
"variables_count": 2,
"streaming_count": 2,
"prefix": "gateway_phoenix"
}
},
{
"timestamp": "2025-08-15T00:35:28.764039",
"level": "info",
"event_type": "dataset_activated",
"message": "Dataset activated: Fast",
"details": {
"dataset_id": "Fast",
"variables_count": 2,
"streaming_count": 1,
"prefix": "fast"
}
},
{
"timestamp": "2025-08-15T00:35:28.768130",
"level": "info",
"event_type": "csv_recording_started",
"message": "CSV recording started: 2 datasets activated",
"details": {
"activated_datasets": 2,
"total_datasets": 3
}
},
{
"timestamp": "2025-08-15T00:35:39.619619",
"level": "info",
"event_type": "plot_session_created",
"message": "Plot session 'UR29' created and started",
"details": {
"session_id": "plot_1",
"variables": [
"UR29_Brix",
"UR29_ma",
"AUX Blink_1.0S"
],
"time_window": 500,
"trigger_variable": null,
"auto_started": true
}
},
{
"timestamp": "2025-08-15T00:40:08.431291",
"level": "info",
"event_type": "plot_session_created",
"message": "Plot session 'UR29' created and started",
"details": {
"session_id": "plot_1",
"variables": [
"UR29_Brix",
"UR29_ma",
"AUX Blink_1.0S"
],
"time_window": 500,
"trigger_variable": null,
"auto_started": true
}
},
{
"timestamp": "2025-08-15T00:40:14.468124",
"level": "info",
"event_type": "plot_session_created",
"message": "Plot session 'UR29' created and started",
"details": {
"session_id": "plot_1",
"variables": [
"UR29_Brix",
"UR29_ma",
"AUX Blink_1.0S"
],
"time_window": 500,
"trigger_variable": null,
"auto_started": true
}
},
{
"timestamp": "2025-08-15T00:40:41.861858",
"level": "info",
"event_type": "plot_session_created",
"message": "Plot session 'UR29' created and started",
"details": {
"session_id": "plot_1",
"variables": [
"UR29_Brix",
"UR29_ma",
"AUX Blink_1.0S"
],
"time_window": 500,
"trigger_variable": null,
"auto_started": true
}
},
{
"timestamp": "2025-08-15T00:40:45.084903",
"level": "info",
"event_type": "plot_session_created",
"message": "Plot session 'UR29' created and started",
"details": {
"session_id": "plot_1",
"variables": [
"UR29_Brix",
"UR29_ma",
"AUX Blink_1.0S"
],
"time_window": 500,
"trigger_variable": null,
"auto_started": true
}
},
{
"timestamp": "2025-08-15T00:40:49.549459",
"level": "info",
"event_type": "plot_session_created",
"message": "Plot session 'UR29' created and started",
"details": {
"session_id": "plot_1",
"variables": [
"UR29_Brix",
"UR29_ma",
"AUX Blink_1.0S"
],
"time_window": 500,
"trigger_variable": null,
"auto_started": true
}
},
{
"timestamp": "2025-08-15T00:41:19.890329",
"level": "info",
"event_type": "plot_session_created",
"message": "Plot session 'UR29' created and started",
"details": {
"session_id": "plot_1",
"variables": [
"UR29_Brix",
"UR29_ma",
"AUX Blink_1.0S"
],
"time_window": 100,
"trigger_variable": null,
"auto_started": true
}
},
{
"timestamp": "2025-08-15T00:41:32.191310",
"level": "info",
"event_type": "plot_session_created",
"message": "Plot session 'UR29' created and started",
"details": {
"session_id": "plot_1",
"variables": [
"UR29_Brix",
"UR29_ma",
"AUX Blink_1.0S"
],
"time_window": 3600,
"trigger_variable": null,
"auto_started": true
}
}
],
"last_updated": "2025-08-15T00:27:40.211134",
"total_entries": 360
"last_updated": "2025-08-15T00:41:32.191310",
"total_entries": 381
}
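All of the new entries above share one schema: timestamp, level, event_type, message, and a details object, with last_updated and total_entries maintained at the top level. A minimal sketch of an appender that produces entries in this shape (the file path, the top-level "events" key, and the function name are assumptions for illustration, not the application's actual logger):

import json
from datetime import datetime
from pathlib import Path

LOG_FILE = Path("application_events.json")  # assumed path, not taken from this diff

def append_event(event_type, message, details=None, level="info"):
    """Append one event using the schema seen in the log above."""
    if LOG_FILE.exists():
        log = json.loads(LOG_FILE.read_text(encoding="utf-8"))
    else:
        log = {"events": []}  # top-level key name is assumed
    entry = {
        "timestamp": datetime.now().isoformat(),
        "level": level,
        "event_type": event_type,
        "message": message,
        "details": details or {},
    }
    log.setdefault("events", []).append(entry)
    log["last_updated"] = entry["timestamp"]
    log["total_entries"] = len(log["events"])
    LOG_FILE.write_text(json.dumps(log, indent=2), encoding="utf-8")

# Example mirroring one of the entries above:
append_event(
    "plot_session_created",
    "Plot session 'UR29' created and started",
    {
        "session_id": "plot_1",
        "variables": ["UR29_Brix", "UR29_ma", "AUX Blink_1.0S"],
        "time_window": 3600,
        "trigger_variable": None,
        "auto_started": True,
    },
)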

View File

@@ -2,12 +2,12 @@
"plots": [
{
"id": "plot_1",
"line_tension": 0.1,
"line_tension": 0,
"name": "UR29",
"point_hover_radius": 4,
"point_radius": 0,
"stepped": false,
"time_window": 500,
"stepped": true,
"time_window": 3600,
"trigger_enabled": false,
"trigger_on_true": true,
"trigger_variable": null,

View File

@@ -67,6 +67,9 @@ const ChartjsPlot = ({ session, height = '400px' }) => {
const [isRefreshing, setIsRefreshing] = useState(false);
const [isLoadingHistorical, setIsLoadingHistorical] = useState(false);
const resolvedConfigRef = useRef(null);
// Data preservation system for zoom operations
const dataBackupRef = useRef(new Map());
const bgColor = useColorModeValue('white', 'gray.800');
const textColor = useColorModeValue('gray.600', 'gray.300');
@@ -580,17 +583,55 @@ const ChartjsPlot = ({ session, height = '400px' }) => {
},
...(zoomAvailable ? {
zoom: {
// Only enable zoom/pan in fullscreen mode
// Always enable zoom/pan, but with settings optimized for streaming
pan: {
enabled: !!session?.isFullscreen,
enabled: true,
mode: 'x',
modifierKey: 'shift'
modifierKey: 'shift',
onPanComplete: function(context) {
// Preserve streaming state after pan
const chart = context.chart;
if (chart.options?.scales?.x?.realtime) {
const realtime = chart.options.scales.x.realtime;
// Ensure streaming continues after pan
if (!sessionDataRef.current.userPaused && !sessionDataRef.current.isPaused) {
realtime.pause = false;
}
}
}
},
zoom: {
drag: { enabled: !!session?.isFullscreen },
wheel: { enabled: !!session?.isFullscreen },
pinch: { enabled: !!session?.isFullscreen },
mode: 'x'
drag: {
enabled: true,
backgroundColor: 'rgba(128,128,128,0.3)'
},
wheel: {
enabled: true,
speed: 0.1
},
pinch: { enabled: true },
mode: 'x',
onZoomComplete: function(context) {
// Preserve streaming state and data after zoom
const chart = context.chart;
console.log('🔍 Zoom completed, preserving streaming state...');
if (chart.options?.scales?.x?.realtime) {
const realtime = chart.options.scales.x.realtime;
// Don't auto-pause streaming after zoom
if (!sessionDataRef.current.userPaused && !sessionDataRef.current.isPaused) {
setTimeout(() => {
realtime.pause = false;
}, 10);
}
}
}
},
limits: {
x: {
min: 'original',
max: 'original'
}
}
}
} : {})
@@ -830,6 +871,18 @@ const ChartjsPlot = ({ session, height = '400px' }) => {
// Update chart
if (pointsAdded > 0) {
chart.update('quiet');
// Backup data periodically when significant data is added
if (pointsAdded > 5 && dataBackupRef.current) {
const backup = chart.data.datasets.map(dataset => ({
label: dataset.label,
data: [...(dataset.data || [])],
timestamp: Date.now()
}));
dataBackupRef.current.set('current', backup);
console.log(`📦 Data backed up: ${backup.reduce((total, ds) => total + ds.data.length, 0)} points`);
}
}
// Clear NaN insertion flag
@@ -982,17 +1035,78 @@ const ChartjsPlot = ({ session, height = '400px' }) => {
if (!chartRef.current) return;
try {
console.log('🔄 Resetting zoom...');
// Backup data before zoom reset
const backup = chartRef.current.data.datasets.map(dataset => ({
label: dataset.label,
data: [...(dataset.data || [])],
timestamp: Date.now()
}));
dataBackupRef.current.set('current', backup);
console.log(`📦 Data backed up: ${backup.reduce((total, ds) => total + ds.data.length, 0)} points`);
// Store current streaming state
const realtimeOptions = chartRef.current.options?.scales?.x?.realtime;
const wasPaused = realtimeOptions?.pause || false;
// Try to reset zoom using the zoom plugin
if (chartRef.current.resetZoom) {
chartRef.current.resetZoom();
chartRef.current.resetZoom('none'); // Use 'none' animation mode for faster reset
} else if (window.Chart?.helpers?.getRelativePosition) {
// Fallback: manually reset zoom by updating scale options
const chart = chartRef.current;
if (chart.options?.scales?.x?.realtime) {
// For realtime charts, just trigger an update
// For realtime charts, reset the scale manually
const now = Date.now();
chart.options.scales.x.realtime.duration = chart.options.scales.x.realtime.duration || 60000;
chart.options.scales.x.min = now - chart.options.scales.x.realtime.duration;
chart.options.scales.x.max = now;
chart.update('none');
}
}
// Restore data if it was lost during zoom reset
setTimeout(() => {
if (chartRef.current && dataBackupRef.current.has('current')) {
const backupData = dataBackupRef.current.get('current');
const now = Date.now();
// Only restore if backup is recent (within 30 seconds)
if (now - backupData[0]?.timestamp <= 30000) {
let restored = false;
chartRef.current.data.datasets.forEach((dataset, index) => {
const backupDataset = backupData[index];
if (backupDataset && dataset.label === backupDataset.label) {
// Only restore if current data is significantly smaller
if (!dataset.data || dataset.data.length < backupDataset.data.length * 0.3) {
dataset.data = [...backupDataset.data];
restored = true;
}
}
});
if (restored) {
console.log('🔄 Chart data restored from backup');
chartRef.current.update('none');
const totalPoints = chartRef.current.data.datasets.reduce((total, dataset) =>
total + (dataset.data?.length || 0), 0
);
setDataPointsCount(totalPoints);
}
}
// Restore streaming state
if (realtimeOptions && !wasPaused) {
realtimeOptions.pause = false;
}
console.log('✅ Zoom reset with data preservation complete');
}
}, 100); // Small delay to ensure zoom reset is complete
} catch (error) {
console.warn('Failed to reset zoom:', error);
}
@@ -1339,6 +1453,35 @@ const ChartjsPlot = ({ session, height = '400px' }) => {
</Box>
)}
{/* Reset Zoom Button */}
{chartRef.current && (
<Box
position="absolute"
bottom={2}
right={2}
zIndex={10}
>
<button
onClick={resetZoom}
style={{
background: 'rgba(0, 0, 0, 0.7)',
color: 'white',
border: 'none',
borderRadius: '4px',
padding: '4px 8px',
fontSize: '11px',
cursor: 'pointer',
display: 'flex',
alignItems: 'center',
gap: '4px'
}}
title="Reset zoom and restore data"
>
🔄 Reset Zoom
</button>
</Box>
)}
<canvas
ref={canvasRef}
style={{

main.py (97 changes)

View File

@@ -1925,27 +1925,37 @@ def get_historical_data():
# Read first line to check if any required variables are present with proper encoding handling
header_line = None
for encoding in ['utf-8', 'utf-8-sig', 'utf-16', 'latin-1']:
for encoding in ["utf-8", "utf-8-sig", "utf-16", "latin-1"]:
try:
with open(csv_file, "r", encoding=encoding) as f:
header_line = f.readline().strip()
if header_line:
print(f"🔍 DEBUG: Successfully read header with {encoding} encoding")
print(
f"🔍 DEBUG: Successfully read header with {encoding} encoding"
)
break
except (UnicodeDecodeError, UnicodeError):
continue
except Exception:
continue
if not header_line:
print(f"🔍 DEBUG: Could not read header from {csv_file}, skipping")
print(
f"🔍 DEBUG: Could not read header from {csv_file}, skipping"
)
continue
# Clean header line from BOM and normalize
header_line = header_line.replace('\ufeff', '').replace('\x00', '')
headers = [h.strip().replace('\x00', '') for h in header_line.split(",")]
header_line = header_line.replace("\ufeff", "").replace("\x00", "")
headers = [
h.strip().replace("\x00", "") for h in header_line.split(",")
]
# Clean any remaining unicode artifacts
headers = [h for h in headers if h and len(h.replace('\x00', '').strip()) > 0]
headers = [
h
for h in headers
if h and len(h.replace("\x00", "").strip()) > 0
]
print(f"🔍 DEBUG: Headers in {csv_file}: {headers}")
# Check if any of our variables are in this file
@@ -1961,65 +1971,90 @@ def get_historical_data():
# Read the CSV file with proper encoding and error handling
print(f"🔍 DEBUG: Reading CSV file with pandas...")
df = None
for encoding in ['utf-8', 'utf-8-sig', 'utf-16', 'latin-1']:
for encoding in ["utf-8", "utf-8-sig", "utf-16", "latin-1"]:
try:
df = pd.read_csv(csv_file, encoding=encoding, on_bad_lines='skip')
print(f"🔍 DEBUG: CSV loaded with {encoding}, shape: {df.shape}")
df = pd.read_csv(
csv_file, encoding=encoding, on_bad_lines="skip"
)
print(
f"🔍 DEBUG: CSV loaded with {encoding}, shape: {df.shape}"
)
break
except (UnicodeDecodeError, UnicodeError, pd.errors.ParserError):
except (
UnicodeDecodeError,
UnicodeError,
pd.errors.ParserError,
):
continue
except Exception as e:
print(f"🔍 DEBUG: Error reading with {encoding}: {e}")
continue
if df is None:
print(f"Warning: Could not read CSV file {csv_file} with any encoding")
print(
f"Warning: Could not read CSV file {csv_file} with any encoding"
)
continue
# Clean column names from BOM and unicode artifacts
df.columns = [col.replace('\ufeff', '').replace('\x00', '').strip() for col in df.columns]
df.columns = [
col.replace("\ufeff", "").replace("\x00", "").strip()
for col in df.columns
]
# Convert timestamp to datetime with flexible parsing
print(f"🔍 DEBUG: Converting timestamps...")
timestamp_col = None
for col in df.columns:
if 'timestamp' in col.lower():
if "timestamp" in col.lower():
timestamp_col = col
break
if timestamp_col is None:
print(f"🔍 DEBUG: No timestamp column found in {csv_file}, skipping")
print(
f"🔍 DEBUG: No timestamp column found in {csv_file}, skipping"
)
continue
try:
# Try multiple timestamp formats
df[timestamp_col] = pd.to_datetime(df[timestamp_col], errors='coerce')
df[timestamp_col] = pd.to_datetime(
df[timestamp_col], errors="coerce"
)
# Remove rows with invalid timestamps
df = df.dropna(subset=[timestamp_col])
if df.empty:
print(f"🔍 DEBUG: No valid timestamps in {csv_file}, skipping")
print(
f"🔍 DEBUG: No valid timestamps in {csv_file}, skipping"
)
continue
# Normalize column name to 'timestamp'
if timestamp_col != 'timestamp':
df = df.rename(columns={timestamp_col: 'timestamp'})
if timestamp_col != "timestamp":
df = df.rename(columns={timestamp_col: "timestamp"})
print(
f"🔍 DEBUG: Timestamp range: {df['timestamp'].min()} to {df['timestamp'].max()}"
)
print(f"🔍 DEBUG: Filter range: {start_time} to {end_time}")
except Exception as e:
print(f"🔍 DEBUG: Timestamp conversion failed for {csv_file}: {e}")
print(
f"🔍 DEBUG: Timestamp conversion failed for {csv_file}: {e}"
)
continue
# Recalculate matching variables after column cleaning
clean_headers = list(df.columns)
matching_vars = [var for var in variables if var in clean_headers]
print(f"🔍 DEBUG: Matching variables after cleaning: {matching_vars}")
print(
f"🔍 DEBUG: Matching variables after cleaning: {matching_vars}"
)
if not matching_vars:
print(f"🔍 DEBUG: No matching variables after cleaning in {csv_file}, skipping")
print(
f"🔍 DEBUG: No matching variables after cleaning in {csv_file}, skipping"
)
continue
# Filter by time range
@@ -2069,7 +2104,9 @@ def get_historical_data():
)
except Exception as e:
# Skip invalid values
print(f"🔍 DEBUG: Skipping invalid value for {var}: {e}")
print(
f"🔍 DEBUG: Skipping invalid value for {var}: {e}"
)
continue
except Exception as e:
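Condensed into a standalone helper, the pattern these hunks apply is: try several encodings, skip malformed rows, strip BOM/NUL artifacts from column names, coerce the timestamp column, and filter to the requested window. A minimal sketch under those assumptions; the function name is illustrative and not part of the endpoint above:

import pandas as pd

ENCODINGS = ["utf-8", "utf-8-sig", "utf-16", "latin-1"]

def load_csv_window(csv_file, variables, start_time, end_time):
    """Read one CSV tolerantly and return rows inside [start_time, end_time]."""
    df = None
    for encoding in ENCODINGS:
        try:
            df = pd.read_csv(csv_file, encoding=encoding, on_bad_lines="skip")
            break
        except (UnicodeDecodeError, UnicodeError, pd.errors.ParserError):
            continue
    if df is None:
        return None  # unreadable with any known encoding

    # Strip BOM / NUL artifacts left behind by mixed-encoding writers
    df.columns = [c.replace("\ufeff", "").replace("\x00", "").strip() for c in df.columns]

    # Locate and normalize the timestamp column
    ts_col = next((c for c in df.columns if "timestamp" in c.lower()), None)
    if ts_col is None:
        return None
    df[ts_col] = pd.to_datetime(df[ts_col], errors="coerce")
    df = df.dropna(subset=[ts_col]).rename(columns={ts_col: "timestamp"})

    # Keep only requested variables that survived header cleaning
    wanted = [v for v in variables if v in df.columns]
    if not wanted:
        return None

    mask = (df["timestamp"] >= start_time) & (df["timestamp"] <= end_time)
    return df.loc[mask, ["timestamp"] + wanted]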

View File

@@ -3,11 +3,11 @@
"should_connect": true,
"should_stream": false,
"active_datasets": [
"Test",
"DAR",
"Fast",
"DAR"
"Test"
]
},
"auto_recovery_enabled": true,
"last_update": "2025-08-15T00:26:15.383017"
"last_update": "2025-08-15T00:35:28.773668"
}

View File

@@ -25,228 +25,254 @@ from datetime import datetime
class CSVValidator:
"""Validates and cleans CSV files for the PLC streaming system"""
def __init__(self):
self.encodings_to_try = ['utf-8', 'utf-8-sig', 'utf-16', 'latin-1', 'cp1252']
self.encodings_to_try = ["utf-8", "utf-8-sig", "utf-16", "latin-1", "cp1252"]
self.issues_found = []
self.fixed_files = []
def detect_encoding(self, file_path):
"""Detect the encoding of a CSV file"""
for encoding in self.encodings_to_try:
try:
with open(file_path, 'r', encoding=encoding) as f:
with open(file_path, "r", encoding=encoding) as f:
f.read(1024) # Read first 1KB
return encoding
except (UnicodeDecodeError, UnicodeError):
continue
return None
def read_csv_headers(self, file_path):
"""Read CSV headers with encoding detection"""
encoding = self.detect_encoding(file_path)
if not encoding:
return None, None, "Could not detect encoding"
try:
with open(file_path, 'r', encoding=encoding) as f:
with open(file_path, "r", encoding=encoding) as f:
header_line = f.readline().strip()
if not header_line:
return None, encoding, "Empty header line"
# Clean header from BOM and unicode artifacts
header_line = header_line.replace('\ufeff', '').replace('\x00', '')
headers = [h.strip().replace('\x00', '') for h in header_line.split(',')]
header_line = header_line.replace("\ufeff", "").replace("\x00", "")
headers = [
h.strip().replace("\x00", "") for h in header_line.split(",")
]
headers = [h for h in headers if h and len(h.strip()) > 0]
return headers, encoding, None
except Exception as e:
return None, encoding, str(e)
def validate_csv_structure(self, file_path):
"""Validate CSV structure and detect issues"""
issues = []
# Check headers
headers, encoding, header_error = self.read_csv_headers(file_path)
if header_error:
issues.append({
'type': 'header_error',
'message': header_error,
'file': file_path
})
issues.append(
{"type": "header_error", "message": header_error, "file": file_path}
)
return issues
if not headers:
issues.append({
'type': 'no_headers',
'message': 'No valid headers found',
'file': file_path
})
issues.append(
{
"type": "no_headers",
"message": "No valid headers found",
"file": file_path,
}
)
return issues
# Check for encoding issues in headers
if any('\x00' in h or 'ÿþ' in h or '' in h for h in headers):
issues.append({
'type': 'encoding_artifacts',
'message': f'Headers contain encoding artifacts: {headers}',
'file': file_path,
'encoding': encoding
})
if any("\x00" in h or "ÿþ" in h or "" in h for h in headers):
issues.append(
{
"type": "encoding_artifacts",
"message": f"Headers contain encoding artifacts: {headers}",
"file": file_path,
"encoding": encoding,
}
)
# Try to read full CSV with pandas
try:
df = None
for enc in self.encodings_to_try:
try:
df = pd.read_csv(file_path, encoding=enc, on_bad_lines='skip')
df = pd.read_csv(file_path, encoding=enc, on_bad_lines="skip")
break
except (UnicodeDecodeError, UnicodeError, pd.errors.ParserError):
continue
if df is None:
issues.append({
'type': 'read_error',
'message': 'Could not read CSV with any encoding',
'file': file_path
})
issues.append(
{
"type": "read_error",
"message": "Could not read CSV with any encoding",
"file": file_path,
}
)
return issues
# Check for inconsistent columns
expected_cols = len(headers)
if len(df.columns) != expected_cols:
issues.append({
'type': 'column_mismatch',
'message': f'Expected {expected_cols} columns, found {len(df.columns)}',
'file': file_path,
'expected_headers': headers,
'actual_headers': list(df.columns)
})
issues.append(
{
"type": "column_mismatch",
"message": f"Expected {expected_cols} columns, found {len(df.columns)}",
"file": file_path,
"expected_headers": headers,
"actual_headers": list(df.columns),
}
)
# Check for timestamp column
timestamp_cols = [col for col in df.columns if 'timestamp' in col.lower()]
timestamp_cols = [col for col in df.columns if "timestamp" in col.lower()]
if not timestamp_cols:
issues.append({
'type': 'no_timestamp',
'message': 'No timestamp column found',
'file': file_path,
'columns': list(df.columns)
})
issues.append(
{
"type": "no_timestamp",
"message": "No timestamp column found",
"file": file_path,
"columns": list(df.columns),
}
)
else:
# Validate timestamp format
timestamp_col = timestamp_cols[0]
try:
df[timestamp_col] = pd.to_datetime(df[timestamp_col], errors='coerce')
df[timestamp_col] = pd.to_datetime(
df[timestamp_col], errors="coerce"
)
invalid_timestamps = df[timestamp_col].isna().sum()
if invalid_timestamps > 0:
issues.append({
'type': 'invalid_timestamps',
'message': f'{invalid_timestamps} invalid timestamps found',
'file': file_path,
'timestamp_column': timestamp_col
})
issues.append(
{
"type": "invalid_timestamps",
"message": f"{invalid_timestamps} invalid timestamps found",
"file": file_path,
"timestamp_column": timestamp_col,
}
)
except Exception as e:
issues.append({
'type': 'timestamp_parse_error',
'message': f'Cannot parse timestamps: {str(e)}',
'file': file_path,
'timestamp_column': timestamp_col
})
issues.append(
{
"type": "timestamp_parse_error",
"message": f"Cannot parse timestamps: {str(e)}",
"file": file_path,
"timestamp_column": timestamp_col,
}
)
except Exception as e:
issues.append({
'type': 'general_error',
'message': f'Error reading CSV: {str(e)}',
'file': file_path
})
issues.append(
{
"type": "general_error",
"message": f"Error reading CSV: {str(e)}",
"file": file_path,
}
)
return issues
def scan_directory(self, directory_path):
"""Scan directory for CSV issues"""
print(f"🔍 Scanning directory: {directory_path}")
csv_files = glob.glob(os.path.join(directory_path, "**/*.csv"), recursive=True)
total_files = len(csv_files)
print(f"📁 Found {total_files} CSV files")
all_issues = []
for i, csv_file in enumerate(csv_files, 1):
print(f"📄 Checking {i}/{total_files}: {os.path.basename(csv_file)}")
issues = self.validate_csv_structure(csv_file)
if issues:
all_issues.extend(issues)
print(f" ⚠️ {len(issues)} issues found")
else:
print(f" ✅ OK")
return all_issues
def fix_csv_file(self, file_path, backup=True):
"""Fix a problematic CSV file"""
print(f"🔧 Fixing CSV file: {file_path}")
if backup:
backup_path = f"{file_path}.backup.{datetime.now().strftime('%Y%m%d_%H%M%S')}"
backup_path = (
f"{file_path}.backup.{datetime.now().strftime('%Y%m%d_%H%M%S')}"
)
import shutil
shutil.copy2(file_path, backup_path)
print(f"📋 Backup created: {backup_path}")
# Detect encoding and read file
encoding = self.detect_encoding(file_path)
if not encoding:
print("❌ Could not detect encoding")
return False
try:
# Read with detected encoding
df = pd.read_csv(file_path, encoding=encoding, on_bad_lines='skip')
df = pd.read_csv(file_path, encoding=encoding, on_bad_lines="skip")
# Clean column names
df.columns = [col.replace('\ufeff', '').replace('\x00', '').strip() for col in df.columns]
df.columns = [
col.replace("\ufeff", "").replace("\x00", "").strip()
for col in df.columns
]
# Find timestamp column
timestamp_cols = [col for col in df.columns if 'timestamp' in col.lower()]
timestamp_cols = [col for col in df.columns if "timestamp" in col.lower()]
if timestamp_cols:
timestamp_col = timestamp_cols[0]
# Normalize to 'timestamp'
if timestamp_col != 'timestamp':
df = df.rename(columns={timestamp_col: 'timestamp'})
if timestamp_col != "timestamp":
df = df.rename(columns={timestamp_col: "timestamp"})
# Fix timestamps
df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
# Remove rows with invalid timestamps
df = df.dropna(subset=['timestamp'])
df = df.dropna(subset=["timestamp"])
# Write fixed file with UTF-8 encoding
df.to_csv(file_path, index=False, encoding='utf-8')
df.to_csv(file_path, index=False, encoding="utf-8")
print(f"✅ Fixed and saved with UTF-8 encoding")
return True
except Exception as e:
print(f"❌ Error fixing file: {e}")
return False
def print_issue_summary(self, issues):
"""Print a summary of issues found"""
if not issues:
print("\n🎉 No issues found!")
return
print(f"\n📊 Summary: {len(issues)} issues found")
issue_types = {}
for issue in issues:
issue_type = issue['type']
issue_type = issue["type"]
if issue_type not in issue_types:
issue_types[issue_type] = []
issue_types[issue_type].append(issue)
for issue_type, type_issues in issue_types.items():
print(f"\n🔸 {issue_type.replace('_', ' ').title()}: {len(type_issues)} files")
print(
f"\n🔸 {issue_type.replace('_', ' ').title()}: {len(type_issues)} files"
)
for issue in type_issues[:5]: # Show first 5
print(f" - {os.path.basename(issue['file'])}: {issue['message']}")
if len(type_issues) > 5:
@@ -254,41 +280,53 @@ class CSVValidator:
def main():
parser = argparse.ArgumentParser(description='CSV Validator and Cleaner for PLC Streaming System')
parser.add_argument('path', help='Path to CSV file or directory to process')
parser.add_argument('--scan', action='store_true', help='Scan directory for issues (default)')
parser.add_argument('--fix', action='store_true', help='Fix individual CSV file')
parser.add_argument('--fix-all', action='store_true', help='Fix all problematic files in directory')
parser.add_argument('--no-backup', action='store_true', help='Do not create backup when fixing')
parser = argparse.ArgumentParser(
description="CSV Validator and Cleaner for PLC Streaming System"
)
parser.add_argument("path", help="Path to CSV file or directory to process")
parser.add_argument(
"--scan", action="store_true", help="Scan directory for issues (default)"
)
parser.add_argument("--fix", action="store_true", help="Fix individual CSV file")
parser.add_argument(
"--fix-all", action="store_true", help="Fix all problematic files in directory"
)
parser.add_argument(
"--no-backup", action="store_true", help="Do not create backup when fixing"
)
args = parser.parse_args()
validator = CSVValidator()
if args.fix and os.path.isfile(args.path):
# Fix single file
success = validator.fix_csv_file(args.path, backup=not args.no_backup)
sys.exit(0 if success else 1)
elif os.path.isdir(args.path):
# Scan directory
issues = validator.scan_directory(args.path)
validator.print_issue_summary(issues)
if args.fix_all and issues:
print(f"\n🔧 Fixing {len(set(issue['file'] for issue in issues))} problematic files...")
problematic_files = set(issue['file'] for issue in issues if issue['type'] != 'no_timestamp')
print(
f"\n🔧 Fixing {len(set(issue['file'] for issue in issues))} problematic files..."
)
problematic_files = set(
issue["file"] for issue in issues if issue["type"] != "no_timestamp"
)
fixed_count = 0
for file_path in problematic_files:
if validator.fix_csv_file(file_path, backup=not args.no_backup):
fixed_count += 1
print(f"✅ Fixed {fixed_count}/{len(problematic_files)} files")
sys.exit(1 if issues else 0)
else:
print(f"❌ Path not found: {args.path}")
sys.exit(1)
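Beyond the CLI flags above, the class can also be driven directly; a short usage sketch based only on the methods shown in this diff (the module name csv_validator and the "records" directory are assumptions):

from csv_validator import CSVValidator  # module name assumed from context

validator = CSVValidator()

# Scan a directory tree of CSV files and summarize what was found
issues = validator.scan_directory("records")
validator.print_issue_summary(issues)

# Repair the problematic files, keeping timestamped .backup copies
problematic = sorted({i["file"] for i in issues if i["type"] != "no_timestamp"})
for path in problematic:
    validator.fix_csv_file(path, backup=True)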