feat: Enhance event logging, improve CSV handling, and optimize Chart.js zoom functionality
Parent commit: e517f40a5d
This commit: b864e81aa3
@@ -3928,8 +3928,311 @@
|
|||
"trigger_variable": null,
|
||||
"auto_started": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:28:49.611975",
|
||||
"level": "info",
|
||||
"event_type": "plot_session_created",
|
||||
"message": "Plot session 'UR29' created and started",
|
||||
"details": {
|
||||
"session_id": "plot_1",
|
||||
"variables": [
|
||||
"UR29_Brix",
|
||||
"UR29_ma",
|
||||
"AUX Blink_1.0S"
|
||||
],
|
||||
"time_window": 500,
|
||||
"trigger_variable": null,
|
||||
"auto_started": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:29:11.885234",
|
||||
"level": "info",
|
||||
"event_type": "plot_session_created",
|
||||
"message": "Plot session 'UR29' created and started",
|
||||
"details": {
|
||||
"session_id": "plot_1",
|
||||
"variables": [
|
||||
"UR29_Brix",
|
||||
"UR29_ma",
|
||||
"AUX Blink_1.0S"
|
||||
],
|
||||
"time_window": 500,
|
||||
"trigger_variable": null,
|
||||
"auto_started": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:29:47.127915",
|
||||
"level": "info",
|
||||
"event_type": "plot_session_created",
|
||||
"message": "Plot session 'UR29' created and started",
|
||||
"details": {
|
||||
"session_id": "plot_1",
|
||||
"variables": [
|
||||
"UR29_Brix",
|
||||
"UR29_ma",
|
||||
"AUX Blink_1.0S"
|
||||
],
|
||||
"time_window": 500,
|
||||
"trigger_variable": null,
|
||||
"auto_started": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:33:00.587011",
|
||||
"level": "info",
|
||||
"event_type": "plot_session_created",
|
||||
"message": "Plot session 'UR29' created and started",
|
||||
"details": {
|
||||
"session_id": "plot_1",
|
||||
"variables": [
|
||||
"UR29_Brix",
|
||||
"UR29_ma",
|
||||
"AUX Blink_1.0S"
|
||||
],
|
||||
"time_window": 500,
|
||||
"trigger_variable": null,
|
||||
"auto_started": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:34:00.990295",
|
||||
"level": "info",
|
||||
"event_type": "application_started",
|
||||
"message": "Application initialization completed successfully",
|
||||
"details": {}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:34:01.069801",
|
||||
"level": "info",
|
||||
"event_type": "dataset_activated",
|
||||
"message": "Dataset activated: DAR",
|
||||
"details": {
|
||||
"dataset_id": "DAR",
|
||||
"variables_count": 2,
|
||||
"streaming_count": 2,
|
||||
"prefix": "gateway_phoenix"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:34:01.075470",
|
||||
"level": "info",
|
||||
"event_type": "dataset_activated",
|
||||
"message": "Dataset activated: Fast",
|
||||
"details": {
|
||||
"dataset_id": "Fast",
|
||||
"variables_count": 2,
|
||||
"streaming_count": 1,
|
||||
"prefix": "fast"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:34:01.083475",
|
||||
"level": "info",
|
||||
"event_type": "csv_recording_started",
|
||||
"message": "CSV recording started: 2 datasets activated",
|
||||
"details": {
|
||||
"activated_datasets": 2,
|
||||
"total_datasets": 3
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:35:06.741557",
|
||||
"level": "info",
|
||||
"event_type": "plot_session_created",
|
||||
"message": "Plot session 'UR29' created and started",
|
||||
"details": {
|
||||
"session_id": "plot_1",
|
||||
"variables": [
|
||||
"UR29_Brix",
|
||||
"UR29_ma",
|
||||
"AUX Blink_1.0S"
|
||||
],
|
||||
"time_window": 500,
|
||||
"trigger_variable": null,
|
||||
"auto_started": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:35:28.675944",
|
||||
"level": "info",
|
||||
"event_type": "application_started",
|
||||
"message": "Application initialization completed successfully",
|
||||
"details": {}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:35:28.758707",
|
||||
"level": "info",
|
||||
"event_type": "dataset_activated",
|
||||
"message": "Dataset activated: DAR",
|
||||
"details": {
|
||||
"dataset_id": "DAR",
|
||||
"variables_count": 2,
|
||||
"streaming_count": 2,
|
||||
"prefix": "gateway_phoenix"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:35:28.764039",
|
||||
"level": "info",
|
||||
"event_type": "dataset_activated",
|
||||
"message": "Dataset activated: Fast",
|
||||
"details": {
|
||||
"dataset_id": "Fast",
|
||||
"variables_count": 2,
|
||||
"streaming_count": 1,
|
||||
"prefix": "fast"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:35:28.768130",
|
||||
"level": "info",
|
||||
"event_type": "csv_recording_started",
|
||||
"message": "CSV recording started: 2 datasets activated",
|
||||
"details": {
|
||||
"activated_datasets": 2,
|
||||
"total_datasets": 3
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:35:39.619619",
|
||||
"level": "info",
|
||||
"event_type": "plot_session_created",
|
||||
"message": "Plot session 'UR29' created and started",
|
||||
"details": {
|
||||
"session_id": "plot_1",
|
||||
"variables": [
|
||||
"UR29_Brix",
|
||||
"UR29_ma",
|
||||
"AUX Blink_1.0S"
|
||||
],
|
||||
"time_window": 500,
|
||||
"trigger_variable": null,
|
||||
"auto_started": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:40:08.431291",
|
||||
"level": "info",
|
||||
"event_type": "plot_session_created",
|
||||
"message": "Plot session 'UR29' created and started",
|
||||
"details": {
|
||||
"session_id": "plot_1",
|
||||
"variables": [
|
||||
"UR29_Brix",
|
||||
"UR29_ma",
|
||||
"AUX Blink_1.0S"
|
||||
],
|
||||
"time_window": 500,
|
||||
"trigger_variable": null,
|
||||
"auto_started": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:40:14.468124",
|
||||
"level": "info",
|
||||
"event_type": "plot_session_created",
|
||||
"message": "Plot session 'UR29' created and started",
|
||||
"details": {
|
||||
"session_id": "plot_1",
|
||||
"variables": [
|
||||
"UR29_Brix",
|
||||
"UR29_ma",
|
||||
"AUX Blink_1.0S"
|
||||
],
|
||||
"time_window": 500,
|
||||
"trigger_variable": null,
|
||||
"auto_started": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:40:41.861858",
|
||||
"level": "info",
|
||||
"event_type": "plot_session_created",
|
||||
"message": "Plot session 'UR29' created and started",
|
||||
"details": {
|
||||
"session_id": "plot_1",
|
||||
"variables": [
|
||||
"UR29_Brix",
|
||||
"UR29_ma",
|
||||
"AUX Blink_1.0S"
|
||||
],
|
||||
"time_window": 500,
|
||||
"trigger_variable": null,
|
||||
"auto_started": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:40:45.084903",
|
||||
"level": "info",
|
||||
"event_type": "plot_session_created",
|
||||
"message": "Plot session 'UR29' created and started",
|
||||
"details": {
|
||||
"session_id": "plot_1",
|
||||
"variables": [
|
||||
"UR29_Brix",
|
||||
"UR29_ma",
|
||||
"AUX Blink_1.0S"
|
||||
],
|
||||
"time_window": 500,
|
||||
"trigger_variable": null,
|
||||
"auto_started": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:40:49.549459",
|
||||
"level": "info",
|
||||
"event_type": "plot_session_created",
|
||||
"message": "Plot session 'UR29' created and started",
|
||||
"details": {
|
||||
"session_id": "plot_1",
|
||||
"variables": [
|
||||
"UR29_Brix",
|
||||
"UR29_ma",
|
||||
"AUX Blink_1.0S"
|
||||
],
|
||||
"time_window": 500,
|
||||
"trigger_variable": null,
|
||||
"auto_started": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:41:19.890329",
|
||||
"level": "info",
|
||||
"event_type": "plot_session_created",
|
||||
"message": "Plot session 'UR29' created and started",
|
||||
"details": {
|
||||
"session_id": "plot_1",
|
||||
"variables": [
|
||||
"UR29_Brix",
|
||||
"UR29_ma",
|
||||
"AUX Blink_1.0S"
|
||||
],
|
||||
"time_window": 100,
|
||||
"trigger_variable": null,
|
||||
"auto_started": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": "2025-08-15T00:41:32.191310",
|
||||
"level": "info",
|
||||
"event_type": "plot_session_created",
|
||||
"message": "Plot session 'UR29' created and started",
|
||||
"details": {
|
||||
"session_id": "plot_1",
|
||||
"variables": [
|
||||
"UR29_Brix",
|
||||
"UR29_ma",
|
||||
"AUX Blink_1.0S"
|
||||
],
|
||||
"time_window": 3600,
|
||||
"trigger_variable": null,
|
||||
"auto_started": true
|
||||
}
|
||||
}
|
||||
],
|
||||
"last_updated": "2025-08-15T00:27:40.211134",
|
||||
"total_entries": 360
|
||||
"last_updated": "2025-08-15T00:41:32.191310",
|
||||
"total_entries": 381
|
||||
}
|
|
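Each entry appended above shares one shape: timestamp, level, event_type, message, details. For context only, a minimal sketch of how such an entry could be appended to a JSON-backed event log. The log_event helper, the file handling, and the "events" key name are assumptions for illustration, not code from this commit; last_updated and total_entries are the two footer fields visible at the end of the hunk.

import json
from datetime import datetime

def log_event(log_path, level, event_type, message, details=None):
    # Hypothetical helper: append one entry shaped like those above to a JSON log file.
    with open(log_path, "r", encoding="utf-8") as f:
        log = json.load(f)
    log.setdefault("events", []).append(  # "events" key name is assumed
        {
            "timestamp": datetime.now().isoformat(),
            "level": level,
            "event_type": event_type,
            "message": message,
            "details": details or {},
        }
    )
    log["last_updated"] = datetime.now().isoformat()
    log["total_entries"] = len(log["events"])
    with open(log_path, "w", encoding="utf-8") as f:
        json.dump(log, f, indent=2)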
@@ -2,12 +2,12 @@
|
|||
"plots": [
|
||||
{
|
||||
"id": "plot_1",
|
||||
"line_tension": 0.1,
|
||||
"line_tension": 0,
|
||||
"name": "UR29",
|
||||
"point_hover_radius": 4,
|
||||
"point_radius": 0,
|
||||
"stepped": false,
|
||||
"time_window": 500,
|
||||
"stepped": true,
|
||||
"time_window": 3600,
|
||||
"trigger_enabled": false,
|
||||
"trigger_on_true": true,
|
||||
"trigger_variable": null,
|
||||
|
|
|
@@ -67,6 +67,9 @@ const ChartjsPlot = ({ session, height = '400px' }) => {
|
|||
const [isRefreshing, setIsRefreshing] = useState(false);
|
||||
const [isLoadingHistorical, setIsLoadingHistorical] = useState(false);
|
||||
const resolvedConfigRef = useRef(null);
|
||||
|
||||
// Data preservation system for zoom operations
|
||||
const dataBackupRef = useRef(new Map());
|
||||
|
||||
const bgColor = useColorModeValue('white', 'gray.800');
|
||||
const textColor = useColorModeValue('gray.600', 'gray.300');
|
||||
|
@@ -580,17 +583,55 @@ const ChartjsPlot = ({ session, height = '400px' }) => {
|
|||
},
|
||||
...(zoomAvailable ? {
|
||||
zoom: {
|
||||
// Only enable zoom/pan in fullscreen mode
|
||||
// Always enable zoom/pan, but with a configuration optimized for streaming
|
||||
pan: {
|
||||
enabled: !!session?.isFullscreen,
|
||||
enabled: true,
|
||||
mode: 'x',
|
||||
modifierKey: 'shift'
|
||||
modifierKey: 'shift',
|
||||
onPanComplete: function(context) {
|
||||
// Preserve streaming state after pan
|
||||
const chart = context.chart;
|
||||
if (chart.options?.scales?.x?.realtime) {
|
||||
const realtime = chart.options.scales.x.realtime;
|
||||
// Ensure streaming continues after pan
|
||||
if (!sessionDataRef.current.userPaused && !sessionDataRef.current.isPaused) {
|
||||
realtime.pause = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
zoom: {
|
||||
drag: { enabled: !!session?.isFullscreen },
|
||||
wheel: { enabled: !!session?.isFullscreen },
|
||||
pinch: { enabled: !!session?.isFullscreen },
|
||||
mode: 'x'
|
||||
drag: {
|
||||
enabled: true,
|
||||
backgroundColor: 'rgba(128,128,128,0.3)'
|
||||
},
|
||||
wheel: {
|
||||
enabled: true,
|
||||
speed: 0.1
|
||||
},
|
||||
pinch: { enabled: true },
|
||||
mode: 'x',
|
||||
onZoomComplete: function(context) {
|
||||
// Preserve streaming state and data after zoom
|
||||
const chart = context.chart;
|
||||
console.log('🔍 Zoom completed, preserving streaming state...');
|
||||
|
||||
if (chart.options?.scales?.x?.realtime) {
|
||||
const realtime = chart.options.scales.x.realtime;
|
||||
// Don't auto-pause streaming after zoom
|
||||
if (!sessionDataRef.current.userPaused && !sessionDataRef.current.isPaused) {
|
||||
setTimeout(() => {
|
||||
realtime.pause = false;
|
||||
}, 10);
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
limits: {
|
||||
x: {
|
||||
min: 'original',
|
||||
max: 'original'
|
||||
}
|
||||
}
|
||||
}
|
||||
} : {})
|
||||
|
@@ -830,6 +871,18 @@ const ChartjsPlot = ({ session, height = '400px' }) => {
|
|||
// Update chart
|
||||
if (pointsAdded > 0) {
|
||||
chart.update('quiet');
|
||||
|
||||
// Backup data periodically when significant data is added
|
||||
if (pointsAdded > 5 && dataBackupRef.current) {
|
||||
const backup = chart.data.datasets.map(dataset => ({
|
||||
label: dataset.label,
|
||||
data: [...(dataset.data || [])],
|
||||
timestamp: Date.now()
|
||||
}));
|
||||
|
||||
dataBackupRef.current.set('current', backup);
|
||||
console.log(`📦 Data backed up: ${backup.reduce((total, ds) => total + ds.data.length, 0)} points`);
|
||||
}
|
||||
}
|
||||
|
||||
// Clear NaN insertion flag
|
||||
|
@@ -982,17 +1035,78 @@ const ChartjsPlot = ({ session, height = '400px' }) => {
|
|||
if (!chartRef.current) return;
|
||||
|
||||
try {
|
||||
console.log('🔄 Resetting zoom...');
|
||||
|
||||
// Backup data before zoom reset
|
||||
const backup = chartRef.current.data.datasets.map(dataset => ({
|
||||
label: dataset.label,
|
||||
data: [...(dataset.data || [])],
|
||||
timestamp: Date.now()
|
||||
}));
|
||||
|
||||
dataBackupRef.current.set('current', backup);
|
||||
console.log(`📦 Data backed up: ${backup.reduce((total, ds) => total + ds.data.length, 0)} points`);
|
||||
|
||||
// Store current streaming state
|
||||
const realtimeOptions = chartRef.current.options?.scales?.x?.realtime;
|
||||
const wasPaused = realtimeOptions?.pause || false;
|
||||
|
||||
// Try to reset zoom using the zoom plugin
|
||||
if (chartRef.current.resetZoom) {
|
||||
chartRef.current.resetZoom();
|
||||
chartRef.current.resetZoom('none'); // Use 'none' animation mode for faster reset
|
||||
} else if (window.Chart?.helpers?.getRelativePosition) {
|
||||
// Fallback: manually reset zoom by updating scale options
|
||||
const chart = chartRef.current;
|
||||
if (chart.options?.scales?.x?.realtime) {
|
||||
// For realtime charts, just trigger an update
|
||||
// For realtime charts, reset the scale manually
|
||||
const now = Date.now();
|
||||
chart.options.scales.x.realtime.duration = chart.options.scales.x.realtime.duration || 60000;
|
||||
chart.options.scales.x.min = now - chart.options.scales.x.realtime.duration;
|
||||
chart.options.scales.x.max = now;
|
||||
chart.update('none');
|
||||
}
|
||||
}
|
||||
|
||||
// Restore data if it was lost during zoom reset
|
||||
setTimeout(() => {
|
||||
if (chartRef.current && dataBackupRef.current.has('current')) {
|
||||
const backupData = dataBackupRef.current.get('current');
|
||||
const now = Date.now();
|
||||
|
||||
// Only restore if backup is recent (within 30 seconds)
|
||||
if (now - backupData[0]?.timestamp <= 30000) {
|
||||
let restored = false;
|
||||
chartRef.current.data.datasets.forEach((dataset, index) => {
|
||||
const backupDataset = backupData[index];
|
||||
if (backupDataset && dataset.label === backupDataset.label) {
|
||||
// Only restore if current data is significantly smaller
|
||||
if (!dataset.data || dataset.data.length < backupDataset.data.length * 0.3) {
|
||||
dataset.data = [...backupDataset.data];
|
||||
restored = true;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
if (restored) {
|
||||
console.log('🔄 Chart data restored from backup');
|
||||
chartRef.current.update('none');
|
||||
|
||||
const totalPoints = chartRef.current.data.datasets.reduce((total, dataset) =>
|
||||
total + (dataset.data?.length || 0), 0
|
||||
);
|
||||
setDataPointsCount(totalPoints);
|
||||
}
|
||||
}
|
||||
|
||||
// Restore streaming state
|
||||
if (realtimeOptions && !wasPaused) {
|
||||
realtimeOptions.pause = false;
|
||||
}
|
||||
|
||||
console.log('✅ Zoom reset with data preservation complete');
|
||||
}
|
||||
}, 100); // Small delay to ensure zoom reset is complete
|
||||
|
||||
} catch (error) {
|
||||
console.warn('Failed to reset zoom:', error);
|
||||
}
|
||||
|
@@ -1339,6 +1453,35 @@ const ChartjsPlot = ({ session, height = '400px' }) => {
|
|||
</Box>
|
||||
)}
|
||||
|
||||
{/* Reset Zoom Button */}
|
||||
{chartRef.current && (
|
||||
<Box
|
||||
position="absolute"
|
||||
bottom={2}
|
||||
right={2}
|
||||
zIndex={10}
|
||||
>
|
||||
<button
|
||||
onClick={resetZoom}
|
||||
style={{
|
||||
background: 'rgba(0, 0, 0, 0.7)',
|
||||
color: 'white',
|
||||
border: 'none',
|
||||
borderRadius: '4px',
|
||||
padding: '4px 8px',
|
||||
fontSize: '11px',
|
||||
cursor: 'pointer',
|
||||
display: 'flex',
|
||||
alignItems: 'center',
|
||||
gap: '4px'
|
||||
}}
|
||||
title="Reset zoom and restore data"
|
||||
>
|
||||
🔄 Reset Zoom
|
||||
</button>
|
||||
</Box>
|
||||
)}
|
||||
|
||||
<canvas
|
||||
ref={canvasRef}
|
||||
style={{
|
||||
|
|
main.py (97 changed lines)
|
@@ -1925,27 +1925,37 @@ def get_historical_data():
|
|||
|
||||
# Read first line to check if any required variables are present with proper encoding handling
|
||||
header_line = None
|
||||
for encoding in ['utf-8', 'utf-8-sig', 'utf-16', 'latin-1']:
|
||||
for encoding in ["utf-8", "utf-8-sig", "utf-16", "latin-1"]:
|
||||
try:
|
||||
with open(csv_file, "r", encoding=encoding) as f:
|
||||
header_line = f.readline().strip()
|
||||
if header_line:
|
||||
print(f"🔍 DEBUG: Successfully read header with {encoding} encoding")
|
||||
print(
|
||||
f"🔍 DEBUG: Successfully read header with {encoding} encoding"
|
||||
)
|
||||
break
|
||||
except (UnicodeDecodeError, UnicodeError):
|
||||
continue
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
|
||||
if not header_line:
|
||||
print(f"🔍 DEBUG: Could not read header from {csv_file}, skipping")
|
||||
print(
|
||||
f"🔍 DEBUG: Could not read header from {csv_file}, skipping"
|
||||
)
|
||||
continue
|
||||
|
||||
# Clean header line from BOM and normalize
|
||||
header_line = header_line.replace('\ufeff', '').replace('\x00', '')
|
||||
headers = [h.strip().replace('\x00', '') for h in header_line.split(",")]
|
||||
header_line = header_line.replace("\ufeff", "").replace("\x00", "")
|
||||
headers = [
|
||||
h.strip().replace("\x00", "") for h in header_line.split(",")
|
||||
]
|
||||
# Clean any remaining unicode artifacts
|
||||
headers = [h for h in headers if h and len(h.replace('\x00', '').strip()) > 0]
|
||||
headers = [
|
||||
h
|
||||
for h in headers
|
||||
if h and len(h.replace("\x00", "").strip()) > 0
|
||||
]
|
||||
print(f"🔍 DEBUG: Headers in {csv_file}: {headers}")
|
||||
|
||||
# Check if any of our variables are in this file
|
||||
|
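The hunk above reads the CSV header with an encoding-fallback loop and strips BOM/NUL artifacts before splitting. A condensed, standalone sketch of that pattern; the function name and return value are illustrative, while the encoding list and cleaning steps mirror the diff.

def read_clean_headers(csv_file):
    # Try a few encodings until the header line can be read.
    header_line = None
    for encoding in ["utf-8", "utf-8-sig", "utf-16", "latin-1"]:
        try:
            with open(csv_file, "r", encoding=encoding) as f:
                header_line = f.readline().strip()
            if header_line:
                break
        except (UnicodeDecodeError, UnicodeError):
            continue
    if not header_line:
        return []
    # Drop the BOM and stray NUL bytes left by mixed-encoding writers, then split.
    header_line = header_line.replace("\ufeff", "").replace("\x00", "")
    return [h.strip() for h in header_line.split(",") if h.strip()]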
@@ -1961,65 +1971,90 @@ def get_historical_data():
|
|||
# Read the CSV file with proper encoding and error handling
|
||||
print(f"🔍 DEBUG: Reading CSV file with pandas...")
|
||||
df = None
|
||||
for encoding in ['utf-8', 'utf-8-sig', 'utf-16', 'latin-1']:
|
||||
for encoding in ["utf-8", "utf-8-sig", "utf-16", "latin-1"]:
|
||||
try:
|
||||
df = pd.read_csv(csv_file, encoding=encoding, on_bad_lines='skip')
|
||||
print(f"🔍 DEBUG: CSV loaded with {encoding}, shape: {df.shape}")
|
||||
df = pd.read_csv(
|
||||
csv_file, encoding=encoding, on_bad_lines="skip"
|
||||
)
|
||||
print(
|
||||
f"🔍 DEBUG: CSV loaded with {encoding}, shape: {df.shape}"
|
||||
)
|
||||
break
|
||||
except (UnicodeDecodeError, UnicodeError, pd.errors.ParserError):
|
||||
except (
|
||||
UnicodeDecodeError,
|
||||
UnicodeError,
|
||||
pd.errors.ParserError,
|
||||
):
|
||||
continue
|
||||
except Exception as e:
|
||||
print(f"🔍 DEBUG: Error reading with {encoding}: {e}")
|
||||
continue
|
||||
|
||||
|
||||
if df is None:
|
||||
print(f"Warning: Could not read CSV file {csv_file} with any encoding")
|
||||
print(
|
||||
f"Warning: Could not read CSV file {csv_file} with any encoding"
|
||||
)
|
||||
continue
|
||||
|
||||
|
||||
# Clean column names from BOM and unicode artifacts
|
||||
df.columns = [col.replace('\ufeff', '').replace('\x00', '').strip() for col in df.columns]
|
||||
df.columns = [
|
||||
col.replace("\ufeff", "").replace("\x00", "").strip()
|
||||
for col in df.columns
|
||||
]
|
||||
|
||||
# Convert timestamp to datetime with flexible parsing
|
||||
print(f"🔍 DEBUG: Converting timestamps...")
|
||||
timestamp_col = None
|
||||
for col in df.columns:
|
||||
if 'timestamp' in col.lower():
|
||||
if "timestamp" in col.lower():
|
||||
timestamp_col = col
|
||||
break
|
||||
|
||||
|
||||
if timestamp_col is None:
|
||||
print(f"🔍 DEBUG: No timestamp column found in {csv_file}, skipping")
|
||||
print(
|
||||
f"🔍 DEBUG: No timestamp column found in {csv_file}, skipping"
|
||||
)
|
||||
continue
|
||||
|
||||
|
||||
try:
|
||||
# Try multiple timestamp formats
|
||||
df[timestamp_col] = pd.to_datetime(df[timestamp_col], errors='coerce')
|
||||
df[timestamp_col] = pd.to_datetime(
|
||||
df[timestamp_col], errors="coerce"
|
||||
)
|
||||
# Remove rows with invalid timestamps
|
||||
df = df.dropna(subset=[timestamp_col])
|
||||
|
||||
|
||||
if df.empty:
|
||||
print(f"🔍 DEBUG: No valid timestamps in {csv_file}, skipping")
|
||||
print(
|
||||
f"🔍 DEBUG: No valid timestamps in {csv_file}, skipping"
|
||||
)
|
||||
continue
|
||||
|
||||
|
||||
# Normalize column name to 'timestamp'
|
||||
if timestamp_col != 'timestamp':
|
||||
df = df.rename(columns={timestamp_col: 'timestamp'})
|
||||
|
||||
if timestamp_col != "timestamp":
|
||||
df = df.rename(columns={timestamp_col: "timestamp"})
|
||||
|
||||
print(
|
||||
f"🔍 DEBUG: Timestamp range: {df['timestamp'].min()} to {df['timestamp'].max()}"
|
||||
)
|
||||
print(f"🔍 DEBUG: Filter range: {start_time} to {end_time}")
|
||||
except Exception as e:
|
||||
print(f"🔍 DEBUG: Timestamp conversion failed for {csv_file}: {e}")
|
||||
print(
|
||||
f"🔍 DEBUG: Timestamp conversion failed for {csv_file}: {e}"
|
||||
)
|
||||
continue
|
||||
|
||||
# Recalculate matching variables after column cleaning
|
||||
clean_headers = list(df.columns)
|
||||
matching_vars = [var for var in variables if var in clean_headers]
|
||||
print(f"🔍 DEBUG: Matching variables after cleaning: {matching_vars}")
|
||||
print(
|
||||
f"🔍 DEBUG: Matching variables after cleaning: {matching_vars}"
|
||||
)
|
||||
|
||||
if not matching_vars:
|
||||
print(f"🔍 DEBUG: No matching variables after cleaning in {csv_file}, skipping")
|
||||
print(
|
||||
f"🔍 DEBUG: No matching variables after cleaning in {csv_file}, skipping"
|
||||
)
|
||||
continue
|
||||
|
||||
# Filter by time range
|
||||
|
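The hunk above normalizes whichever column contains "timestamp", drops rows whose timestamps fail to parse, and renames the column to "timestamp"; the time-range filter itself falls outside the hunk. A minimal sketch of that step, assuming pandas and datetime bounds start_time/end_time; the final filter line is an assumed completion, the rest mirrors the diff.

import pandas as pd

def slice_by_time(df, start_time, end_time):
    # Find the timestamp column, coerce it, and keep only rows inside the window.
    ts_col = next((c for c in df.columns if "timestamp" in c.lower()), None)
    if ts_col is None:
        return df.iloc[0:0]  # no timestamp column: return an empty slice
    df[ts_col] = pd.to_datetime(df[ts_col], errors="coerce")
    df = df.dropna(subset=[ts_col]).rename(columns={ts_col: "timestamp"})
    # Assumed completion of the "# Filter by time range" step shown above.
    return df[(df["timestamp"] >= start_time) & (df["timestamp"] <= end_time)]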
@@ -2069,7 +2104,9 @@ def get_historical_data():
|
|||
)
|
||||
except Exception as e:
|
||||
# Skip invalid values
|
||||
print(f"🔍 DEBUG: Skipping invalid value for {var}: {e}")
|
||||
print(
|
||||
f"🔍 DEBUG: Skipping invalid value for {var}: {e}"
|
||||
)
|
||||
continue
|
||||
|
||||
except Exception as e:
|
||||
|
|
|
@@ -3,11 +3,11 @@
|
|||
"should_connect": true,
|
||||
"should_stream": false,
|
||||
"active_datasets": [
|
||||
"Test",
|
||||
"DAR",
|
||||
"Fast",
|
||||
"DAR"
|
||||
"Test"
|
||||
]
|
||||
},
|
||||
"auto_recovery_enabled": true,
|
||||
"last_update": "2025-08-15T00:26:15.383017"
|
||||
"last_update": "2025-08-15T00:35:28.773668"
|
||||
}
|
|
@@ -25,228 +25,254 @@ from datetime import datetime
|
|||
|
||||
class CSVValidator:
|
||||
"""Validates and cleans CSV files for the PLC streaming system"""
|
||||
|
||||
|
||||
def __init__(self):
|
||||
self.encodings_to_try = ['utf-8', 'utf-8-sig', 'utf-16', 'latin-1', 'cp1252']
|
||||
self.encodings_to_try = ["utf-8", "utf-8-sig", "utf-16", "latin-1", "cp1252"]
|
||||
self.issues_found = []
|
||||
self.fixed_files = []
|
||||
|
||||
|
||||
def detect_encoding(self, file_path):
|
||||
"""Detect the encoding of a CSV file"""
|
||||
for encoding in self.encodings_to_try:
|
||||
try:
|
||||
with open(file_path, 'r', encoding=encoding) as f:
|
||||
with open(file_path, "r", encoding=encoding) as f:
|
||||
f.read(1024) # Read first 1KB
|
||||
return encoding
|
||||
except (UnicodeDecodeError, UnicodeError):
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
def read_csv_headers(self, file_path):
|
||||
"""Read CSV headers with encoding detection"""
|
||||
encoding = self.detect_encoding(file_path)
|
||||
if not encoding:
|
||||
return None, None, "Could not detect encoding"
|
||||
|
||||
|
||||
try:
|
||||
with open(file_path, 'r', encoding=encoding) as f:
|
||||
with open(file_path, "r", encoding=encoding) as f:
|
||||
header_line = f.readline().strip()
|
||||
if not header_line:
|
||||
return None, encoding, "Empty header line"
|
||||
|
||||
|
||||
# Clean header from BOM and unicode artifacts
|
||||
header_line = header_line.replace('\ufeff', '').replace('\x00', '')
|
||||
headers = [h.strip().replace('\x00', '') for h in header_line.split(',')]
|
||||
header_line = header_line.replace("\ufeff", "").replace("\x00", "")
|
||||
headers = [
|
||||
h.strip().replace("\x00", "") for h in header_line.split(",")
|
||||
]
|
||||
headers = [h for h in headers if h and len(h.strip()) > 0]
|
||||
|
||||
|
||||
return headers, encoding, None
|
||||
except Exception as e:
|
||||
return None, encoding, str(e)
|
||||
|
||||
|
||||
def validate_csv_structure(self, file_path):
|
||||
"""Validate CSV structure and detect issues"""
|
||||
issues = []
|
||||
|
||||
|
||||
# Check headers
|
||||
headers, encoding, header_error = self.read_csv_headers(file_path)
|
||||
if header_error:
|
||||
issues.append({
|
||||
'type': 'header_error',
|
||||
'message': header_error,
|
||||
'file': file_path
|
||||
})
|
||||
issues.append(
|
||||
{"type": "header_error", "message": header_error, "file": file_path}
|
||||
)
|
||||
return issues
|
||||
|
||||
|
||||
if not headers:
|
||||
issues.append({
|
||||
'type': 'no_headers',
|
||||
'message': 'No valid headers found',
|
||||
'file': file_path
|
||||
})
|
||||
issues.append(
|
||||
{
|
||||
"type": "no_headers",
|
||||
"message": "No valid headers found",
|
||||
"file": file_path,
|
||||
}
|
||||
)
|
||||
return issues
|
||||
|
||||
|
||||
# Check for encoding issues in headers
|
||||
if any('\x00' in h or 'ÿþ' in h or '' in h for h in headers):
|
||||
issues.append({
|
||||
'type': 'encoding_artifacts',
|
||||
'message': f'Headers contain encoding artifacts: {headers}',
|
||||
'file': file_path,
|
||||
'encoding': encoding
|
||||
})
|
||||
|
||||
if any("\x00" in h or "ÿþ" in h or "" in h for h in headers):
|
||||
issues.append(
|
||||
{
|
||||
"type": "encoding_artifacts",
|
||||
"message": f"Headers contain encoding artifacts: {headers}",
|
||||
"file": file_path,
|
||||
"encoding": encoding,
|
||||
}
|
||||
)
|
||||
|
||||
# Try to read full CSV with pandas
|
||||
try:
|
||||
df = None
|
||||
for enc in self.encodings_to_try:
|
||||
try:
|
||||
df = pd.read_csv(file_path, encoding=enc, on_bad_lines='skip')
|
||||
df = pd.read_csv(file_path, encoding=enc, on_bad_lines="skip")
|
||||
break
|
||||
except (UnicodeDecodeError, UnicodeError, pd.errors.ParserError):
|
||||
continue
|
||||
|
||||
|
||||
if df is None:
|
||||
issues.append({
|
||||
'type': 'read_error',
|
||||
'message': 'Could not read CSV with any encoding',
|
||||
'file': file_path
|
||||
})
|
||||
issues.append(
|
||||
{
|
||||
"type": "read_error",
|
||||
"message": "Could not read CSV with any encoding",
|
||||
"file": file_path,
|
||||
}
|
||||
)
|
||||
return issues
|
||||
|
||||
|
||||
# Check for inconsistent columns
|
||||
expected_cols = len(headers)
|
||||
if len(df.columns) != expected_cols:
|
||||
issues.append({
|
||||
'type': 'column_mismatch',
|
||||
'message': f'Expected {expected_cols} columns, found {len(df.columns)}',
|
||||
'file': file_path,
|
||||
'expected_headers': headers,
|
||||
'actual_headers': list(df.columns)
|
||||
})
|
||||
|
||||
issues.append(
|
||||
{
|
||||
"type": "column_mismatch",
|
||||
"message": f"Expected {expected_cols} columns, found {len(df.columns)}",
|
||||
"file": file_path,
|
||||
"expected_headers": headers,
|
||||
"actual_headers": list(df.columns),
|
||||
}
|
||||
)
|
||||
|
||||
# Check for timestamp column
|
||||
timestamp_cols = [col for col in df.columns if 'timestamp' in col.lower()]
|
||||
timestamp_cols = [col for col in df.columns if "timestamp" in col.lower()]
|
||||
if not timestamp_cols:
|
||||
issues.append({
|
||||
'type': 'no_timestamp',
|
||||
'message': 'No timestamp column found',
|
||||
'file': file_path,
|
||||
'columns': list(df.columns)
|
||||
})
|
||||
issues.append(
|
||||
{
|
||||
"type": "no_timestamp",
|
||||
"message": "No timestamp column found",
|
||||
"file": file_path,
|
||||
"columns": list(df.columns),
|
||||
}
|
||||
)
|
||||
else:
|
||||
# Validate timestamp format
|
||||
timestamp_col = timestamp_cols[0]
|
||||
try:
|
||||
df[timestamp_col] = pd.to_datetime(df[timestamp_col], errors='coerce')
|
||||
df[timestamp_col] = pd.to_datetime(
|
||||
df[timestamp_col], errors="coerce"
|
||||
)
|
||||
invalid_timestamps = df[timestamp_col].isna().sum()
|
||||
if invalid_timestamps > 0:
|
||||
issues.append({
|
||||
'type': 'invalid_timestamps',
|
||||
'message': f'{invalid_timestamps} invalid timestamps found',
|
||||
'file': file_path,
|
||||
'timestamp_column': timestamp_col
|
||||
})
|
||||
issues.append(
|
||||
{
|
||||
"type": "invalid_timestamps",
|
||||
"message": f"{invalid_timestamps} invalid timestamps found",
|
||||
"file": file_path,
|
||||
"timestamp_column": timestamp_col,
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
issues.append({
|
||||
'type': 'timestamp_parse_error',
|
||||
'message': f'Cannot parse timestamps: {str(e)}',
|
||||
'file': file_path,
|
||||
'timestamp_column': timestamp_col
|
||||
})
|
||||
|
||||
issues.append(
|
||||
{
|
||||
"type": "timestamp_parse_error",
|
||||
"message": f"Cannot parse timestamps: {str(e)}",
|
||||
"file": file_path,
|
||||
"timestamp_column": timestamp_col,
|
||||
}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
issues.append({
|
||||
'type': 'general_error',
|
||||
'message': f'Error reading CSV: {str(e)}',
|
||||
'file': file_path
|
||||
})
|
||||
|
||||
issues.append(
|
||||
{
|
||||
"type": "general_error",
|
||||
"message": f"Error reading CSV: {str(e)}",
|
||||
"file": file_path,
|
||||
}
|
||||
)
|
||||
|
||||
return issues
|
||||
|
||||
|
||||
def scan_directory(self, directory_path):
|
||||
"""Scan directory for CSV issues"""
|
||||
print(f"🔍 Scanning directory: {directory_path}")
|
||||
|
||||
|
||||
csv_files = glob.glob(os.path.join(directory_path, "**/*.csv"), recursive=True)
|
||||
total_files = len(csv_files)
|
||||
|
||||
|
||||
print(f"📁 Found {total_files} CSV files")
|
||||
|
||||
|
||||
all_issues = []
|
||||
for i, csv_file in enumerate(csv_files, 1):
|
||||
print(f"📄 Checking {i}/{total_files}: {os.path.basename(csv_file)}")
|
||||
|
||||
|
||||
issues = self.validate_csv_structure(csv_file)
|
||||
if issues:
|
||||
all_issues.extend(issues)
|
||||
print(f" ⚠️ {len(issues)} issues found")
|
||||
else:
|
||||
print(f" ✅ OK")
|
||||
|
||||
|
||||
return all_issues
|
||||
|
||||
|
||||
def fix_csv_file(self, file_path, backup=True):
|
||||
"""Fix a problematic CSV file"""
|
||||
print(f"🔧 Fixing CSV file: {file_path}")
|
||||
|
||||
|
||||
if backup:
|
||||
backup_path = f"{file_path}.backup.{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
||||
backup_path = (
|
||||
f"{file_path}.backup.{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
||||
)
|
||||
import shutil
|
||||
|
||||
shutil.copy2(file_path, backup_path)
|
||||
print(f"📋 Backup created: {backup_path}")
|
||||
|
||||
|
||||
# Detect encoding and read file
|
||||
encoding = self.detect_encoding(file_path)
|
||||
if not encoding:
|
||||
print("❌ Could not detect encoding")
|
||||
return False
|
||||
|
||||
|
||||
try:
|
||||
# Read with detected encoding
|
||||
df = pd.read_csv(file_path, encoding=encoding, on_bad_lines='skip')
|
||||
|
||||
df = pd.read_csv(file_path, encoding=encoding, on_bad_lines="skip")
|
||||
|
||||
# Clean column names
|
||||
df.columns = [col.replace('\ufeff', '').replace('\x00', '').strip() for col in df.columns]
|
||||
|
||||
df.columns = [
|
||||
col.replace("\ufeff", "").replace("\x00", "").strip()
|
||||
for col in df.columns
|
||||
]
|
||||
|
||||
# Find timestamp column
|
||||
timestamp_cols = [col for col in df.columns if 'timestamp' in col.lower()]
|
||||
timestamp_cols = [col for col in df.columns if "timestamp" in col.lower()]
|
||||
if timestamp_cols:
|
||||
timestamp_col = timestamp_cols[0]
|
||||
# Normalize to 'timestamp'
|
||||
if timestamp_col != 'timestamp':
|
||||
df = df.rename(columns={timestamp_col: 'timestamp'})
|
||||
|
||||
if timestamp_col != "timestamp":
|
||||
df = df.rename(columns={timestamp_col: "timestamp"})
|
||||
|
||||
# Fix timestamps
|
||||
df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
|
||||
df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
|
||||
# Remove rows with invalid timestamps
|
||||
df = df.dropna(subset=['timestamp'])
|
||||
|
||||
df = df.dropna(subset=["timestamp"])
|
||||
|
||||
# Write fixed file with UTF-8 encoding
|
||||
df.to_csv(file_path, index=False, encoding='utf-8')
|
||||
df.to_csv(file_path, index=False, encoding="utf-8")
|
||||
print(f"✅ Fixed and saved with UTF-8 encoding")
|
||||
|
||||
|
||||
return True
|
||||
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ Error fixing file: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def print_issue_summary(self, issues):
|
||||
"""Print a summary of issues found"""
|
||||
if not issues:
|
||||
print("\n🎉 No issues found!")
|
||||
return
|
||||
|
||||
|
||||
print(f"\n📊 Summary: {len(issues)} issues found")
|
||||
|
||||
|
||||
issue_types = {}
|
||||
for issue in issues:
|
||||
issue_type = issue['type']
|
||||
issue_type = issue["type"]
|
||||
if issue_type not in issue_types:
|
||||
issue_types[issue_type] = []
|
||||
issue_types[issue_type].append(issue)
|
||||
|
||||
|
||||
for issue_type, type_issues in issue_types.items():
|
||||
print(f"\n🔸 {issue_type.replace('_', ' ').title()}: {len(type_issues)} files")
|
||||
print(
|
||||
f"\n🔸 {issue_type.replace('_', ' ').title()}: {len(type_issues)} files"
|
||||
)
|
||||
for issue in type_issues[:5]: # Show first 5
|
||||
print(f" - {os.path.basename(issue['file'])}: {issue['message']}")
|
||||
if len(type_issues) > 5:
|
||||
|
@@ -254,41 +280,53 @@ class CSVValidator:
|
|||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='CSV Validator and Cleaner for PLC Streaming System')
|
||||
parser.add_argument('path', help='Path to CSV file or directory to process')
|
||||
parser.add_argument('--scan', action='store_true', help='Scan directory for issues (default)')
|
||||
parser.add_argument('--fix', action='store_true', help='Fix individual CSV file')
|
||||
parser.add_argument('--fix-all', action='store_true', help='Fix all problematic files in directory')
|
||||
parser.add_argument('--no-backup', action='store_true', help='Do not create backup when fixing')
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="CSV Validator and Cleaner for PLC Streaming System"
|
||||
)
|
||||
parser.add_argument("path", help="Path to CSV file or directory to process")
|
||||
parser.add_argument(
|
||||
"--scan", action="store_true", help="Scan directory for issues (default)"
|
||||
)
|
||||
parser.add_argument("--fix", action="store_true", help="Fix individual CSV file")
|
||||
parser.add_argument(
|
||||
"--fix-all", action="store_true", help="Fix all problematic files in directory"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-backup", action="store_true", help="Do not create backup when fixing"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
validator = CSVValidator()
|
||||
|
||||
|
||||
if args.fix and os.path.isfile(args.path):
|
||||
# Fix single file
|
||||
success = validator.fix_csv_file(args.path, backup=not args.no_backup)
|
||||
sys.exit(0 if success else 1)
|
||||
|
||||
|
||||
elif os.path.isdir(args.path):
|
||||
# Scan directory
|
||||
issues = validator.scan_directory(args.path)
|
||||
validator.print_issue_summary(issues)
|
||||
|
||||
|
||||
if args.fix_all and issues:
|
||||
print(f"\n🔧 Fixing {len(set(issue['file'] for issue in issues))} problematic files...")
|
||||
|
||||
problematic_files = set(issue['file'] for issue in issues if issue['type'] != 'no_timestamp')
|
||||
print(
|
||||
f"\n🔧 Fixing {len(set(issue['file'] for issue in issues))} problematic files..."
|
||||
)
|
||||
|
||||
problematic_files = set(
|
||||
issue["file"] for issue in issues if issue["type"] != "no_timestamp"
|
||||
)
|
||||
fixed_count = 0
|
||||
|
||||
|
||||
for file_path in problematic_files:
|
||||
if validator.fix_csv_file(file_path, backup=not args.no_backup):
|
||||
fixed_count += 1
|
||||
|
||||
|
||||
print(f"✅ Fixed {fixed_count}/{len(problematic_files)} files")
|
||||
|
||||
|
||||
sys.exit(1 if issues else 0)
|
||||
|
||||
|
||||
else:
|
||||
print(f"❌ Path not found: {args.path}")
|
||||
sys.exit(1)
|
||||
|
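For reference, the validator above can also be driven programmatically rather than through the CLI flags defined in main(). A short sketch using only the methods shown in this diff; the paths are illustrative.

validator = CSVValidator()

# Scan a records directory and summarize any problems found.
issues = validator.scan_directory("records")            # path is illustrative
validator.print_issue_summary(issues)

# Fix one problematic file, keeping a timestamped backup copy.
validator.fix_csv_file("records/ur29_fast.csv", backup=True)  # path is illustrative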
|