From ae1fc0508d71fcd29b202c129a362404af9cc50a Mon Sep 17 00:00:00 2001 From: Miguel Date: Sat, 16 Aug 2025 10:06:43 +0200 Subject: [PATCH] feat: Implement historical data caching mechanism with buffer zones for improved performance and efficiency in data retrieval --- application_events.json | 11 +- core/historical_cache.py | 231 ++++++++++ .../src/components/PlotHistoricalSession.jsx | 58 ++- main.py | 430 +++++++++--------- 4 files changed, 495 insertions(+), 235 deletions(-) create mode 100644 core/historical_cache.py diff --git a/application_events.json b/application_events.json index 02e97c8..0e2a4d2 100644 --- a/application_events.json +++ b/application_events.json @@ -8212,8 +8212,15 @@ "event_type": "application_started", "message": "Application initialization completed successfully", "details": {} + }, + { + "timestamp": "2025-08-16T10:01:50.009210", + "level": "info", + "event_type": "application_started", + "message": "Application initialization completed successfully", + "details": {} } ], - "last_updated": "2025-08-16T09:30:47.754146", - "total_entries": 684 + "last_updated": "2025-08-16T10:01:50.009210", + "total_entries": 685 } \ No newline at end of file diff --git a/core/historical_cache.py b/core/historical_cache.py new file mode 100644 index 0000000..1ea34ca --- /dev/null +++ b/core/historical_cache.py @@ -0,0 +1,231 @@ +""" +Historical Data Cache Manager +Provides efficient caching of CSV data for historical plot requests with buffer zones +""" + +import os +import logging +import pandas as pd +from datetime import datetime, timedelta +from typing import Dict, List, Optional, Tuple +import threading +import time +from collections import OrderedDict + + +class HistoricalDataCache: + """ + Cache manager for historical CSV data with the following features: + - 25% buffer around requested time ranges + - Memory-efficient storage using pandas DataFrames + - Automatic cache invalidation based on file modification times + - Thread-safe operations + - LRU eviction when memory limit is reached + """ + + def __init__( + self, + logger: logging.Logger, + max_cache_entries: int = 50, + buffer_percentage: float = 0.25, + ): + self.logger = logger + self.max_cache_entries = max_cache_entries + self.buffer_percentage = buffer_percentage + + # Cache structure: {cache_key: CacheEntry} + self.cache = OrderedDict() + self.cache_lock = threading.RLock() + + # File modification time tracking + self.file_mtimes = {} + + self.logger.info( + f"Historical data cache initialized with {max_cache_entries} max entries and {buffer_percentage*100}% buffer" + ) + + def _generate_cache_key( + self, variables: List[str], start_time: datetime, end_time: datetime + ) -> str: + """Generate a unique cache key for the given parameters""" + variables_key = "_".join(sorted(variables)) + time_key = f"{start_time.isoformat()}_{end_time.isoformat()}" + return f"{variables_key}|{time_key}" + + def _calculate_buffer_range( + self, start_time: datetime, end_time: datetime + ) -> Tuple[datetime, datetime]: + """Calculate buffered time range with 25% extension on both sides""" + duration = end_time - start_time + buffer_duration = duration * self.buffer_percentage + + buffered_start = start_time - buffer_duration + buffered_end = end_time + buffer_duration + + return buffered_start, buffered_end + + def _check_file_modifications(self, csv_files: List[str]) -> bool: + """Check if any CSV files have been modified since last cache""" + for file_path in csv_files: + if not os.path.exists(file_path): + continue + + 
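            # A new or changed modification time for any CSV marks the whole
            # cache as stale; get_cached_data() reacts by calling clear_cache()
            # and falling back to a fresh read from disk.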
current_mtime = os.path.getmtime(file_path) + if ( + file_path not in self.file_mtimes + or self.file_mtimes[file_path] != current_mtime + ): + self.file_mtimes[file_path] = current_mtime + return True + + return False + + def _find_overlapping_cache( + self, variables: List[str], start_time: datetime, end_time: datetime + ) -> Optional[str]: + """Find existing cache entry that can satisfy the request""" + with self.cache_lock: + for cache_key, cache_entry in self.cache.items(): + # Check if variables match + if set(cache_entry["variables"]) != set(variables): + continue + + # Check if time range is covered by cached range + cached_start = cache_entry["time_range"]["start"] + cached_end = cache_entry["time_range"]["end"] + + if cached_start <= start_time and cached_end >= end_time: + self.logger.debug(f"Found overlapping cache entry: {cache_key}") + return cache_key + + return None + + def _filter_data_by_time_range( + self, data: pd.DataFrame, start_time: datetime, end_time: datetime + ) -> pd.DataFrame: + """Filter DataFrame to the requested time range""" + if data.empty: + return data + + # Convert timestamp column to datetime if it's not already + if "timestamp" in data.columns: + if not pd.api.types.is_datetime64_any_dtype(data["timestamp"]): + data["timestamp"] = pd.to_datetime(data["timestamp"]) + + # Filter by time range + mask = (data["timestamp"] >= start_time) & (data["timestamp"] <= end_time) + return data[mask].copy() + + return data + + def get_cached_data( + self, + variables: List[str], + start_time: datetime, + end_time: datetime, + csv_files: List[str], + ) -> Optional[pd.DataFrame]: + """ + Retrieve data from cache if available and valid + Returns None if cache miss or invalid + """ + try: + # Check if files have been modified + if self._check_file_modifications(csv_files): + self.logger.debug("CSV files modified, invalidating cache") + self.clear_cache() + return None + + # Look for overlapping cache entry + cache_key = self._find_overlapping_cache(variables, start_time, end_time) + if not cache_key: + return None + + with self.cache_lock: + cache_entry = self.cache[cache_key] + + # Move to end for LRU + self.cache.move_to_end(cache_key) + + # Filter data to requested time range + filtered_data = self._filter_data_by_time_range( + cache_entry["data"], start_time, end_time + ) + + self.logger.info( + f"Cache hit: {len(filtered_data)} points returned from cache" + ) + return filtered_data + + except Exception as e: + self.logger.error(f"Error retrieving cached data: {e}") + return None + + def store_data( + self, + variables: List[str], + start_time: datetime, + end_time: datetime, + data: pd.DataFrame, + ): + """ + Store data in cache with buffered time range + """ + try: + # Calculate buffered range for cache storage + buffered_start, buffered_end = self._calculate_buffer_range( + start_time, end_time + ) + + cache_key = self._generate_cache_key( + variables, buffered_start, buffered_end + ) + + cache_entry = { + "variables": variables.copy(), + "time_range": {"start": buffered_start, "end": buffered_end}, + "original_request": {"start": start_time, "end": end_time}, + "data": data.copy(), + "cached_at": datetime.now(), + "size_bytes": data.memory_usage(deep=True).sum(), + } + + with self.cache_lock: + # Remove oldest entries if cache is full + while len(self.cache) >= self.max_cache_entries: + oldest_key = next(iter(self.cache)) + removed_entry = self.cache.pop(oldest_key) + self.logger.debug(f"Evicted cache entry: {oldest_key}") + + # Store new entry + 
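                # OrderedDict preserves insertion order and get_cached_data()
                # calls move_to_end() on every hit, so the next(iter(...)) pop
                # above always evicts the least recently used entry.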
self.cache[cache_key] = cache_entry + + total_size = sum(entry["size_bytes"] for entry in self.cache.values()) + self.logger.info( + f"Cached data for {len(variables)} variables, " + f"{len(data)} points, buffered range: {buffered_start} to {buffered_end}" + ) + self.logger.debug( + f"Cache stats: {len(self.cache)} entries, {total_size/1024/1024:.2f} MB" + ) + + except Exception as e: + self.logger.error(f"Error storing data in cache: {e}") + + def clear_cache(self): + """Clear all cached data""" + with self.cache_lock: + self.cache.clear() + self.file_mtimes.clear() + self.logger.info("Cache cleared") + + def get_cache_stats(self) -> Dict: + """Get cache statistics for monitoring""" + with self.cache_lock: + total_size = sum(entry["size_bytes"] for entry in self.cache.values()) + return { + "entries": len(self.cache), + "total_size_mb": total_size / 1024 / 1024, + "max_entries": self.max_cache_entries, + "buffer_percentage": self.buffer_percentage * 100, + } diff --git a/frontend/src/components/PlotHistoricalSession.jsx b/frontend/src/components/PlotHistoricalSession.jsx index 7b0a0e1..331f660 100644 --- a/frontend/src/components/PlotHistoricalSession.jsx +++ b/frontend/src/components/PlotHistoricalSession.jsx @@ -106,17 +106,39 @@ export default function PlotHistoricalSession({ const toast = useToast() const { isOpen: isConfigModalOpen, onOpen: onConfigModalOpen, onClose: onConfigModalClose } = useDisclosure() + // Keep track of the last loaded data range for optimization + const [loadedDataRange, setLoadedDataRange] = useState(null) + // Load historical data on component mount and when time range changes useEffect(() => { loadHistoricalData() }, [session.id, localTimeRange]) - const loadHistoricalData = async () => { + // Function to check if a range is contained within another range + const isRangeContained = (newRange, existingRange) => { + if (!existingRange || !newRange) return false + const newStart = new Date(newRange.start).getTime() + const newEnd = new Date(newRange.end).getTime() + const existingStart = new Date(existingRange.start).getTime() + const existingEnd = new Date(existingRange.end).getTime() + + return newStart >= existingStart && newEnd <= existingEnd + } + + const loadHistoricalData = async (forceReload = false) => { if (!session.variables || session.variables.length === 0) { setError('No variables defined for this plot') return } + // Check if the new range is contained within the previously loaded range + if (!forceReload && loadedDataRange && isRangeContained(localTimeRange, loadedDataRange)) { + console.log('📊 Zoom optimization: New range is contained within loaded data, skipping reload') + console.log('📊 New range:', localTimeRange) + console.log('📊 Loaded range:', loadedDataRange) + return + } + try { setIsLoading(true) setError(null) @@ -136,6 +158,7 @@ export default function PlotHistoricalSession({ console.log('📊 Loading historical data for session:', session.id) console.log('📊 Request data:', requestData) + console.log('📊 Force reload:', forceReload) // Simulate progress const progressInterval = setInterval(() => { @@ -152,6 +175,13 @@ export default function PlotHistoricalSession({ if (response.data) { setHistoricalData(response.data) + + // Update the loaded data range for optimization + setLoadedDataRange({ + start: new Date(localTimeRange.start), + end: new Date(localTimeRange.end) + }) + setDataStats({ totalPoints: response.total_points || response.data.length, variablesFound: response.variables_found || [], @@ -162,7 +192,8 @@ export default 
function PlotHistoricalSession({ console.log('📊 Historical data loaded:', { points: response.data.length, variables: response.variables_found, - loadTime + loadTime, + loadedRange: localTimeRange }) toast({ @@ -206,13 +237,26 @@ export default function PlotHistoricalSession({ } const handleZoomToTimeRange = (start, end) => { - console.log('📊 Zoom event - loading data for range:', { start, end }) - setLocalTimeRange({ start: new Date(start), end: new Date(end) }) + console.log('📊 Zoom event - evaluating range:', { start, end }) + const newRange = { start: new Date(start), end: new Date(end) } + + // Check if the new range is contained within the loaded data + if (loadedDataRange && isRangeContained(newRange, loadedDataRange)) { + console.log('📊 Zoom optimization: Range contained in loaded data, skipping reload') + setLocalTimeRange(newRange) + } else { + console.log('📊 Zoom requires data reload - new range outside loaded data') + setLocalTimeRange(newRange) + } } const handlePanToTimeRange = (start, end) => { - console.log('📊 Pan event - loading data for range:', { start, end }) - setLocalTimeRange({ start: new Date(start), end: new Date(end) }) + console.log('📊 Pan event - evaluating range:', { start, end }) + const newRange = { start: new Date(start), end: new Date(end) } + + // Pan always requires checking if we need new data + console.log('📊 Pan event - loading data for range:', newRange) + setLocalTimeRange(newRange) } // Color mode @@ -276,7 +320,7 @@ export default function PlotHistoricalSession({ icon={} size="sm" variant="ghost" - onClick={loadHistoricalData} + onClick={() => loadHistoricalData(true)} isLoading={isLoading} /> diff --git a/main.py b/main.py index 3574012..e00f299 100644 --- a/main.py +++ b/main.py @@ -21,6 +21,7 @@ except ImportError: TKINTER_AVAILABLE = False print("Warning: tkinter not available. 
File browse functionality will be limited.") from core import PLCDataStreamer +from core.historical_cache import HistoricalDataCache from utils.json_manager import JSONManager, SchemaManager from utils.symbol_loader import SymbolLoader from utils.symbol_processor import SymbolProcessor @@ -64,6 +65,7 @@ def project_path(*parts: str) -> str: # Global instances streamer = None +historical_cache = None json_manager = JSONManager() schema_manager = SchemaManager() @@ -1838,7 +1840,9 @@ def get_plot_variables(): @app.route("/api/plots/historical", methods=["POST"]) def get_historical_data(): - """Get historical data from CSV files for plot initialization""" + """Get historical data from CSV files for plot initialization with caching""" + global historical_cache + print("🔍 DEBUG: Historical endpoint called") try: data = request.get_json() @@ -1851,7 +1855,7 @@ def get_historical_data(): variables = data.get("variables", []) time_window_seconds = data.get("time_window", 60) - # NEW: Support for explicit start_time and end_time parameters + # Support for explicit start_time and end_time parameters start_time_param = data.get("start_time") end_time_param = data.get("end_time") @@ -1873,33 +1877,29 @@ def get_historical_data(): variables = valid_variables print(f"🔍 DEBUG: Valid variables after filtering: {variables}") - # Import pandas and glob (datetime already imported globally) + # Import required modules try: - print("🔍 DEBUG: Importing pandas...") + print("🔍 DEBUG: Importing modules...") import pandas as pd - - print("🔍 DEBUG: Importing glob...") import glob - - print("🔍 DEBUG: Importing timedelta...") from datetime import timedelta - print("🔍 DEBUG: All imports successful") except ImportError as e: print(f"❌ DEBUG: Import failed: {e}") return jsonify({"error": f"pandas import failed: {str(e)}"}), 500 - except Exception as e: - print(f"❌ DEBUG: Unexpected import error: {e}") - import traceback - traceback.print_exc() - return jsonify({"error": f"Import error: {str(e)}"}), 500 + # Initialize cache if not exists + if historical_cache is None: + historical_cache = HistoricalDataCache( + logger=streamer.logger if streamer and streamer.logger else None, + max_cache_entries=50, + buffer_percentage=0.25 + ) # Calculate time range try: print("🔍 DEBUG: Calculating time range...") - # NEW: Use explicit start/end times if provided, otherwise fall back to time_window if start_time_param and end_time_param: start_time = datetime.fromisoformat(start_time_param.replace('Z', '+00:00')) end_time = datetime.fromisoformat(end_time_param.replace('Z', '+00:00')) @@ -1923,145 +1923,119 @@ def get_historical_data(): except Exception as e: print(f"❌ DEBUG: Time calculation error: {e}") - import traceback - - traceback.print_exc() return jsonify({"error": f"Time calculation failed: {str(e)}"}), 500 - # Get records directory - try: - print("🔍 DEBUG: Getting records directory...") - records_dir = os.path.join(os.path.dirname(__file__), "records") - print(f"🔍 DEBUG: Records directory: {records_dir}") - print(f"🔍 DEBUG: Records dir exists: {os.path.exists(records_dir)}") - - if not os.path.exists(records_dir): - print("🔍 DEBUG: Records directory not found, returning empty data") - return jsonify({ - "data": [], - "time_range": { - "start": start_time.isoformat(), - "end": end_time.isoformat(), - }, - "variables_found": [], - "total_points": 0, - }) - - historical_data = [] - - # NEW: Improved date folder calculation to support wider time ranges - print("🔍 DEBUG: Calculating date folders...") - current_date = 
start_time.date()
-            end_date = end_time.date()
-            date_folders = []
+        # Get relevant CSV files for cache checking
+        records_dir = os.path.join(os.path.dirname(__file__), "records")
+        csv_files = []
+
+        if os.path.exists(records_dir):
+            # Calculate buffered time range for file discovery (same as cache buffer)
+            duration = end_time - start_time
+            buffer_duration = duration * 0.25
+            buffer_start = start_time - buffer_duration
+            buffer_end = end_time + buffer_duration
-            # Generate all dates in the range
+            current_date = buffer_start.date()
+            end_date = buffer_end.date()
+
+            # Collect all relevant CSV files
             while current_date <= end_date:
                 date_str = current_date.strftime("%d-%m-%Y")
                 folder_path = os.path.join(records_dir, date_str)
                 if os.path.exists(folder_path):
-                    date_folders.append(folder_path)
-                    print(f"🔍 DEBUG: Added date folder: {folder_path}")
+                    csv_files.extend(glob.glob(os.path.join(folder_path, "*.csv")))
                 current_date += timedelta(days=1)
+
+        print(f"🔍 DEBUG: Found {len(csv_files)} CSV files for cache checking")
+
+        # Try to get data from cache first
+        cached_data = historical_cache.get_cached_data(variables, start_time, end_time, csv_files)
+
+        if cached_data is not None:
+            print("🔍 DEBUG: Cache hit! Returning cached data")
-            print(f"🔍 DEBUG: Found date folders: {date_folders}")
-        except Exception as e:
-            print(f"❌ DEBUG: Records directory error: {e}")
-            import traceback
+            # Convert DataFrame to the expected format
+            historical_data = []
+            for _, row in cached_data.iterrows():
+                if 'timestamp' in row and 'variable' in row and 'value' in row:
+                    historical_data.append({
+                        "timestamp": row['timestamp'].isoformat() if hasattr(row['timestamp'], "isoformat") else str(row['timestamp']),
+                        "variable": str(row['variable']),
+                        "value": float(row['value']) if pd.api.types.is_numeric_dtype(type(row['value'])) else row['value']
+                    })
+
+            return jsonify({
+                "data": historical_data,
+                "time_range": {
+                    "start": start_time.isoformat(),
+                    "end": end_time.isoformat(),
+                },
+                "variables_found": list(set([item["variable"] for item in historical_data])),
+                "total_points": len(historical_data),
+                "cached": True  # Indicate this was from cache
+            })
-            traceback.print_exc()
-            return jsonify({"error": f"Records directory error: {str(e)}"}), 500
+        # Cache miss - load data from CSV files
+        print("🔍 DEBUG: Cache miss - loading from CSV files")
+        historical_data = []
+
+        if not os.path.exists(records_dir):
+            print("🔍 DEBUG: Records directory not found, returning empty data")
+            return jsonify({
+                "data": [],
+                "time_range": {
+                    "start": start_time.isoformat(),
+                    "end": end_time.isoformat(),
+                },
+                "variables_found": [],
+                "total_points": 0,
+            })
-        # Search for CSV files with any of the required variables
+        # Calculate extended range for cache storage (25% buffer)
+        duration = end_time - start_time
+        buffer_duration = duration * 0.25
+        buffered_start = start_time - buffer_duration
+        buffered_end = end_time + buffer_duration
+
+        # Search for CSV files in the buffered range
+        current_date = buffered_start.date()
+        end_date = buffered_end.date()
+        date_folders = []
+
+        while current_date <= end_date:
+            date_str = current_date.strftime("%d-%m-%Y")
+            folder_path = os.path.join(records_dir, date_str)
+            if os.path.exists(folder_path):
+                date_folders.append(folder_path)
+            current_date += timedelta(days=1)
+
+        print(f"🔍 DEBUG: Processing {len(date_folders)} date folders with buffer")
+
+        # Process CSV files and collect all data (including buffer)
+        all_data_for_cache = []
+
         for folder_path in date_folders:
-
csv_files = glob.glob(os.path.join(folder_path, "*.csv")) - print(f"🔍 DEBUG: CSV files in {folder_path}: {csv_files}") - - for csv_file in csv_files: + csv_files_in_folder = glob.glob(os.path.join(folder_path, "*.csv")) + + for csv_file in csv_files_in_folder: try: - print(f"🔍 DEBUG: Processing CSV file: {csv_file}") - - # Read first line to check if any required variables are present with proper encoding handling - header_line = None - for encoding in ["utf-8", "utf-8-sig", "utf-16", "latin-1"]: - try: - with open(csv_file, "r", encoding=encoding) as f: - header_line = f.readline().strip() - if header_line: - print( - f"🔍 DEBUG: Successfully read header with {encoding} encoding" - ) - break - except (UnicodeDecodeError, UnicodeError): - continue - except Exception: - continue - - if not header_line: - print( - f"🔍 DEBUG: Could not read header from {csv_file}, skipping" - ) - continue - - # Clean header line from BOM and normalize - header_line = header_line.replace("\ufeff", "").replace("\x00", "") - headers = [ - h.strip().replace("\x00", "") for h in header_line.split(",") - ] - # Clean any remaining unicode artifacts - headers = [ - h - for h in headers - if h and len(h.replace("\x00", "").strip()) > 0 - ] - print(f"🔍 DEBUG: Headers in {csv_file}: {headers}") - - # Check if any of our variables are in this file - matching_vars = [var for var in variables if var in headers] - print(f"🔍 DEBUG: Matching variables: {matching_vars}") - - if not matching_vars: - print( - f"🔍 DEBUG: No matching variables in {csv_file}, skipping" - ) - continue - - # Read the CSV file with proper encoding and error handling - print(f"🔍 DEBUG: Reading CSV file with pandas...") + # Read CSV file with encoding detection df = None for encoding in ["utf-8", "utf-8-sig", "utf-16", "latin-1"]: try: - df = pd.read_csv( - csv_file, encoding=encoding, on_bad_lines="skip" - ) - print( - f"🔍 DEBUG: CSV loaded with {encoding}, shape: {df.shape}" - ) + df = pd.read_csv(csv_file, encoding=encoding, on_bad_lines="skip") break - except ( - UnicodeDecodeError, - UnicodeError, - pd.errors.ParserError, - ): + except: continue - except Exception as e: - print(f"🔍 DEBUG: Error reading with {encoding}: {e}") - continue - + if df is None: - print( - f"Warning: Could not read CSV file {csv_file} with any encoding" - ) continue - # Clean column names from BOM and unicode artifacts - df.columns = [ - col.replace("\ufeff", "").replace("\x00", "").strip() - for col in df.columns - ] + # Clean column names + df.columns = [col.replace("\ufeff", "").replace("\x00", "").strip() for col in df.columns] - # Convert timestamp to datetime with flexible parsing - print(f"🔍 DEBUG: Converting timestamps...") + # Find timestamp column timestamp_col = None for col in df.columns: if "timestamp" in col.lower(): @@ -2069,74 +2043,40 @@ def get_historical_data(): break if timestamp_col is None: - print( - f"🔍 DEBUG: No timestamp column found in {csv_file}, skipping" - ) continue - try: - # Try multiple timestamp formats - df[timestamp_col] = pd.to_datetime( - df[timestamp_col], errors="coerce" - ) - # Remove rows with invalid timestamps - df = df.dropna(subset=[timestamp_col]) - - if df.empty: - print( - f"🔍 DEBUG: No valid timestamps in {csv_file}, skipping" - ) - continue - - # Normalize column name to 'timestamp' - if timestamp_col != "timestamp": - df = df.rename(columns={timestamp_col: "timestamp"}) - - print( - f"🔍 DEBUG: Timestamp range: {df['timestamp'].min()} to {df['timestamp'].max()}" - ) - print(f"🔍 DEBUG: Filter range: {start_time} to 
{end_time}") - except Exception as e: - print( - f"🔍 DEBUG: Timestamp conversion failed for {csv_file}: {e}" - ) + # Convert timestamps + df[timestamp_col] = pd.to_datetime(df[timestamp_col], errors="coerce") + df = df.dropna(subset=[timestamp_col]) + + if df.empty: continue - # Recalculate matching variables after column cleaning - clean_headers = list(df.columns) - matching_vars = [var for var in variables if var in clean_headers] - print( - f"🔍 DEBUG: Matching variables after cleaning: {matching_vars}" - ) + # Normalize column name + if timestamp_col != "timestamp": + df = df.rename(columns={timestamp_col: "timestamp"}) - if not matching_vars: - print( - f"🔍 DEBUG: No matching variables after cleaning in {csv_file}, skipping" - ) - continue - - # Filter by time range - mask = (df["timestamp"] >= start_time) & ( - df["timestamp"] <= end_time - ) + # Filter to buffered time range + mask = (df["timestamp"] >= buffered_start) & (df["timestamp"] <= buffered_end) filtered_df = df[mask] - print(f"🔍 DEBUG: Filtered dataframe shape: {filtered_df.shape}") if filtered_df.empty: - print(f"🔍 DEBUG: No data in time range for {csv_file}") continue - # Extract data for matching variables only - print(f"🔍 DEBUG: Extracting data for variables: {matching_vars}") + # Check for matching variables + matching_vars = [var for var in variables if var in filtered_df.columns] + + if not matching_vars: + continue + + # Extract data for cache for _, row in filtered_df.iterrows(): timestamp = row["timestamp"] for var in matching_vars: if var in row and pd.notna(row[var]): try: - # Convert value to appropriate type value = row[var] - - # Handle boolean values + # Type conversion if isinstance(value, str): value_lower = value.lower().strip() if value_lower == "true": @@ -2153,63 +2093,101 @@ def get_historical_data(): else: continue - historical_data.append( - { - "timestamp": timestamp.isoformat(), - "variable": var, - "value": value, - } - ) - except Exception as e: - # Skip invalid values - print( - f"🔍 DEBUG: Skipping invalid value for {var}: {e}" - ) + all_data_for_cache.append({ + "timestamp": timestamp, + "variable": var, + "value": value, + }) + except: continue except Exception as e: - # Skip files that can't be read print(f"Warning: Could not read CSV file {csv_file}: {e}") continue - # Sort by timestamp - historical_data.sort(key=lambda x: x["timestamp"]) + # Convert to DataFrame for caching + if all_data_for_cache: + cache_df = pd.DataFrame(all_data_for_cache) + cache_df = cache_df.sort_values('timestamp') + + # Store in cache + historical_cache.store_data(variables, start_time, end_time, cache_df) + + # Filter to actual requested range for response + response_mask = (cache_df["timestamp"] >= start_time) & (cache_df["timestamp"] <= end_time) + response_df = cache_df[response_mask] + + # Convert to response format + for _, row in response_df.iterrows(): + historical_data.append({ + "timestamp": row["timestamp"].isoformat(), + "variable": row["variable"], + "value": row["value"] + }) - print(f"🔍 DEBUG: Total historical data points found: {len(historical_data)}") - print( - f"🔍 DEBUG: Variables found: {list(set([item['variable'] for item in historical_data]))}" - ) + print(f"🔍 DEBUG: Loaded {len(historical_data)} data points for response") + print(f"🔍 DEBUG: Cached {len(all_data_for_cache)} total data points") - return jsonify( - { - "data": historical_data, - "time_range": { - "start": start_time.isoformat(), - "end": end_time.isoformat(), - }, - "variables_found": list( - set([item["variable"] for item in 
historical_data]) - ), - "total_points": len(historical_data), - } - ) + return jsonify({ + "data": historical_data, + "time_range": { + "start": start_time.isoformat(), + "end": end_time.isoformat(), + }, + "variables_found": list(set([item["variable"] for item in historical_data])), + "total_points": len(historical_data), + "cached": False # Indicate this was loaded fresh + }) - except ImportError as e: - return ( - jsonify( - { - "error": f"pandas is required for historical data processing: {str(e)}" - } - ), - 500, - ) except Exception as e: import traceback - traceback.print_exc() return jsonify({"error": f"Internal server error: {str(e)}"}), 500 +@app.route("/api/plots/historical/cache/stats", methods=["GET"]) +def get_cache_stats(): + """Get cache statistics for monitoring""" + global historical_cache + + try: + if historical_cache is None: + return jsonify({ + "cache_initialized": False, + "stats": { + "entries": 0, + "total_size_mb": 0, + "max_entries": 0, + "buffer_percentage": 0 + } + }) + + stats = historical_cache.get_cache_stats() + return jsonify({ + "cache_initialized": True, + "stats": stats + }) + + except Exception as e: + return jsonify({"error": str(e)}), 500 + + +@app.route("/api/plots/historical/cache/clear", methods=["POST"]) +def clear_cache(): + """Clear the historical data cache""" + global historical_cache + + try: + if historical_cache is not None: + historical_cache.clear_cache() + return jsonify({"success": True, "message": "Cache cleared successfully"}) + else: + return jsonify({"success": False, "message": "Cache not initialized"}) + + except Exception as e: + return jsonify({"error": str(e)}), 500 + + @app.route("/api/plots/sessions/", methods=["GET"]) def get_plot_sessions(plot_id): """Get all session IDs for a specific plot ID"""
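A quick way to exercise the new caching path end to end is to request the same window twice and then read the stats endpoint: the second response should carry "cached": true and the entry should appear in the cache statistics. The sketch below is illustrative only and not part of the patch; the base URL/port, the variable name "temperature", and the availability of the requests package are assumptions, and the variable must actually exist in the recorded CSVs for data to come back.

# Minimal sketch (assumed environment): exercise /api/plots/historical twice,
# then the new cache stats/clear endpoints added by this patch.
import requests

BASE = "http://localhost:5000"      # assumed host/port for the Flask app
payload = {
    "variables": ["temperature"],   # hypothetical variable name
    "time_window": 3600,            # last hour; explicit start_time/end_time are also accepted
}

# First call: expected cache miss, data loaded from the CSV files.
first = requests.post(f"{BASE}/api/plots/historical", json=payload).json()
print("first call  -> cached:", first.get("cached"), "points:", first.get("total_points"))

# Same window again: should be served from the 25%-buffered cache entry,
# unless the CSV files were modified in between.
second = requests.post(f"{BASE}/api/plots/historical", json=payload).json()
print("second call -> cached:", second.get("cached"))

# Inspect cache occupancy and memory use.
stats = requests.get(f"{BASE}/api/plots/historical/cache/stats").json()
print("cache stats:", stats)

# Drop all cached entries, e.g. after the recorder has rewritten today's CSVs.
requests.post(f"{BASE}/api/plots/historical/cache/clear")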