SIDEL_ScriptsManager/migrate_sqlite_to_postgresq...

377 lines
14 KiB
Python

#!/usr/bin/env python3
"""
SIDEL ScriptsManager - SQLite to PostgreSQL Migration Script
This script migrates data from SQLite to PostgreSQL while maintaining
referential integrity and data consistency.
Usage:
python migrate_sqlite_to_postgresql.py [--source SOURCE_DB] [--target TARGET_URL] [--dry-run] [--no-backup]
Arguments:
--source: SQLite database file path (default: data/scriptsmanager.db)
--target: PostgreSQL connection URL (default: from DATABASE_URL env var)
--dry-run: Perform a dry run without making changes
--no-backup: Skip creating a backup before migration (a backup is created by default)
"""
import argparse
import json
import os
import shutil
import sqlite3
import sys
from datetime import date, datetime, time
from decimal import Decimal
from pathlib import Path
from typing import Dict, List, Any, Optional

# Add the app directory to Python path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from sqlalchemy import create_engine, MetaData, Table, select, insert, func, text
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import SQLAlchemyError

# Import application modules
from app.config.config import Config
from app.config.database import db
class DatabaseMigrator:
    """Copy the contents of a SQLite database into an existing PostgreSQL schema.

    The PostgreSQL tables are expected to exist already; this class only
    moves rows. Tables that are missing on the PostgreSQL side are skipped
    and later reported as verification failures.
    """

    # Preferred migration order: referenced (parent) tables come before the
    # tables that point at them. Update this when the schema changes.
    _DEPENDENCY_ORDER = (
        'users',             # Independent table
        'scripts',           # Depends on users
        'execution_logs',    # Depends on scripts
        'script_tags',       # Depends on scripts
        'user_preferences',  # Depends on users
        'backup_logs',       # Independent
        'system_settings',   # Independent
    )

    # Value types that are handed to the PostgreSQL driver unchanged.
    # Stringifying dict/list would store a Python repr instead of JSON, and
    # stringifying temporal/decimal values would discard their type.
    _NATIVE_TYPES = (str, int, float, bytes, dict, list, datetime, date, time, Decimal)

    def __init__(self, sqlite_path: str, postgresql_url: str, dry_run: bool = False):
        """Store the migration settings; no connection is opened here.

        Args:
            sqlite_path: Path of the SQLite database file to read from.
            postgresql_url: SQLAlchemy URL of the PostgreSQL target.
            dry_run: When True, rows are read and counted but never written.
        """
        self.sqlite_path = sqlite_path
        self.postgresql_url = postgresql_url
        self.dry_run = dry_run

        # Engines and reflected metadata, populated by connect_databases().
        self.sqlite_engine = None
        self.postgres_engine = None
        self.sqlite_metadata = None
        self.postgres_metadata = None

        # Running statistics, printed in the summary of run_migration().
        self.stats = {
            'tables_migrated': 0,
            'total_records': 0,
            'start_time': None,
            'end_time': None,
            'errors': []
        }

    def connect_databases(self) -> bool:
        """Create both engines, reflect the SQLite schema and probe each connection.

        Returns:
            True when both databases answered a test query, False otherwise
            (the error is printed and appended to ``self.stats['errors']``).
        """
        try:
            print(f"Connecting to SQLite database: {self.sqlite_path}")
            self.sqlite_engine = create_engine(f"sqlite:///{self.sqlite_path}")
            self.sqlite_metadata = MetaData()
            self.sqlite_metadata.reflect(bind=self.sqlite_engine)

            print("Connecting to PostgreSQL database...")
            self.postgres_engine = create_engine(self.postgresql_url)
            self.postgres_metadata = MetaData()

            # Raw SQL has to be wrapped in text(): SQLAlchemy 2.x no longer
            # accepts plain strings in Connection.execute().
            with self.sqlite_engine.connect() as conn:
                result = conn.execute(
                    text("SELECT name FROM sqlite_master WHERE type='table'")
                )
                sqlite_tables = [row[0] for row in result]
            print(f"Found {len(sqlite_tables)} tables in SQLite: {sqlite_tables}")

            with self.postgres_engine.connect() as conn:
                pg_version = conn.execute(text("SELECT version()")).scalar()
            print(f"PostgreSQL version: {pg_version.split()[1]}")
            return True
        except Exception as e:
            # Boundary handler: any driver/URL/network failure becomes a
            # recorded error and a False return instead of a traceback.
            print(f"Error connecting to databases: {e}")
            self.stats['errors'].append(f"Connection error: {e}")
            return False

    def create_backup(self) -> Optional[str]:
        """Copy the SQLite file into ``backup/<YYYY-MM-DD>/``.

        Returns:
            The backup file path as a string, or None when the copy failed
            (the error is printed, not raised).
        """
        try:
            # One clock reading, so the directory date and the file
            # timestamp cannot disagree around midnight.
            now = datetime.now()
            backup_dir = Path("backup") / now.strftime("%Y-%m-%d")
            backup_dir.mkdir(parents=True, exist_ok=True)
            backup_path = backup_dir / f"sqlite_backup_{now.strftime('%Y%m%d_%H%M%S')}.db"
            print(f"Creating backup: {backup_path}")
            shutil.copy2(self.sqlite_path, backup_path)
            return str(backup_path)
        except Exception as e:
            print(f"Error creating backup: {e}")
            return None

    def get_table_dependencies(self) -> List[str]:
        """Return the SQLite table names in the order they should be migrated.

        Tables named in the hard-coded dependency list come first, in that
        order; every other reflected table follows in reflection order.
        Tables whose name starts with ``sqlite_`` are left out of the
        second group.
        """
        available_tables = list(self.sqlite_metadata.tables.keys())
        available = set(available_tables)

        ordered_tables = [name for name in self._DEPENDENCY_ORDER if name in available]
        already_ordered = set(ordered_tables)
        ordered_tables.extend(
            name for name in available_tables
            if name not in already_ordered and not name.startswith('sqlite_')
        )
        return ordered_tables

    def _postgres_table(self, table_name: str):
        """Return the reflected PostgreSQL table, or None when it does not exist."""
        if table_name not in self.postgres_metadata.tables:
            # Reflect lazily instead of once per migrated table.
            self.postgres_metadata.reflect(bind=self.postgres_engine)
        return self.postgres_metadata.tables.get(table_name)

    def migrate_table_data(self, table_name: str) -> Dict[str, Any]:
        """Replace the contents of one PostgreSQL table with the SQLite rows.

        Args:
            table_name: Name of a table present in the reflected SQLite schema.

        Returns:
            A dict with a ``status`` key (``success``, ``skipped``, ``empty``,
            ``dry_run`` or ``error``) and a ``records`` count; ``skipped``
            additionally carries ``reason`` and ``error`` carries ``error``.
        """
        print(f"\nMigrating table: {table_name}")
        try:
            sqlite_table = self.sqlite_metadata.tables[table_name]

            postgres_table = self._postgres_table(table_name)
            if postgres_table is None:
                print(f"Warning: Table {table_name} does not exist in PostgreSQL, skipping...")
                return {'status': 'skipped', 'reason': 'table_not_found', 'records': 0}

            with self.sqlite_engine.connect() as sqlite_conn:
                result = sqlite_conn.execute(select(sqlite_table))
                # Read the column names before the result is exhausted.
                columns = list(result.keys())
                rows = result.fetchall()

            if not rows:
                print(f"Table {table_name} is empty, skipping...")
                return {'status': 'empty', 'records': 0}
            print(f"Found {len(rows)} records in {table_name}")

            if self.dry_run:
                print(f"DRY RUN: Would migrate {len(rows)} records to {table_name}")
                return {'status': 'dry_run', 'records': len(rows)}

            data_to_insert = [
                self.convert_row_data(table_name, dict(zip(columns, row)))
                for row in rows
            ]

            # begin() commits on success and rolls back on any exception, so
            # a failed insert cannot leave the target table emptied.
            # NOTE(review): tables are cleared parent-first; if the target
            # already holds rows referenced through foreign keys this DELETE
            # may be rejected. Sequences of serial columns are not advanced
            # either - confirm whether that matters for this schema.
            with self.postgres_engine.begin() as postgres_conn:
                postgres_conn.execute(postgres_table.delete())
                postgres_conn.execute(postgres_table.insert(), data_to_insert)

            print(f"Successfully migrated {len(data_to_insert)} records to {table_name}")
            return {'status': 'success', 'records': len(data_to_insert)}
        except Exception as e:
            # One failing table must not abort the whole run; the failure is
            # recorded and surfaces again during verification.
            print(f"Error migrating table {table_name}: {e}")
            self.stats['errors'].append(f"Table {table_name}: {e}")
            return {'status': 'error', 'error': str(e), 'records': 0}

    def convert_row_data(self, table_name: str, row_data: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of *row_data* with values suitable for the PostgreSQL insert.

        ``None`` and values of the common native types (text, numbers, bytes,
        JSON-style dict/list, date/time objects, Decimal) are kept as they
        are; any other value falls back to its ``str()`` form.

        Args:
            table_name: Name of the table the row belongs to (currently unused).
            row_data: Mapping of column name to value as read from SQLite.
        """
        return {
            column: value
            if value is None or isinstance(value, self._NATIVE_TYPES)
            else str(value)
            for column, value in row_data.items()
        }

    def verify_migration(self) -> bool:
        """Compare per-table row counts between SQLite and PostgreSQL.

        Returns:
            True only when every table returned by get_table_dependencies()
            exists on both sides with identical counts.
        """
        print("\nVerifying migration...")
        verification_passed = True
        for table_name in self.get_table_dependencies():
            try:
                sqlite_table = self.sqlite_metadata.tables[table_name]
                postgres_table = self._postgres_table(table_name)
                if postgres_table is None:
                    print(f"❌ Error verifying {table_name}: table does not exist in PostgreSQL")
                    verification_passed = False
                    continue

                # Core constructs take care of identifier quoting and work on
                # SQLAlchemy 2.x, unlike string-built SQL.
                with self.sqlite_engine.connect() as sqlite_conn:
                    sqlite_count = sqlite_conn.execute(
                        select(func.count()).select_from(sqlite_table)
                    ).scalar()
                with self.postgres_engine.connect() as postgres_conn:
                    postgres_count = postgres_conn.execute(
                        select(func.count()).select_from(postgres_table)
                    ).scalar()

                print(f"{table_name}: SQLite={sqlite_count}, PostgreSQL={postgres_count}")
                if sqlite_count != postgres_count:
                    print(f"❌ Record count mismatch in {table_name}")
                    verification_passed = False
                else:
                    print(f"{table_name} verified successfully")
            except Exception as e:
                print(f"❌ Error verifying {table_name}: {e}")
                verification_passed = False
        return verification_passed

    def run_migration(self, create_backup: bool = True) -> bool:
        """Run backup, connection, per-table copy, verification and summary.

        Args:
            create_backup: Copy the SQLite file first (ignored for dry runs).
                A failed backup only prints a warning.

        Returns:
            True when the run (or dry run) finished and, for real runs, the
            row counts matched; False on connection failure, verification
            failure or an unexpected exception.
        """
        print("=== SIDEL ScriptsManager: SQLite to PostgreSQL Migration ===")
        self.stats['start_time'] = datetime.now()
        try:
            if create_backup and not self.dry_run:
                backup_path = self.create_backup()
                if backup_path:
                    print(f"Backup created: {backup_path}")
                else:
                    print("Warning: Could not create backup")

            if not self.connect_databases():
                return False

            tables_to_migrate = self.get_table_dependencies()
            print(f"\nTables to migrate: {tables_to_migrate}")

            for table_name in tables_to_migrate:
                result = self.migrate_table_data(table_name)
                if result['status'] == 'success':
                    self.stats['tables_migrated'] += 1
                    self.stats['total_records'] += result['records']

            # A dry run writes nothing, so there is nothing to compare.
            if not self.dry_run and not self.verify_migration():
                print("\n❌ Migration verification failed!")
                return False

            self.stats['end_time'] = datetime.now()
            duration = self.stats['end_time'] - self.stats['start_time']
            print("\n=== Migration Summary ===")
            print(f"Duration: {duration}")
            print(f"Tables migrated: {self.stats['tables_migrated']}")
            print(f"Total records: {self.stats['total_records']}")
            print(f"Errors: {len(self.stats['errors'])}")
            if self.stats['errors']:
                print("\nErrors encountered:")
                for error in self.stats['errors']:
                    print(f" - {error}")

            if self.dry_run:
                print("\n✅ DRY RUN completed successfully")
            else:
                print("\n✅ Migration completed successfully")
            return True
        except Exception as e:
            print(f"\n❌ Migration failed: {e}")
            return False
        finally:
            # Release pooled connections whatever the outcome.
            if self.sqlite_engine:
                self.sqlite_engine.dispose()
            if self.postgres_engine:
                self.postgres_engine.dispose()
def main():
    """Parse the command line, validate it and run the migration.

    Exits with status 0 when the migration (or dry run) succeeded and with
    status 1 on invalid arguments or a failed migration.
    """
    parser = argparse.ArgumentParser(description="Migrate SIDEL ScriptsManager from SQLite to PostgreSQL")
    parser.add_argument(
        '--source',
        default='data/scriptsmanager.db',
        help='SQLite database file path (default: data/scriptsmanager.db)'
    )
    parser.add_argument(
        '--target',
        default=os.getenv('DATABASE_URL'),
        help='PostgreSQL connection URL (default: from DATABASE_URL env var)'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Perform a dry run without making changes'
    )
    parser.add_argument(
        '--no-backup',
        action='store_true',
        help='Skip creating backup before migration'
    )
    args = parser.parse_args()

    # Validate arguments
    if not args.target:
        print("Error: PostgreSQL target URL must be specified via --target or DATABASE_URL environment variable")
        sys.exit(1)

    # Accept both "postgresql://" and URLs naming a driver such as
    # "postgresql+psycopg2://"; only the dialect part of the scheme matters.
    scheme, separator, _ = args.target.partition('://')
    if not separator or scheme.split('+', 1)[0] != 'postgresql':
        print("Error: Target URL must be a PostgreSQL connection string")
        sys.exit(1)

    if not Path(args.source).exists():
        print(f"Error: SQLite database file not found: {args.source}")
        sys.exit(1)

    # Run migration
    migrator = DatabaseMigrator(args.source, args.target, args.dry_run)
    success = migrator.run_migration(create_backup=not args.no_backup)
    sys.exit(0 if success else 1)
# Start the migration only when the file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()