#!/usr/bin/env python3
"""
Migrate data from SQLite to PostgreSQL.

Usage:
    DATABASE_URL=postgresql://ctxd:ctxd_local_dev@localhost:5432/ctxd \
    SQLITE_PATH=/data/ctxd.db \
    python3 -m ctxd.migrate_sqlite_to_pg

Or inside the Docker container:
    docker exec dossier python3 -m ctxd.migrate_sqlite_to_pg

Environment variables:
    DATABASE_URL — PostgreSQL connection string (required)
    SQLITE_PATH  — Path to SQLite DB file (default: /data/ctxd.db)
"""

import os
import re
import sqlite3
import sys
from contextlib import closing

try:
    import psycopg
    from psycopg.rows import dict_row
except ImportError:
    print("ERROR: psycopg is required. Install with: pip install psycopg[binary]")
    sys.exit(1)

SQLITE_PATH = os.environ.get("SQLITE_PATH", "/data/ctxd.db")
PG_URL = os.environ.get("DATABASE_URL", "")

# Tables in migration order (parents before children for FK safety)
TABLES = [
    "users",
    "projects",
    "project_permissions",
    "project_context",
    "context_files",
    "user_profiles",
    "user_workspaces",
    "workspace_files",
    "change_requests",
    "reviews",
    "snapshots",
    "audit_log",
]

# Columns that need type conversion from SQLite INTEGER to PostgreSQL BOOLEAN
BOOL_COLUMNS = {
    "users": ["active"],
    "projects": ["auto_sync"],
}

# (table, column) pairs whose backing sequence is reset after the copy.
_SERIAL_COLUMNS = [
    ("audit_log", "entry_id"),
    ("context_files", "file_id"),
    ("project_permissions", "id"),
    ("reviews", "review_id"),
    ("workspace_files", "file_id"),
]

# (label, statement) pairs used to re-populate fts_context from source tables.
_FTS_SOURCES = [
    (
        "project_context",
        """
        INSERT INTO fts_context (source_id, content, project_id, file_path, source_type, tsv)
        SELECT project_id, content, project_id, 'context.md', 'project_context',
               to_tsvector('english', content)
        FROM project_context WHERE content != ''
        """,
    ),
    (
        "context_files",
        """
        INSERT INTO fts_context (source_id, content, project_id, file_path, source_type, tsv)
        SELECT file_id::text, content, project_id, file_path, 'context_file',
               to_tsvector('english', content)
        FROM context_files WHERE content != ''
        """,
    ),
    (
        "user_profiles",
        """
        INSERT INTO fts_context (source_id, content, project_id, file_path, source_type, tsv)
        SELECT user_id, content, '~user', user_id, 'user_profile',
               to_tsvector('english', content)
        FROM user_profiles WHERE content != ''
        """,
    ),
]

# Password inside a URL-style DSN: scheme://user:PASSWORD@host...
_URL_PASSWORD_RE = re.compile(r"(://[^:/?#@\s]*:)[^/?#\s]*@")
# Password in keyword/value form or in a URL query: password=SECRET
_KV_PASSWORD_RE = re.compile(
    r"(password\s*=\s*)(?:'(?:[^'\\]|\\.)*'|[^\s&]+)", re.IGNORECASE
)


def _redact_dsn(dsn: str) -> str:
    """Return *dsn* with any embedded password replaced by ``***``."""
    dsn = _URL_PASSWORD_RE.sub(r"\1***@", dsn)
    return _KV_PASSWORD_RE.sub(r"\1***", dsn)


def _clear_postgres(pconn) -> None:
    """Truncate every target table (and fts_context) and commit."""
    pconn.execute(
        "TRUNCATE TABLE fts_context, audit_log, reviews, change_requests, "
        "workspace_files, user_workspaces, context_files, project_context, "
        "snapshots, user_profiles, project_permissions, projects, users CASCADE"
    )
    pconn.commit()


def _copy_table(sconn, pconn, table: str) -> int:
    """Copy all rows of *table* from SQLite to PostgreSQL.

    The column list is taken from SQLite, so the PostgreSQL table must have
    columns with the same names. Rows that PostgreSQL rejects are skipped
    individually and reported; they do not affect rows already inserted.

    Args:
        sconn: Open SQLite connection whose row factory is ``sqlite3.Row``.
        pconn: Open psycopg connection with no transaction in progress.
        table: Name of the table to copy (one of ``TABLES``).

    Returns:
        The number of rows whose INSERT statement succeeded.
    """
    try:
        columns = [
            info["name"] for info in sconn.execute(f"PRAGMA table_info({table})")
        ]
    except sqlite3.OperationalError:
        print(f" {table}: table not found in SQLite, skipping")
        return 0
    if not columns:
        print(f" {table}: no columns found, skipping")
        return 0

    col_str = ", ".join(columns)
    try:
        rows = sconn.execute(f"SELECT {col_str} FROM {table}").fetchall()
    except sqlite3.OperationalError:
        print(f" {table}: error reading from SQLite, skipping")
        return 0
    if not rows:
        print(f" {table}: 0 rows")
        return 0

    val_str = ", ".join(["%s"] * len(columns))
    insert_sql = (
        f"INSERT INTO {table} ({col_str}) VALUES ({val_str}) ON CONFLICT DO NOTHING"
    )
    bool_cols = set(BOOL_COLUMNS.get(table, ()))

    count = 0
    skipped = 0
    first_error = None
    # Outer block: one transaction per table, committed when the block exits.
    with pconn.transaction():
        for row in rows:
            # Convert integer 0/1 to boolean for PostgreSQL BOOLEAN columns
            values = [
                bool(row[col])
                if col in bool_cols and row[col] is not None
                else row[col]
                for col in columns
            ]
            try:
                # Inner block: a savepoint, so a rejected row (e.g. an
                # orphaned snapshot violating an FK) is undone on its own
                # instead of rolling back every earlier row of this table.
                with pconn.transaction():
                    pconn.execute(insert_sql, values)
            except psycopg.Error as err:
                skipped += 1
                if first_error is None:
                    first_error = err
                continue
            count += 1

    if skipped:
        reason = str(first_error).strip()
        print(
            f" {table}: {count} rows migrated, "
            f"{skipped} skipped (first error: {reason})"
        )
    else:
        print(f" {table}: {count} rows migrated")
    return count


def _rebuild_fts(pconn) -> None:
    """Empty fts_context and re-populate it from the migrated source tables."""
    pconn.execute("DELETE FROM fts_context")
    pconn.commit()

    # Read rowcount from the cursor that actually ran each INSERT; a freshly
    # created cursor has executed nothing and cannot report the row count.
    counts = {label: pconn.execute(sql).rowcount for label, sql in _FTS_SOURCES}
    pconn.commit()

    for label, inserted in counts.items():
        print(f" {label}: {inserted} entries")


def _reset_sequences(pconn) -> None:
    """Set each SERIAL sequence from the current maximum of its column."""
    for table, col in _SERIAL_COLUMNS:
        try:
            # Run each reset in its own transaction block so that a failure
            # is rolled back and does not leave the connection in an aborted
            # state that would make every following reset fail as well.
            with pconn.transaction():
                pconn.execute(
                    f"SELECT setval(pg_get_serial_sequence('{table}', '{col}'), "
                    f"COALESCE((SELECT MAX({col}) FROM {table}), 1))"
                )
        except psycopg.Error as e:
            print(f" {table}.{col}: skipped ({e})")
        else:
            print(f" {table}.{col}: seq reset")
    # No-op when every block above committed by itself; kept so the work is
    # also committed if the caller already had a transaction open.
    pconn.commit()


def migrate():
    """Copy all data from the SQLite file into PostgreSQL.

    Reads ``DATABASE_URL`` and ``SQLITE_PATH`` (via the module constants
    ``PG_URL`` and ``SQLITE_PATH``), truncates the PostgreSQL tables, copies
    every table listed in ``TABLES``, rebuilds ``fts_context`` and resets the
    SERIAL sequences. Progress is printed to stdout.

    Exits the process with status 1 when ``DATABASE_URL`` is unset or the
    SQLite file does not exist. Both connections are closed on every exit
    path, including when an exception propagates.
    """
    if not PG_URL:
        print("ERROR: DATABASE_URL environment variable not set")
        print("Example: DATABASE_URL=postgresql://ctxd:ctxd_local_dev@localhost:5432/ctxd")
        sys.exit(1)

    if not os.path.exists(SQLITE_PATH):
        print(f"ERROR: SQLite database not found at {SQLITE_PATH}")
        sys.exit(1)

    # The connection string usually carries a password; never echo it.
    print(
        f"Migrating from SQLite ({SQLITE_PATH}) "
        f"to PostgreSQL ({_redact_dsn(PG_URL)})"
    )
    print()

    with closing(sqlite3.connect(SQLITE_PATH)) as sconn:
        sconn.row_factory = sqlite3.Row

        with closing(psycopg.connect(PG_URL, row_factory=dict_row)) as pconn:
            pconn.autocommit = False

            # Step 1: Clear all data from PostgreSQL tables
            print("Clearing existing PostgreSQL data...")
            _clear_postgres(pconn)
            print(" Done.")
            print()

            # Step 2: Migrate each table
            total_rows = sum(_copy_table(sconn, pconn, table) for table in TABLES)

            # Step 3: Rebuild FTS index
            print()
            print("Rebuilding FTS index...")
            _rebuild_fts(pconn)

            # Step 4: Reset SERIAL sequences
            print()
            print("Resetting SERIAL sequences...")
            _reset_sequences(pconn)

    print()
    print("=" * 60)
    print(f"Migration complete! {total_rows} total rows migrated.")
    print("=" * 60)


if __name__ == "__main__":
    migrate()