Files
CTXD/app/src/ctxd/migrate_sqlite_to_pg.py
T
overseer fe63ad350e fix: reset SERIAL sequences after SQLite migration to prevent UniqueViolation
Migration script now resets SERIAL sequences (audit_log, context_files, etc.)
to max(existing_id) after copying rows. Without this, the sequence is still at
its post-schema-creation value and every INSERT hits a duplicate key error.
2026-06-25 10:50:35 +00:00

209 lines
6.3 KiB
Python

#!/usr/bin/env python3
"""
Migrate data from SQLite to PostgreSQL.
Usage:
DATABASE_URL=postgresql://ctxd:ctxd_local_dev@localhost:5432/ctxd \
SQLITE_PATH=/data/ctxd.db \
python3 -m ctxd.migrate_sqlite_to_pg
Or inside the Docker container:
docker exec dossier python3 -m ctxd.migrate_sqlite_to_pg
Environment variables:
DATABASE_URL — PostgreSQL connection string (required)
SQLITE_PATH — Path to SQLite DB file (default: /data/ctxd.db)
"""
import contextlib
import os
import sqlite3
import sys

try:
    import psycopg
    from psycopg.rows import dict_row
except ImportError:
    print("ERROR: psycopg is required. Install with: pip install psycopg[binary]")
    sys.exit(1)
# Source and target locations are taken from the environment; an unset
# DATABASE_URL is left as "" here.
SQLITE_PATH = os.getenv("SQLITE_PATH", "/data/ctxd.db")
PG_URL = os.getenv("DATABASE_URL", "")

# Tables are copied in this order: parents first, children afterwards, so
# that foreign keys already have their targets when a child row is inserted.
TABLES = [
    "users",
    "projects",
    "project_permissions",
    "project_context",
    "context_files",
    "user_profiles",
    "user_workspaces",
    "workspace_files",
    "change_requests",
    "reviews",
    "snapshots",
    "audit_log",
]

# Per-table columns stored as INTEGER in SQLite that must be converted to
# PostgreSQL BOOLEAN during the copy.
BOOL_COLUMNS = dict(
    users=["active"],
    projects=["auto_sync"],
)
def _redact_dsn(dsn):
    """Return *dsn* with the password of a ``scheme://user:pass@host`` URL masked.

    Strings that do not have that shape (for example ``key=value`` connection
    strings) are returned unchanged.
    """
    scheme, sep, rest = dsn.partition("://")
    # rpartition: everything between the first ':' of the user-info and the
    # LAST '@' is treated as the password, so a password containing '@' is
    # still fully masked.
    userinfo, at, location = rest.rpartition("@")
    user, colon, _password = userinfo.partition(":")
    if not (sep and at and colon):
        return dsn
    return f"{scheme}://{user}:***@{location}"


def _copy_table(sconn, pconn, table):
    """Copy all rows of *table* from SQLite into PostgreSQL.

    Args:
        sconn: Open ``sqlite3`` connection whose ``row_factory`` is ``sqlite3.Row``.
        pconn: Open psycopg connection (not in autocommit mode).
        table: Name of the table to copy; must exist in PostgreSQL.

    Returns:
        The number of rows actually inserted into PostgreSQL.
    """
    # PRAGMA table_info returns no rows for an unknown table, so a missing
    # table is normally reported through the "no columns" branch below.
    try:
        columns = [row["name"] for row in sconn.execute(f"PRAGMA table_info({table})")]
    except sqlite3.OperationalError:
        print(f" {table}: table not found in SQLite, skipping")
        return 0
    if not columns:
        print(f" {table}: no columns found, skipping")
        return 0

    col_str = ", ".join(columns)
    try:
        rows = sconn.execute(f"SELECT {col_str} FROM {table}").fetchall()
    except sqlite3.OperationalError:
        print(f" {table}: error reading from SQLite, skipping")
        return 0
    if not rows:
        print(f" {table}: 0 rows")
        return 0

    val_str = ", ".join(["%s"] * len(columns))
    insert_sql = (
        f"INSERT INTO {table} ({col_str}) VALUES ({val_str}) ON CONFLICT DO NOTHING"
    )
    bool_cols = set(BOOL_COLUMNS.get(table, ()))

    inserted = 0
    skipped = 0
    first_error = None
    # One transaction per table, committed when the block exits.
    with pconn.transaction():
        for row in rows:
            # SQLite stores booleans as 0/1 integers; PostgreSQL BOOLEAN
            # columns need real bool values. NULLs are passed through.
            values = [
                bool(row[col]) if col in bool_cols and row[col] is not None else row[col]
                for col in columns
            ]
            try:
                # Nested inside the table transaction, so psycopg wraps this
                # in a SAVEPOINT: a failing row (e.g. an orphaned snapshot
                # violating a foreign key) is undone on its own instead of
                # rolling back every row copied so far.
                with pconn.transaction():
                    cur = pconn.execute(insert_sql, values)
            except psycopg.Error as err:
                skipped += 1
                if first_error is None:
                    first_error = err
                continue
            # ON CONFLICT DO NOTHING reports 0 affected rows for a duplicate.
            if cur.rowcount > 0:
                inserted += 1
            else:
                skipped += 1

    suffix = f" ({skipped} skipped)" if skipped else ""
    print(f" {table}: {inserted} rows migrated{suffix}")
    if first_error is not None:
        print(f" {table}: first error: {first_error}")
    return inserted


def _rebuild_fts(pconn):
    """Empty ``fts_context`` and repopulate it from the three source tables."""
    with pconn.transaction():
        pconn.execute("DELETE FROM fts_context")
        # rowcount must be read from the cursor that ran the INSERT; a fresh
        # cursor has not executed anything and carries no count.
        fts_pc = pconn.execute("""
            INSERT INTO fts_context (source_id, content, project_id, file_path, source_type, tsv)
            SELECT project_id, content, project_id, 'context.md', 'project_context',
                   to_tsvector('english', content)
            FROM project_context
            WHERE content != ''
        """).rowcount
        fts_cf = pconn.execute("""
            INSERT INTO fts_context (source_id, content, project_id, file_path, source_type, tsv)
            SELECT file_id::text, content, project_id, file_path, 'context_file',
                   to_tsvector('english', content)
            FROM context_files
            WHERE content != ''
        """).rowcount
        fts_up = pconn.execute("""
            INSERT INTO fts_context (source_id, content, project_id, file_path, source_type, tsv)
            SELECT user_id, content, '~user', user_id, 'user_profile',
                   to_tsvector('english', content)
            FROM user_profiles
            WHERE content != ''
        """).rowcount
    print(f" project_context: {fts_pc} entries")
    print(f" context_files: {fts_cf} entries")
    print(f" user_profiles: {fts_up} entries")


def _reset_sequences(pconn):
    """Point each SERIAL sequence at ``max(id) + 1`` (or 1 for an empty table)."""
    for table, col in [
        ("audit_log", "entry_id"),
        ("context_files", "file_id"),
        ("project_permissions", "id"),
        ("reviews", "review_id"),
        ("workspace_files", "file_id"),
    ]:
        try:
            # Each reset runs in its own transaction block so that one
            # failure does not leave the connection in an aborted state and
            # break the remaining resets.
            with pconn.transaction():
                # is_called=false makes the NEXT nextval() return exactly the
                # value given, so an empty table starts at 1 rather than 2.
                row = pconn.execute(
                    f"SELECT setval(pg_get_serial_sequence('{table}', '{col}'), "
                    f"COALESCE(MAX({col}), 0) + 1, false) AS next_id FROM {table}"
                ).fetchone()
        except psycopg.Error as e:
            print(f" {table}.{col}: skipped ({e})")
            continue
        # setval() yields NULL when pg_get_serial_sequence() found no
        # sequence attached to the column; the connection uses dict_row.
        if row is None or row["next_id"] is None:
            print(f" {table}.{col}: skipped (no sequence attached to column)")
        else:
            print(f" {table}.{col}: seq reset")


def migrate():
    """Copy all application data from the SQLite file into PostgreSQL.

    Reads ``PG_URL`` and ``SQLITE_PATH`` (module constants filled from the
    environment), then:

    1. truncates the target PostgreSQL tables,
    2. copies every table listed in ``TABLES`` in order,
    3. rebuilds the ``fts_context`` full-text index,
    4. resets the SERIAL sequences past the highest copied id.

    Exits the process with status 1 when ``DATABASE_URL`` is unset or the
    SQLite file does not exist. Both connections are closed on every exit
    path, including errors.
    """
    if not PG_URL:
        print("ERROR: DATABASE_URL environment variable not set")
        print("Example: DATABASE_URL=postgresql://ctxd:ctxd_local_dev@localhost:5432/ctxd")
        sys.exit(1)
    if not os.path.exists(SQLITE_PATH):
        print(f"ERROR: SQLite database not found at {SQLITE_PATH}")
        sys.exit(1)

    # The connection string may embed a password; never echo it verbatim.
    print(f"Migrating from SQLite ({SQLITE_PATH}) to PostgreSQL ({_redact_dsn(PG_URL)})")
    print()

    # sqlite3 connections are not closed by their own context manager, hence
    # contextlib.closing; the psycopg connection closes itself on block exit.
    with contextlib.closing(sqlite3.connect(SQLITE_PATH)) as sconn:
        sconn.row_factory = sqlite3.Row
        with psycopg.connect(PG_URL, row_factory=dict_row, autocommit=False) as pconn:
            # Step 1: Clear all data from PostgreSQL tables
            print("Clearing existing PostgreSQL data...")
            pconn.execute(
                "TRUNCATE TABLE fts_context, audit_log, reviews, change_requests, "
                "workspace_files, user_workspaces, context_files, project_context, "
                "snapshots, user_profiles, project_permissions, projects, users CASCADE"
            )
            pconn.commit()
            print(" Done.")
            print()

            # Step 2: Migrate each table
            total_rows = sum(_copy_table(sconn, pconn, table) for table in TABLES)

            # Step 3: Rebuild FTS index
            print()
            print("Rebuilding FTS index...")
            _rebuild_fts(pconn)

            # Step 4: Reset SERIAL sequences to max(existing_id) + 1
            print()
            print("Resetting SERIAL sequences...")
            _reset_sequences(pconn)

            print()
            print("=" * 60)
            print(f"Migration complete! {total_rows} total rows migrated.")
            print("=" * 60)
# Script entry point: run the migration only when this file is executed
# directly (e.g. `python3 -m ctxd.migrate_sqlite_to_pg`), not when imported.
if __name__ == "__main__":
    migrate()