2026-06-24 22:50:54 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
"""
|
|
|
|
|
Migrate data from SQLite to PostgreSQL.

WARNING: this is destructive for the target database. All PostgreSQL tables
handled by this script are truncated before the SQLite rows are copied in.

Usage:
    DATABASE_URL=postgresql://ctxd:ctxd_local_dev@localhost:5432/ctxd \
    SQLITE_PATH=/data/ctxd.db \
    python3 -m ctxd.migrate_sqlite_to_pg

Or inside the Docker container:
    docker exec dossier python3 -m ctxd.migrate_sqlite_to_pg

Environment variables:
    DATABASE_URL — PostgreSQL connection string (required)
    SQLITE_PATH — Path to SQLite DB file (default: /data/ctxd.db)
|
|
|
|
|
"""
|
|
|
|
|
import os
|
|
|
|
|
import sqlite3
|
|
|
|
|
import sys
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
import psycopg
|
|
|
|
|
from psycopg.rows import dict_row
|
|
|
|
|
except ImportError:
|
|
|
|
|
print("ERROR: psycopg is required. Install with: pip install psycopg[binary]")
|
|
|
|
|
sys.exit(1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Source file and target server, both taken from the environment.
# An empty PG_URL means DATABASE_URL was not provided.
SQLITE_PATH = os.getenv("SQLITE_PATH", "/data/ctxd.db")
PG_URL = os.getenv("DATABASE_URL", "")

# Migration order matters: a table must come after every table it references,
# so that foreign keys resolve while rows are being inserted.
TABLES = [
    "users", "projects", "project_permissions",
    "project_context", "context_files",
    "user_profiles", "user_workspaces", "workspace_files",
    "change_requests", "reviews",
    "snapshots", "audit_log",
]

# Per-table columns stored as INTEGER in SQLite but declared BOOLEAN in
# PostgreSQL; their values are converted with bool() during the copy.
BOOL_COLUMNS = dict(
    users=["active"],
    projects=["auto_sync"],
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _redact_password(url):
    """Return *url* with its password replaced by ``***`` for safe logging.

    Only URL-style connection strings are parsed. Anything else (for example a
    ``key=value`` conninfo string, which may embed ``password=...``) is
    replaced by a placeholder rather than echoed.
    """
    from urllib.parse import urlsplit

    if "://" not in url:
        return "<connection string>"
    try:
        password = urlsplit(url).password
    except ValueError:
        return "<connection string>"
    if not password:
        return url
    return url.replace(f":{password}@", ":***@", 1)


def _copy_table(sconn, pconn, table):
    """Copy all rows of *table* from SQLite to PostgreSQL.

    Columns are discovered from SQLite, values in ``BOOL_COLUMNS`` are
    converted from int to bool, and each row is inserted inside its own
    savepoint so that a failing row is skipped without discarding the rows
    already inserted for this table.

    Returns the number of rows whose INSERT statement succeeded.
    """
    try:
        columns = [info["name"] for info in sconn.execute(f"PRAGMA table_info({table})")]
    except sqlite3.OperationalError:
        print(f" {table}: table not found in SQLite, skipping")
        return 0

    if not columns:
        print(f" {table}: no columns found, skipping")
        return 0

    col_str = ", ".join(columns)
    try:
        rows = sconn.execute(f"SELECT {col_str} FROM {table}").fetchall()
    except sqlite3.OperationalError:
        print(f" {table}: error reading from SQLite, skipping")
        return 0

    if not rows:
        print(f" {table}: 0 rows")
        return 0

    val_str = ", ".join(["%s"] * len(columns))
    insert_sql = f"INSERT INTO {table} ({col_str}) VALUES ({val_str}) ON CONFLICT DO NOTHING"
    bool_cols = set(BOOL_COLUMNS.get(table, ()))

    migrated = 0
    skipped = 0
    last_error = None

    # Outer block: one transaction per table, committed when the block exits.
    with pconn.transaction():
        for row in rows:
            values = []
            for col in columns:
                val = row[col]
                # SQLite stores booleans as 0/1 integers.
                if val is not None and col in bool_cols:
                    val = bool(val)
                values.append(val)

            try:
                # Nested block: a savepoint, so a failure (e.g. an FK
                # violation from an orphaned row) undoes only this row.
                with pconn.transaction():
                    pconn.execute(insert_sql, values)
            except psycopg.Error as err:
                skipped += 1
                last_error = err
            else:
                migrated += 1

    print(f" {table}: {migrated} rows migrated")
    if skipped:
        print(f" {table}: {skipped} rows skipped (last error: {last_error})")
    return migrated


def _rebuild_fts(pconn):
    """Empty ``fts_context`` and repopulate it from the three source tables."""
    pconn.execute("DELETE FROM fts_context")
    pconn.commit()

    insert_head = (
        "INSERT INTO fts_context "
        "(source_id, content, project_id, file_path, source_type, tsv) "
    )
    sources = (
        (
            "project_context",
            "SELECT project_id, content, project_id, 'context.md', 'project_context', "
            "to_tsvector('english', content) "
            "FROM project_context WHERE content != ''",
        ),
        (
            "context_files",
            "SELECT file_id::text, content, project_id, file_path, 'context_file', "
            "to_tsvector('english', content) "
            "FROM context_files WHERE content != ''",
        ),
        (
            "user_profiles",
            "SELECT user_id, content, '~user', user_id, 'user_profile', "
            "to_tsvector('english', content) "
            "FROM user_profiles WHERE content != ''",
        ),
    )

    inserted = []
    for label, select_sql in sources:
        # rowcount must be read from the cursor that ran the statement; a
        # freshly created cursor always reports -1.
        cur = pconn.execute(insert_head + select_sql)
        inserted.append((label, cur.rowcount))
    pconn.commit()

    for label, n in inserted:
        print(f" {label}: {n} entries")


def _reset_sequences(pconn):
    """Advance each SERIAL sequence to the current maximum id of its column."""
    serial_columns = (
        ("audit_log", "entry_id"),
        ("context_files", "file_id"),
        ("project_permissions", "id"),
        ("reviews", "review_id"),
        ("workspace_files", "file_id"),
    )
    for table, col in serial_columns:
        try:
            # Each statement gets its own transaction block: a failure here
            # must not leave the connection in an aborted transaction, which
            # would make every later setval fail as well.
            with pconn.transaction():
                pconn.execute(
                    f"SELECT setval(pg_get_serial_sequence('{table}', '{col}'), "
                    f"COALESCE((SELECT MAX({col}) FROM {table}), 1))"
                )
        except psycopg.Error as e:
            print(f" {table}.{col}: skipped ({e})")
        else:
            print(f" {table}.{col}: seq reset")
    pconn.commit()


def migrate():
    """Copy every table in ``TABLES`` from the SQLite file into PostgreSQL.

    Steps, in order:
      1. TRUNCATE the target PostgreSQL tables. This is committed right away,
         so existing data is gone even if a later step fails.
      2. Copy rows table by table in ``TABLES`` order; rows PostgreSQL rejects
         are skipped and reported.
      3. Rebuild ``fts_context`` from project_context, context_files and
         user_profiles.
      4. Reset the SERIAL sequences of the id columns.

    Reads the module-level ``SQLITE_PATH`` and ``PG_URL``. Exits the process
    with status 1 when ``DATABASE_URL`` is unset or the SQLite file does not
    exist. Both connections are closed even when a step raises.
    """
    if not PG_URL:
        print("ERROR: DATABASE_URL environment variable not set")
        print("Example: DATABASE_URL=postgresql://ctxd:ctxd_local_dev@localhost:5432/ctxd")
        sys.exit(1)

    if not os.path.exists(SQLITE_PATH):
        print(f"ERROR: SQLite database not found at {SQLITE_PATH}")
        sys.exit(1)

    # The URL normally embeds the password; never echo it verbatim.
    print(f"Migrating from SQLite ({SQLITE_PATH}) to PostgreSQL ({_redact_password(PG_URL)})")
    print()

    sconn = sqlite3.connect(SQLITE_PATH)
    try:
        sconn.row_factory = sqlite3.Row

        pconn = psycopg.connect(PG_URL, row_factory=dict_row)
        try:
            pconn.autocommit = False

            # Step 1: Clear all data from PostgreSQL tables
            print("Clearing existing PostgreSQL data...")
            pconn.execute(
                "TRUNCATE TABLE fts_context, audit_log, reviews, change_requests, "
                "workspace_files, user_workspaces, context_files, project_context, "
                "snapshots, user_profiles, project_permissions, projects, users CASCADE"
            )
            pconn.commit()
            print(" Done.")
            print()

            # Step 2: Migrate each table
            total_rows = sum(_copy_table(sconn, pconn, table) for table in TABLES)

            # Step 3: Rebuild FTS index
            print()
            print("Rebuilding FTS index...")
            _rebuild_fts(pconn)

            # Step 4: Reset SERIAL sequences
            print()
            print("Resetting SERIAL sequences...")
            _reset_sequences(pconn)

            print()
            print("=" * 60)
            print(f"Migration complete! {total_rows} total rows migrated.")
            print("=" * 60)
        finally:
            pconn.close()
    finally:
        sconn.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: run the migration only when this module is executed
# directly (e.g. `python3 -m ctxd.migrate_sqlite_to_pg`), not when imported.
if __name__ == "__main__":
    migrate()
|