// Mirror of https://github.com/AmruthPillai/Reactive-Resume.git
// Synced 2026-07-03 09:40:38 +10:00 (504 lines, 18 KiB, TypeScript)
import { and, inArray, sql } from "drizzle-orm";
|
|
import { drizzle } from "drizzle-orm/node-postgres";
|
|
import fs from "node:fs/promises";
|
|
import { Pool, type QueryResult } from "pg";
|
|
|
|
import { schema } from "@/integrations/drizzle";
|
|
import { ReactiveResumeV4JSONImporter } from "@/integrations/import/reactive-resume-v4-json";
|
|
import { defaultResumeData } from "@/schema/resume/data";
|
|
import { generateId } from "@/utils/string";
|
|
|
|
// ---- Row shapes read from the production database ----

/** Visibility flag carried by a production resume row. */
type Visibility = "private" | "public";

/**
 * One row of the production "Resume" table, limited to the columns this
 * script selects.
 */
interface ProductionResume {
  id: string;
  title: string;
  slug: string;
  /** JSON payload of the resume; left as `unknown` because it is not validated here. */
  data: unknown;
  visibility: Visibility;
  locked: boolean;
  userId: string;
  createdAt: Date;
  updatedAt: Date;
}
|
|
|
|
/**
 * One row of the production "Statistics" table, limited to the columns this
 * script selects. `resumeId` is the production resume the counters belong to.
 */
interface ProductionStatistics {
  id: string;
  /** View counter copied as-is into the new statistics row. */
  views: number;
  /** Download counter copied as-is into the new statistics row. */
  downloads: number;
  resumeId: string;
  createdAt: Date;
  updatedAt: Date;
}
|
|
|
|
// ---- Database connections ----
// Both connection strings come from the environment; the script refuses to
// start when either one is missing or empty.
const { PRODUCTION_DATABASE_URL: productionUrl, DATABASE_URL: localUrl } = process.env;

if (!productionUrl) {
  throw new Error("PRODUCTION_DATABASE_URL is not set");
}
if (!localUrl) {
  throw new Error("DATABASE_URL is not set");
}

// One pg pool per database; the pools are kept in variables so they can be closed later.
const productionPool = new Pool({ connectionString: productionUrl });
const localPool = new Pool({ connectionString: localUrl });

// The production handle is used with raw SQL only, so it gets no schema;
// the local handle is bound to the project's drizzle schema.
const productionDb = drizzle({ client: productionPool });
const localDb = drizzle({ client: localPool, schema });
|
|
|
|
// ---- Files used to persist state between runs ----

/** JSON file mapping production user ids to local user ids (read by this script). */
const USER_ID_MAP_FILE = "./scripts/migration/user-id-map.json";

/** JSON file mapping production resume ids to local resume ids (read and written by this script). */
const RESUME_ID_MAP_FILE = "./scripts/migration/resume-id-map.json";

/** Checkpoint file holding the pagination cursor and running counters. */
const PROGRESS_FILE = "./scripts/migration/resume-progress.json";

// ---- Tunables ----

/**
 * Number of resumes fetched from production per page.
 * NOTE(review): the previous comment said this was "reduced from 10000" to avoid
 * PostgreSQL message format errors, but the value is still 10_000 — confirm the
 * intended page size.
 */
const BATCH_SIZE = 10_000;

/**
 * Number of rows sent per INSERT statement. Kept smaller than the fetch size
 * to stay under PostgreSQL message size limits; resumes carry large JSONB data.
 */
const INSERT_CHUNK_SIZE = 1_000;
|
|
|
|
/**
 * Checkpoint written to disk so an interrupted run can continue.
 *
 * Pagination is cursor-based on the composite key (createdAt, id); the two
 * `lastSeen*` fields together identify the last row of the last finished page.
 */
interface MigrationProgress {
  /** `createdAt` of the last row seen, or null before the first page. */
  lastSeenCreatedAt: string | null;
  /** `id` of the last row seen; breaks ties between equal timestamps. */
  lastSeenId: string | null;
  /** Running counters carried over between runs. */
  resumesCreated: number;
  statisticsCreated: number;
  skipped: number;
  totalResumesProcessed: number;
  errors: number;
  /** ISO timestamp of the moment the checkpoint was produced. */
  lastUpdated: string;
}
|
|
|
|
/**
 * Set to true once a shutdown has been requested; the migration loop checks it
 * before fetching each page and the final summary uses it to decide whether
 * the run completed or was paused.
 */
let shutdownRequested = false;
|
|
|
|
/**
 * Reads the checkpoint written by a previous run.
 *
 * @returns the parsed checkpoint, or `null` when the file cannot be read or
 *          parsed (a warning is logged and the migration starts from the beginning).
 */
async function loadProgress(): Promise<MigrationProgress | null> {
  let checkpoint: MigrationProgress;

  try {
    const raw = await fs.readFile(PROGRESS_FILE, { encoding: "utf-8" });
    // The file content is trusted to have the MigrationProgress shape; it is not validated.
    checkpoint = JSON.parse(raw) as MigrationProgress;

    // Logging stays inside the try so a malformed checkpoint (e.g. `null`) is
    // reported as a load failure rather than crashing the run.
    console.log(`📂 Found existing progress file. Last updated: ${checkpoint.lastUpdated}`);
    console.log(
      ` Resuming from cursor (createdAt: ${checkpoint.lastSeenCreatedAt}, id: ${checkpoint.lastSeenId})...`,
    );
  } catch (e) {
    console.warn("⚠️ Failed to load progress file, starting from beginning.", e);
    return null;
  }

  return checkpoint;
}
|
|
|
|
/**
 * Writes the checkpoint to disk as pretty-printed JSON.
 *
 * Stamps `progress.lastUpdated` with the current time before writing (the
 * passed object is modified). Failures are logged and swallowed so that a
 * checkpoint problem never aborts the migration itself.
 *
 * @param progress checkpoint to persist
 */
async function saveProgress(progress: MigrationProgress): Promise<void> {
  try {
    progress.lastUpdated = new Date().toISOString();

    const serialized = JSON.stringify(progress, null, 2);
    await fs.writeFile(PROGRESS_FILE, serialized, { encoding: "utf-8" });

    const { lastSeenCreatedAt, lastSeenId } = progress;
    console.log(`💾 Progress saved at cursor (createdAt: ${lastSeenCreatedAt}, id: ${lastSeenId})`);
  } catch (e) {
    console.error("🚨 Failed to save progress:", e);
  }
}
|
|
|
|
/**
 * Deletes the checkpoint file. Best effort: any failure (for example the file
 * not existing) is deliberately ignored.
 */
async function clearProgress(): Promise<void> {
  try {
    await fs.unlink(PROGRESS_FILE);
  } catch {
    // Nothing to clean up, or the file could not be removed — either way, carry on.
    return;
  }

  try {
    console.log("🗑️ Progress file cleared.");
  } catch {
    // Logging problems are ignored as well.
  }
}
|
|
|
|
/**
 * Loads the production-user-id → local-user-id mapping from disk.
 *
 * @returns the mapping, or an empty map when the file cannot be read or
 *          parsed (a warning is logged).
 */
async function loadUserIdMapFromFile(): Promise<Map<string, string>> {
  let entries: [string, string][] = [];

  try {
    const raw = await fs.readFile(USER_ID_MAP_FILE, { encoding: "utf-8" });
    // The file is expected to hold a flat JSON object of id → id; values are not validated.
    entries = Object.entries(JSON.parse(raw));
  } catch (e) {
    console.warn("⚠️ Failed to load userIdMap from disk, continuing with empty map.", e);
  }

  return new Map(entries);
}
|
|
|
|
/**
 * Loads the production-resume-id → local-resume-id mapping from disk.
 *
 * @returns the mapping, or an empty map when the file cannot be read or
 *          parsed (a warning is logged).
 */
async function loadResumeIdMapFromFile(): Promise<Map<string, string>> {
  let entries: [string, string][] = [];

  try {
    const raw = await fs.readFile(RESUME_ID_MAP_FILE, { encoding: "utf-8" });
    // The file is expected to hold a flat JSON object of id → id; values are not validated.
    entries = Object.entries(JSON.parse(raw));
  } catch (e) {
    console.warn("⚠️ Failed to load resumeIdMap from disk, continuing with empty map.", e);
  }

  return new Map(entries);
}
|
|
|
|
/**
 * Persists the resume id mapping as a tab-indented JSON object.
 * Write errors are not caught here; they propagate to the caller.
 *
 * @param resumeIdMap production resume id → local resume id
 */
async function saveResumeIdMapToFile(resumeIdMap: Map<string, string>) {
  const asPlainObject: Record<string, string> = Object.fromEntries(resumeIdMap);
  const serialized = JSON.stringify(asPlainObject, null, "\t");
  await fs.writeFile(RESUME_ID_MAP_FILE, serialized, { encoding: "utf-8" });
}
|
|
|
|
/**
 * Converts the `createdAt` value of a production row into the string kept in
 * the pagination cursor: Date instances become ISO strings, anything else is
 * stringified as-is.
 */
function toCursorTimestamp(createdAt: unknown): string {
  return createdAt instanceof Date ? createdAt.toISOString() : String(createdAt);
}

/**
 * Copies resumes and their statistics from the production database into the
 * local database, page by page.
 *
 * - Pages are read newest-first with a (createdAt, id) cursor, `BATCH_SIZE` rows at a time.
 * - A resume is skipped when its owner has no entry in the user id map, when the
 *   mapped user does not exist locally, or when the local database already has a
 *   resume with the same slug for that user.
 * - After every page the cursor and counters are checkpointed, so an interrupted
 *   run continues where it stopped. SIGINT / SIGTERM trigger an immediate
 *   checkpoint followed by process exit.
 * - A failed bulk insert is logged and counted, and the run moves on to the next page.
 *
 * The checkpoint file is removed only when the run finishes without a shutdown request.
 */
export async function migrateResumes(): Promise<void> {
  const migrationStart = performance.now();
  console.log("⌛ Starting resume migration...");

  // Cursor-based pagination state
  let lastSeenCreatedAt: string | null = null;
  let lastSeenId: string | null = null;

  // Load persistent ID maps from file
  const userIdMap = await loadUserIdMapFromFile();
  const resumeIdMap = await loadResumeIdMapFromFile();

  // Track migration stats
  let resumesCreated = 0;
  let statisticsCreated = 0;
  let skipped = 0;
  let totalResumesProcessed = 0;
  let errors = 0;

  // Continue from a previous run's checkpoint if there is one
  const savedProgress = await loadProgress();
  if (savedProgress) {
    lastSeenCreatedAt = savedProgress.lastSeenCreatedAt;
    lastSeenId = savedProgress.lastSeenId;
    resumesCreated = savedProgress.resumesCreated;
    statisticsCreated = savedProgress.statisticsCreated;
    skipped = savedProgress.skipped;
    totalResumesProcessed = savedProgress.totalResumesProcessed;
    errors = savedProgress.errors;
  }

  // Snapshot of the current cursor and counters
  const getCurrentProgress = (): MigrationProgress => ({
    lastSeenCreatedAt,
    lastSeenId,
    resumesCreated,
    statisticsCreated,
    skipped,
    totalResumesProcessed,
    errors,
    lastUpdated: new Date().toISOString(),
  });

  // Graceful shutdown: persist the checkpoint and the id map, then exit.
  const handleShutdown = async (): Promise<void> => {
    if (shutdownRequested) return;
    shutdownRequested = true;
    console.log("\n⚠️ Shutdown requested. Saving progress...");
    try {
      await saveProgress(getCurrentProgress());
      await saveResumeIdMapToFile(resumeIdMap);
    } catch (error) {
      console.error("🚨 Failed to persist state during shutdown:", error);
      process.exit(1);
    }
    console.log("👋 Exiting. Run the script again to resume from where you left off.");
    process.exit(0);
  };

  // Listen for termination signals rather than the "exit" event: "exit"
  // listeners must be synchronous, so the awaited writes above would never
  // finish there, and the handler itself calls process.exit().
  const onShutdownSignal = (): void => {
    void handleShutdown();
  };
  process.on("SIGINT", onShutdownSignal);
  process.on("SIGTERM", onShutdownSignal);

  // Initialize the importer
  const importer = new ReactiveResumeV4JSONImporter();

  /**
   * Migrates one fetched page.
   *
   * @returns true when the page reached the insert stage (whether or not the
   *          insert succeeded), false when every row was skipped beforehand.
   */
  const migrateBatch = async (resumes: ProductionResume[]): Promise<boolean> => {
    // Fetch statistics only for the resumes in this page
    const resumeIds = resumes.map((r) => r.id);

    // The id list is inlined as SQL text instead of bound parameters.
    // Single quotes are escaped (UUIDs shouldn't contain them, this is just safer).
    const resumeIdsForSql = resumeIds.map((id) => `'${id.replace(/'/g, "''")}'`).join(", ");

    const { rows: statistics } = (await productionDb.execute(sql`
      SELECT id, views, downloads, "resumeId", "createdAt", "updatedAt"
      FROM "Statistics"
      WHERE "resumeId" IN (${sql.raw(resumeIdsForSql)})
    `)) as unknown as QueryResult<ProductionStatistics>;

    // resumeId -> statistics for quick lookup
    const statisticsMap = new Map<string, ProductionStatistics>();
    for (const stat of statistics) {
      statisticsMap.set(stat.resumeId, stat);
    }

    // Drop resumes whose owner has no entry in userIdMap
    const resumesToProcess = resumes
      .map((resume) => {
        const newUserId = userIdMap.get(resume.userId);
        if (!newUserId) {
          skipped++;
          return null;
        }
        return { resume, newUserId };
      })
      .filter((item): item is NonNullable<typeof item> => item !== null);

    if (resumesToProcess.length === 0) {
      console.log(`⏭️ All resumes in this batch have userIds not found in userIdMap.`);
      return false;
    }

    // Bulk check which mapped users exist locally (only the id column is needed)
    const uniqueUserIds = [...new Set(resumesToProcess.map((r) => r.newUserId))];
    const existingUsers = await localDb
      .select({ id: schema.user.id })
      .from(schema.user)
      .where(inArray(schema.user.id, uniqueUserIds));
    const existingUserIds = new Set(existingUsers.map((u) => u.id));

    // Drop resumes whose mapped user doesn't exist
    const resumesWithValidUsers = resumesToProcess.filter(({ newUserId }) => {
      if (!existingUserIds.has(newUserId)) {
        skipped++;
        return false;
      }
      return true;
    });

    if (resumesWithValidUsers.length === 0) {
      console.log(`⏭️ All resumes in this batch have userIds not found in local database.`);
      return false;
    }

    // Bulk check for resumes that already exist locally (by slug + userId).
    // The query matches any listed slug combined with any listed user, which is a
    // superset of the wanted pairs; the exact pair is checked via the key set below.
    const uniqueSlugs = [...new Set(resumesWithValidUsers.map(({ resume }) => resume.slug))];
    const userIdsForSlugCheck = [...new Set(resumesWithValidUsers.map(({ newUserId }) => newUserId))];

    // Only slug and userId are selected so the large resume payloads are not loaded.
    const existingResumes = await localDb
      .select({ slug: schema.resume.slug, userId: schema.resume.userId })
      .from(schema.resume)
      .where(and(inArray(schema.resume.slug, uniqueSlugs), inArray(schema.resume.userId, userIdsForSlugCheck)));

    const existingResumeKeys = new Set(existingResumes.map((r) => `${r.slug}:${r.userId}`));

    // Drop resumes that already exist
    const resumesToInsert = resumesWithValidUsers.filter(({ resume, newUserId }) => {
      if (existingResumeKeys.has(`${resume.slug}:${newUserId}`)) {
        skipped++;
        return false;
      }
      return true;
    });

    if (resumesToInsert.length === 0) {
      console.log(`⏭️ All resumes in this batch already exist in target DB.`);
      return false;
    }

    console.log(`📝 Preparing to bulk insert ${resumesToInsert.length} resumes...`);

    const batchStart = performance.now();
    try {
      const resumesToInsertData = resumesToInsert.map(({ resume, newUserId }) => {
        // Transform the data using the V4 importer; fall back to default data if parsing fails
        let transformedData = defaultResumeData;
        try {
          const dataJson = typeof resume.data === "string" ? resume.data : JSON.stringify(resume.data);
          transformedData = importer.parse(dataJson);
        } catch (error) {
          console.error(`⚠️ Failed to parse resume data for resume ${resume.id}, using default data:`, error);
          transformedData = defaultResumeData;
        }

        const newResumeId = generateId();

        return {
          resumeData: {
            id: newResumeId,
            name: resume.title,
            slug: resume.slug,
            tags: [], // Default empty array
            isPublic: resume.visibility === "public", // visibility "public" -> isPublic = true
            isLocked: resume.locked,
            password: null, // No password in old schema
            data: transformedData,
            userId: newUserId,
            createdAt: resume.createdAt,
            updatedAt: resume.updatedAt,
          },
          originalResumeId: resume.id,
          newResumeId,
        };
      });

      // Bulk insert resumes, chunked to avoid PostgreSQL message size limits
      // (resumes contain large JSONB data).
      for (let i = 0; i < resumesToInsertData.length; i += INSERT_CHUNK_SIZE) {
        const chunk = resumesToInsertData.slice(i, i + INSERT_CHUNK_SIZE);
        await localDb.insert(schema.resume).values(chunk.map(({ resumeData }) => resumeData));

        // Record id mappings and counts only once the rows are actually in the
        // database, so a failed insert never leaves mappings for missing resumes.
        for (const { originalResumeId, newResumeId } of chunk) {
          resumeIdMap.set(originalResumeId, newResumeId);
        }
        resumesCreated += chunk.length;
      }

      // Prepare statistics for the resumes that have a production statistics row
      const statisticsToInsert = resumesToInsertData
        .map(({ originalResumeId, newResumeId }) => {
          const resumeStatistics = statisticsMap.get(originalResumeId);
          if (!resumeStatistics) return null;

          return {
            id: generateId(),
            views: resumeStatistics.views,
            downloads: resumeStatistics.downloads,
            lastViewedAt: null, // Not available in old schema
            lastDownloadedAt: null, // Not available in old schema
            resumeId: newResumeId,
            createdAt: resumeStatistics.createdAt,
            updatedAt: resumeStatistics.updatedAt,
          };
        })
        .filter((stat): stat is NonNullable<typeof stat> => stat !== null);

      // Bulk insert statistics (chunked)
      for (let i = 0; i < statisticsToInsert.length; i += INSERT_CHUNK_SIZE) {
        const chunk = statisticsToInsert.slice(i, i + INSERT_CHUNK_SIZE);
        await localDb.insert(schema.resumeStatistics).values(chunk);
        statisticsCreated += chunk.length;
      }

      const batchTimeMs = performance.now() - batchStart;
      console.log(
        `✅ Bulk inserted ${resumesToInsertData.length} resumes in ${batchTimeMs.toFixed(1)} ms (avg ${(batchTimeMs / resumesToInsertData.length).toFixed(1)} ms/resume)`,
      );

      // Save resume ID map after each successful batch
      await saveResumeIdMapToFile(resumeIdMap);
    } catch (error) {
      console.error(`🚨 Failed to bulk insert resumes batch:`, error);
      errors++;
      // Continue with next batch even if this one fails
    }

    return true;
  };

  try {
    while (!shutdownRequested) {
      console.log(
        `📥 Fetching resumes batch from production database (cursor: createdAt=${lastSeenCreatedAt}, id=${lastSeenId})...`,
      );

      // Cursor-based pagination; the tuple comparison lets Postgres use a
      // composite index on ("createdAt", id) efficiently.
      let resumes: ProductionResume[];

      if (lastSeenCreatedAt && lastSeenId) {
        const result = (await productionDb.execute(sql`
          SELECT id, title, slug, data, visibility, locked, "userId", "createdAt", "updatedAt"
          FROM "Resume"
          WHERE ("createdAt", id) < (${lastSeenCreatedAt}::timestamp, ${lastSeenId})
          ORDER BY "createdAt" DESC, id DESC
          LIMIT ${BATCH_SIZE}
        `)) as unknown as QueryResult<ProductionResume>;
        resumes = result.rows;
      } else {
        const result = (await productionDb.execute(sql`
          SELECT id, title, slug, data, visibility, locked, "userId", "createdAt", "updatedAt"
          FROM "Resume"
          ORDER BY "createdAt" DESC, id DESC
          LIMIT ${BATCH_SIZE}
        `)) as unknown as QueryResult<ProductionResume>;
        resumes = result.rows;
      }

      console.log(`📋 Found ${resumes.length} resumes in this batch.`);

      // An empty page means the whole table has been walked
      if (resumes.length === 0) break;

      const reachedInsertStage = await migrateBatch(resumes);

      // Advance the cursor to the last row of this page, whatever happened to
      // the page (inserted, skipped, or failed).
      const lastResume = resumes[resumes.length - 1];
      if (lastResume) {
        lastSeenCreatedAt = toCursorTimestamp(lastResume.createdAt);
        lastSeenId = lastResume.id;
      }

      totalResumesProcessed += resumes.length;
      if (reachedInsertStage) {
        console.log(`📦 Processed ${totalResumesProcessed} resumes so far...\n`);
      }

      // Save progress after each batch
      await saveProgress(getCurrentProgress());
    }
  } finally {
    // Remove signal handlers, also when the loop ends with an error
    process.off("SIGINT", onShutdownSignal);
    process.off("SIGTERM", onShutdownSignal);
  }

  const migrationDurationMs = performance.now() - migrationStart;

  console.log("\n📊 Migration Summary:");
  console.log(` Resumes created: ${resumesCreated}`);
  console.log(` Statistics created: ${statisticsCreated}`);
  console.log(` Skipped (userId not found or already exist): ${skipped}`);
  console.log(` Errors: ${errors}`);
  console.log(
    `⏱️ Total migration time: ${migrationDurationMs.toFixed(1)} ms (${(migrationDurationMs / 1000).toFixed(2)} seconds)`,
  );

  // Final save of the mapping (ensures up-to-date state)
  await saveResumeIdMapToFile(resumeIdMap);

  // Clear progress file on successful completion (only if not interrupted)
  if (!shutdownRequested) {
    await clearProgress();
    console.log("✅ Resume migration complete!");
  } else {
    console.log("⏸️ Migration paused. Run again to resume.");
  }
}
|
|
|
|
// Entry point: runs the migration only when this file is executed directly.
if (import.meta.main) {
  // Start every direct run with a cleared shutdown flag
  shutdownRequested = false;

  try {
    await migrateResumes();
  } finally {
    // Close the pools whether the migration succeeded or threw —
    // production first, then local.
    for (const pool of [productionPool, localPool]) {
      await pool.end();
    }
  }
}
|