mirror of
https://github.com/docmost/docmost.git
synced 2025-11-10 02:02:06 +10:00
fix: enhance page import (#1570)
* change import process
* fix processor
* fix page name in notion import
* preserve confluence table bg color
* sync
This commit is contained in:
@ -47,15 +47,23 @@ export class FileTaskProcessor extends WorkerHost implements OnModuleDestroy {
|
|||||||
await this.handleFailedJob(job);
|
await this.handleFailedJob(job);
|
||||||
}
|
}
|
||||||
|
|
||||||
@OnWorkerEvent('stalled')
|
@OnWorkerEvent('completed')
|
||||||
async onStalled(job: Job) {
|
async onCompleted(job: Job) {
|
||||||
this.logger.error(
|
this.logger.log(
|
||||||
`Job ${job.name} stalled. . Import Task ID: ${job.data.fileTaskId}.. Job ID: ${job.id}`,
|
`Completed ${job.name} job for File task ID ${job.data.fileTaskId}`,
|
||||||
);
|
);
|
||||||
|
|
||||||
// Set failedReason for stalled jobs since it's not automatically set
|
try {
|
||||||
job.failedReason = 'Job stalled and was marked as failed';
|
const fileTask = await this.fileTaskService.getFileTask(
|
||||||
await this.handleFailedJob(job);
|
job.data.fileTaskId,
|
||||||
|
);
|
||||||
|
if (fileTask) {
|
||||||
|
await this.storageService.delete(fileTask.filePath);
|
||||||
|
this.logger.debug(`Deleted imported zip file: ${fileTask.filePath}`);
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
this.logger.error(`Failed to delete imported zip file:`, err);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private async handleFailedJob(job: Job) {
|
private async handleFailedJob(job: Job) {
|
||||||
@ -78,25 +86,6 @@ export class FileTaskProcessor extends WorkerHost implements OnModuleDestroy {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@OnWorkerEvent('completed')
|
|
||||||
async onCompleted(job: Job) {
|
|
||||||
this.logger.log(
|
|
||||||
`Completed ${job.name} job for File task ID ${job.data.fileTaskId}`,
|
|
||||||
);
|
|
||||||
|
|
||||||
try {
|
|
||||||
const fileTask = await this.fileTaskService.getFileTask(
|
|
||||||
job.data.fileTaskId,
|
|
||||||
);
|
|
||||||
if (fileTask) {
|
|
||||||
await this.storageService.delete(fileTask.filePath);
|
|
||||||
this.logger.debug(`Deleted imported zip file: ${fileTask.filePath}`);
|
|
||||||
}
|
|
||||||
} catch (err) {
|
|
||||||
this.logger.error(`Failed to delete imported zip file:`, err);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async onModuleDestroy(): Promise<void> {
|
async onModuleDestroy(): Promise<void> {
|
||||||
if (this.worker) {
|
if (this.worker) {
|
||||||
await this.worker.close();
|
await this.worker.close();
|
||||||
|
|||||||
@ -24,6 +24,7 @@ import { formatImportHtml } from '../utils/import-formatter';
|
|||||||
import {
|
import {
|
||||||
buildAttachmentCandidates,
|
buildAttachmentCandidates,
|
||||||
collectMarkdownAndHtmlFiles,
|
collectMarkdownAndHtmlFiles,
|
||||||
|
stripNotionID,
|
||||||
} from '../utils/import.utils';
|
} from '../utils/import.utils';
|
||||||
import { executeTx } from '@docmost/db/utils';
|
import { executeTx } from '@docmost/db/utils';
|
||||||
import { BacklinkRepo } from '@docmost/db/repos/backlink/backlink.repo';
|
import { BacklinkRepo } from '@docmost/db/repos/backlink/backlink.repo';
|
||||||
@ -159,17 +160,12 @@ export class FileImportTaskService {
|
|||||||
.split(path.sep)
|
.split(path.sep)
|
||||||
.join('/'); // normalize to forward-slashes
|
.join('/'); // normalize to forward-slashes
|
||||||
const ext = path.extname(relPath).toLowerCase();
|
const ext = path.extname(relPath).toLowerCase();
|
||||||
let content = await fs.readFile(absPath, 'utf-8');
|
|
||||||
|
|
||||||
if (ext.toLowerCase() === '.md') {
|
|
||||||
content = await markdownToHtml(content);
|
|
||||||
}
|
|
||||||
|
|
||||||
pagesMap.set(relPath, {
|
pagesMap.set(relPath, {
|
||||||
id: v7(),
|
id: v7(),
|
||||||
slugId: generateSlugId(),
|
slugId: generateSlugId(),
|
||||||
name: path.basename(relPath, ext),
|
name: stripNotionID(path.basename(relPath, ext)),
|
||||||
content,
|
content: '',
|
||||||
parentPageId: null,
|
parentPageId: null,
|
||||||
fileExtension: ext,
|
fileExtension: ext,
|
||||||
filePath: relPath,
|
filePath: relPath,
|
||||||
@ -254,71 +250,160 @@ export class FileImportTaskService {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
const pageResults = await Promise.all(
|
// Group pages by level (topological sort for parent-child relationships)
|
||||||
Array.from(pagesMap.values()).map(async (page) => {
|
const pagesByLevel = new Map<number, Array<[string, ImportPageNode]>>();
|
||||||
const htmlContent =
|
const pageLevel = new Map<string, number>();
|
||||||
await this.importAttachmentService.processAttachments({
|
|
||||||
html: page.content,
|
|
||||||
pageRelativePath: page.filePath,
|
|
||||||
extractDir,
|
|
||||||
pageId: page.id,
|
|
||||||
fileTask,
|
|
||||||
attachmentCandidates,
|
|
||||||
});
|
|
||||||
|
|
||||||
const { html, backlinks, pageIcon } = await formatImportHtml({
|
// Calculate levels using BFS
|
||||||
html: htmlContent,
|
const calculateLevels = () => {
|
||||||
currentFilePath: page.filePath,
|
const queue: Array<{ filePath: string; level: number }> = [];
|
||||||
filePathToPageMetaMap: filePathToPageMetaMap,
|
|
||||||
creatorId: fileTask.creatorId,
|
|
||||||
sourcePageId: page.id,
|
|
||||||
workspaceId: fileTask.workspaceId,
|
|
||||||
});
|
|
||||||
|
|
||||||
const pmState = getProsemirrorContent(
|
// Start with root pages (no parent)
|
||||||
await this.importService.processHTML(html),
|
for (const [filePath, page] of pagesMap.entries()) {
|
||||||
|
if (!page.parentPageId) {
|
||||||
|
queue.push({ filePath, level: 0 });
|
||||||
|
pageLevel.set(filePath, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// BFS to assign levels
|
||||||
|
while (queue.length > 0) {
|
||||||
|
const { filePath, level } = queue.shift()!;
|
||||||
|
const currentPage = pagesMap.get(filePath)!;
|
||||||
|
|
||||||
|
// Find children of current page
|
||||||
|
for (const [childFilePath, childPage] of pagesMap.entries()) {
|
||||||
|
if (
|
||||||
|
childPage.parentPageId === currentPage.id &&
|
||||||
|
!pageLevel.has(childFilePath)
|
||||||
|
) {
|
||||||
|
pageLevel.set(childFilePath, level + 1);
|
||||||
|
queue.push({ filePath: childFilePath, level: level + 1 });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Group pages by level
|
||||||
|
for (const [filePath, page] of pagesMap.entries()) {
|
||||||
|
const level = pageLevel.get(filePath) || 0;
|
||||||
|
if (!pagesByLevel.has(level)) {
|
||||||
|
pagesByLevel.set(level, []);
|
||||||
|
}
|
||||||
|
pagesByLevel.get(level)!.push([filePath, page]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
calculateLevels();
|
||||||
|
|
||||||
|
if (pagesMap.size < 1) return;
|
||||||
|
|
||||||
|
// Process pages level by level sequentially to respect foreign key constraints
|
||||||
|
const allBacklinks: any[] = [];
|
||||||
|
const validPageIds = new Set<string>();
|
||||||
|
let totalPagesProcessed = 0;
|
||||||
|
|
||||||
|
// Sort levels to process in order
|
||||||
|
const sortedLevels = Array.from(pagesByLevel.keys()).sort((a, b) => a - b);
|
||||||
|
|
||||||
|
try {
|
||||||
|
await executeTx(this.db, async (trx) => {
|
||||||
|
// Process pages level by level sequentially within the transaction
|
||||||
|
for (const level of sortedLevels) {
|
||||||
|
const levelPages = pagesByLevel.get(level)!;
|
||||||
|
|
||||||
|
for (const [filePath, page] of levelPages) {
|
||||||
|
const absPath = path.join(extractDir, filePath);
|
||||||
|
let content = await fs.readFile(absPath, 'utf-8');
|
||||||
|
|
||||||
|
if (page.fileExtension.toLowerCase() === '.md') {
|
||||||
|
content = await markdownToHtml(content);
|
||||||
|
}
|
||||||
|
|
||||||
|
const htmlContent =
|
||||||
|
await this.importAttachmentService.processAttachments({
|
||||||
|
html: content,
|
||||||
|
pageRelativePath: page.filePath,
|
||||||
|
extractDir,
|
||||||
|
pageId: page.id,
|
||||||
|
fileTask,
|
||||||
|
attachmentCandidates,
|
||||||
|
});
|
||||||
|
|
||||||
|
const { html, backlinks, pageIcon } = await formatImportHtml({
|
||||||
|
html: htmlContent,
|
||||||
|
currentFilePath: page.filePath,
|
||||||
|
filePathToPageMetaMap: filePathToPageMetaMap,
|
||||||
|
creatorId: fileTask.creatorId,
|
||||||
|
sourcePageId: page.id,
|
||||||
|
workspaceId: fileTask.workspaceId,
|
||||||
|
});
|
||||||
|
|
||||||
|
const pmState = getProsemirrorContent(
|
||||||
|
await this.importService.processHTML(html),
|
||||||
|
);
|
||||||
|
|
||||||
|
const { title, prosemirrorJson } =
|
||||||
|
this.importService.extractTitleAndRemoveHeading(pmState);
|
||||||
|
|
||||||
|
const insertablePage: InsertablePage = {
|
||||||
|
id: page.id,
|
||||||
|
slugId: page.slugId,
|
||||||
|
title: title || page.name,
|
||||||
|
icon: pageIcon || null,
|
||||||
|
content: prosemirrorJson,
|
||||||
|
textContent: jsonToText(prosemirrorJson),
|
||||||
|
ydoc: await this.importService.createYdoc(prosemirrorJson),
|
||||||
|
position: page.position!,
|
||||||
|
spaceId: fileTask.spaceId,
|
||||||
|
workspaceId: fileTask.workspaceId,
|
||||||
|
creatorId: fileTask.creatorId,
|
||||||
|
lastUpdatedById: fileTask.creatorId,
|
||||||
|
parentPageId: page.parentPageId,
|
||||||
|
};
|
||||||
|
|
||||||
|
await trx.insertInto('pages').values(insertablePage).execute();
|
||||||
|
|
||||||
|
// Track valid page IDs and collect backlinks
|
||||||
|
validPageIds.add(insertablePage.id);
|
||||||
|
allBacklinks.push(...backlinks);
|
||||||
|
totalPagesProcessed++;
|
||||||
|
|
||||||
|
// Log progress periodically
|
||||||
|
if (totalPagesProcessed % 50 === 0) {
|
||||||
|
this.logger.debug(`Processed ${totalPagesProcessed} pages...`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const filteredBacklinks = allBacklinks.filter(
|
||||||
|
({ sourcePageId, targetPageId }) =>
|
||||||
|
validPageIds.has(sourcePageId) && validPageIds.has(targetPageId),
|
||||||
);
|
);
|
||||||
|
|
||||||
const { title, prosemirrorJson } =
|
// Insert backlinks in batches
|
||||||
this.importService.extractTitleAndRemoveHeading(pmState);
|
if (filteredBacklinks.length > 0) {
|
||||||
|
const BACKLINK_BATCH_SIZE = 100;
|
||||||
|
for (
|
||||||
|
let i = 0;
|
||||||
|
i < filteredBacklinks.length;
|
||||||
|
i += BACKLINK_BATCH_SIZE
|
||||||
|
) {
|
||||||
|
const backlinkChunk = filteredBacklinks.slice(
|
||||||
|
i,
|
||||||
|
Math.min(i + BACKLINK_BATCH_SIZE, filteredBacklinks.length),
|
||||||
|
);
|
||||||
|
await this.backlinkRepo.insertBacklink(backlinkChunk, trx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const insertablePage: InsertablePage = {
|
this.logger.log(
|
||||||
id: page.id,
|
`Successfully imported ${totalPagesProcessed} pages with ${filteredBacklinks.length} backlinks`,
|
||||||
slugId: page.slugId,
|
);
|
||||||
title: title || page.name,
|
});
|
||||||
icon: pageIcon || null,
|
} catch (error) {
|
||||||
content: prosemirrorJson,
|
this.logger.error('Failed to import files:', error);
|
||||||
textContent: jsonToText(prosemirrorJson),
|
throw new Error(`File import failed: ${error?.['message']}`);
|
||||||
ydoc: await this.importService.createYdoc(prosemirrorJson),
|
}
|
||||||
position: page.position!,
|
|
||||||
spaceId: fileTask.spaceId,
|
|
||||||
workspaceId: fileTask.workspaceId,
|
|
||||||
creatorId: fileTask.creatorId,
|
|
||||||
lastUpdatedById: fileTask.creatorId,
|
|
||||||
parentPageId: page.parentPageId,
|
|
||||||
};
|
|
||||||
|
|
||||||
return { insertablePage, backlinks };
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
|
|
||||||
const insertablePages = pageResults.map((r) => r.insertablePage);
|
|
||||||
const insertableBacklinks = pageResults.flatMap((r) => r.backlinks);
|
|
||||||
|
|
||||||
if (insertablePages.length < 1) return;
|
|
||||||
const validPageIds = new Set(insertablePages.map((row) => row.id));
|
|
||||||
const filteredBacklinks = insertableBacklinks.filter(
|
|
||||||
({ sourcePageId, targetPageId }) =>
|
|
||||||
validPageIds.has(sourcePageId) && validPageIds.has(targetPageId),
|
|
||||||
);
|
|
||||||
|
|
||||||
await executeTx(this.db, async (trx) => {
|
|
||||||
await trx.insertInto('pages').values(insertablePages).execute();
|
|
||||||
|
|
||||||
if (filteredBacklinks.length > 0) {
|
|
||||||
await this.backlinkRepo.insertBacklink(filteredBacklinks, trx);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async getFileTask(fileTaskId: string) {
|
async getFileTask(fileTaskId: string) {
|
||||||
|
|||||||
@ -35,7 +35,7 @@ interface DrawioPair {
|
|||||||
@Injectable()
|
@Injectable()
|
||||||
export class ImportAttachmentService {
|
export class ImportAttachmentService {
|
||||||
private readonly logger = new Logger(ImportAttachmentService.name);
|
private readonly logger = new Logger(ImportAttachmentService.name);
|
||||||
private readonly CONCURRENT_UPLOADS = 1;
|
private readonly CONCURRENT_UPLOADS = 3;
|
||||||
private readonly MAX_RETRIES = 2;
|
private readonly MAX_RETRIES = 2;
|
||||||
private readonly RETRY_DELAY = 2000;
|
private readonly RETRY_DELAY = 2000;
|
||||||
|
|
||||||
|
|||||||
@ -64,3 +64,9 @@ export async function collectMarkdownAndHtmlFiles(
|
|||||||
await walk(dir);
|
await walk(dir);
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Removes a trailing Notion page ID from an exported file name.
 *
 * Notion appends the page's UUID without dashes (32 hexadecimal characters)
 * to exported file names, optionally preceded by a single space or dash,
 * e.g. `Meeting Notes 0123456789abcdef0123456789abcdef`.
 *
 * @param fileName - File base name without its extension.
 * @returns The name with the trailing ID (and its separator) removed and
 *   surrounding whitespace trimmed. Names that do not end in a 32-character
 *   hexadecimal ID are returned trimmed but otherwise unchanged. A name that
 *   consists solely of an ID yields an empty string.
 */
export function stripNotionID(fileName: string): string {
  // Restrict the match to hexadecimal characters: matching any alphanumeric
  // run would also truncate ordinary names that happen to end in a long
  // unbroken word (32+ letters/digits with no separator).
  const notionIdPattern = /[ -]?[0-9a-f]{32}$/i;
  return fileName.replace(notionIdPattern, '').trim();
}
|
||||||
|
|||||||
@ -2,33 +2,39 @@ import { TableCell as TiptapTableCell } from "@tiptap/extension-table-cell";
|
|||||||
|
|
||||||
export const TableCell = TiptapTableCell.extend({
|
export const TableCell = TiptapTableCell.extend({
|
||||||
name: "tableCell",
|
name: "tableCell",
|
||||||
content: "(paragraph | heading | bulletList | orderedList | taskList | blockquote | callout | image | video | attachment | mathBlock | details | codeBlock)+",
|
content:
|
||||||
|
"(paragraph | heading | bulletList | orderedList | taskList | blockquote | callout | image | video | attachment | mathBlock | details | codeBlock)+",
|
||||||
|
|
||||||
addAttributes() {
|
addAttributes() {
|
||||||
return {
|
return {
|
||||||
...this.parent?.(),
|
...this.parent?.(),
|
||||||
backgroundColor: {
|
backgroundColor: {
|
||||||
default: null,
|
default: null,
|
||||||
parseHTML: (element) => element.style.backgroundColor || null,
|
parseHTML: (element) =>
|
||||||
|
element.style.backgroundColor ||
|
||||||
|
element.getAttribute("data-background-color") ||
|
||||||
|
null,
|
||||||
renderHTML: (attributes) => {
|
renderHTML: (attributes) => {
|
||||||
if (!attributes.backgroundColor) {
|
if (!attributes.backgroundColor) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
return {
|
return {
|
||||||
style: `background-color: ${attributes.backgroundColor}`,
|
style: `background-color: ${attributes.backgroundColor}`,
|
||||||
'data-background-color': attributes.backgroundColor,
|
"data-background-color": attributes.backgroundColor,
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
backgroundColorName: {
|
backgroundColorName: {
|
||||||
default: null,
|
default: null,
|
||||||
parseHTML: (element) => element.getAttribute('data-background-color-name') || null,
|
parseHTML: (element) =>
|
||||||
|
element.getAttribute("data-background-color-name") || null,
|
||||||
renderHTML: (attributes) => {
|
renderHTML: (attributes) => {
|
||||||
if (!attributes.backgroundColorName) {
|
if (!attributes.backgroundColorName) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
return {
|
return {
|
||||||
'data-background-color-name': attributes.backgroundColorName.toLowerCase(),
|
"data-background-color-name":
|
||||||
|
attributes.backgroundColorName.toLowerCase(),
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|||||||
@ -2,36 +2,42 @@ import { TableHeader as TiptapTableHeader } from "@tiptap/extension-table-header
|
|||||||
|
|
||||||
export const TableHeader = TiptapTableHeader.extend({
|
export const TableHeader = TiptapTableHeader.extend({
|
||||||
name: "tableHeader",
|
name: "tableHeader",
|
||||||
content: "(paragraph | heading | bulletList | orderedList | taskList | blockquote | callout | image | video | attachment | mathBlock | details | codeBlock)+",
|
content:
|
||||||
|
"(paragraph | heading | bulletList | orderedList | taskList | blockquote | callout | image | video | attachment | mathBlock | details | codeBlock)+",
|
||||||
|
|
||||||
addAttributes() {
|
addAttributes() {
|
||||||
return {
|
return {
|
||||||
...this.parent?.(),
|
...this.parent?.(),
|
||||||
backgroundColor: {
|
backgroundColor: {
|
||||||
default: null,
|
default: null,
|
||||||
parseHTML: (element) => element.style.backgroundColor || null,
|
parseHTML: (element) =>
|
||||||
|
element.style.backgroundColor ||
|
||||||
|
element.getAttribute("data-background-color") ||
|
||||||
|
null,
|
||||||
renderHTML: (attributes) => {
|
renderHTML: (attributes) => {
|
||||||
if (!attributes.backgroundColor) {
|
if (!attributes.backgroundColor) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
return {
|
return {
|
||||||
style: `background-color: ${attributes.backgroundColor}`,
|
style: `background-color: ${attributes.backgroundColor}`,
|
||||||
'data-background-color': attributes.backgroundColor,
|
"data-background-color": attributes.backgroundColor,
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
backgroundColorName: {
|
backgroundColorName: {
|
||||||
default: null,
|
default: null,
|
||||||
parseHTML: (element) => element.getAttribute('data-background-color-name') || null,
|
parseHTML: (element) =>
|
||||||
|
element.getAttribute("data-background-color-name") || null,
|
||||||
renderHTML: (attributes) => {
|
renderHTML: (attributes) => {
|
||||||
if (!attributes.backgroundColorName) {
|
if (!attributes.backgroundColorName) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
return {
|
return {
|
||||||
'data-background-color-name': attributes.backgroundColorName.toLowerCase(),
|
"data-background-color-name":
|
||||||
|
attributes.backgroundColorName.toLowerCase(),
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user