import { BadRequestException, Injectable, Logger } from '@nestjs/common';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
import { MultipartFile } from '@fastify/multipart';
import { sanitize } from 'sanitize-filename-ts';
import * as path from 'path';
import {
  htmlToJson,
  jsonToText,
  tiptapExtensions,
} from '../../collaboration/collaboration.util';
import { InjectKysely } from 'nestjs-kysely';
import { KyselyDB } from '@docmost/db/types/kysely.types';
import { generateSlugId } from '../../common/helpers';
import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
import { TiptapTransformer } from '@hocuspocus/transformer';
import * as Y from 'yjs';
import { markdownToHtml } from '@docmost/editor-ext';
import {
  FileTaskStatus,
  FileTaskType,
  getFileTaskFolderPath,
} from './file.utils';
import { v7, v7 as uuid7 } from 'uuid';
import { StorageService } from '../storage/storage.service';
import { InjectQueue } from '@nestjs/bullmq';
import { Queue } from 'bullmq';
import { QueueJob, QueueName } from '../queue/constants';
import { Node as PMNode } from '@tiptap/pm/model';
import { EditorState, Transaction } from '@tiptap/pm/state';
import { getSchema } from '@tiptap/core';

/** Row type produced by `PageRepo.insertPage`, derived so it cannot drift. */
type InsertedPage = Awaited<ReturnType<PageRepo['insertPage']>>;

/**
 * Imports user-uploaded files as pages.
 *
 * Single Markdown/HTML files are converted to ProseMirror JSON and inserted
 * immediately; zip archives are uploaded to storage and handed to the file
 * task queue.
 */
@Injectable()
export class ImportService {
  private readonly logger = new Logger(ImportService.name);

  constructor(
    private readonly pageRepo: PageRepo,
    private readonly storageService: StorageService,
    @InjectKysely() private readonly db: KyselyDB,
    @InjectQueue(QueueName.FILE_TASK_QUEUE)
    private readonly fileTaskQueue: Queue,
  ) {}

  /**
   * Imports a single `.md` or `.html` upload as a new root-level page.
   *
   * The page title is taken from a leading level-1 heading when present,
   * otherwise from the sanitized file name (without extension).
   *
   * @param filePromise - Pending multipart upload.
   * @param userId - Stored as both creator and last updater of the page.
   * @param spaceId - Space the page is created in.
   * @param workspaceId - Workspace the page is created in.
   * @returns The row returned by `PageRepo.insertPage`.
   * @throws BadRequestException when the content cannot be converted (this
   * includes unsupported extensions) or when creating the page fails.
   */
  async importPage(
    filePromise: Promise<MultipartFile>,
    userId: string,
    spaceId: string,
    workspaceId: string,
  ): Promise<InsertedPage> {
    const file = await filePromise;
    const fileBuffer = await file.toBuffer();

    const fileExtension = path.extname(file.filename).toLowerCase();
    const fileName = sanitize(
      path.basename(file.filename, fileExtension).slice(0, 255),
    );
    const fileContent = fileBuffer.toString();

    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    let prosemirrorState: any = null;
    try {
      prosemirrorState = await this.markdownOrHtmlToProsemirror(
        fileContent,
        fileExtension,
      );
    } catch (err) {
      const message = 'Error processing file content';
      this.logger.error(message, err);
      throw new BadRequestException(message);
    }

    // Unsupported extensions yield an empty result and end up here as well.
    if (!prosemirrorState) {
      const message = 'Failed to create ProseMirror state';
      this.logger.error(message);
      throw new BadRequestException(message);
    }

    const { title, prosemirrorJson } =
      this.extractTitleAndRemoveHeading(prosemirrorState);
    const pageTitle = title || fileName;

    try {
      const pagePosition = await this.getNewPagePosition(spaceId);

      const createdPage = await this.pageRepo.insertPage({
        slugId: generateSlugId(),
        title: pageTitle,
        content: prosemirrorJson,
        textContent: jsonToText(prosemirrorJson),
        ydoc: await this.createYdoc(prosemirrorJson),
        position: pagePosition,
        spaceId: spaceId,
        creatorId: userId,
        workspaceId: workspaceId,
        lastUpdatedById: userId,
      });

      // Log the title that was actually stored; `title` alone is null when
      // the file name was used as the fallback.
      this.logger.debug(
        `Successfully imported "${pageTitle}${fileExtension}". ID: ${createdPage.id} - SlugId: ${createdPage.slugId}`,
      );

      return createdPage;
    } catch (err) {
      const message = 'Failed to create imported page';
      this.logger.error(message, err);
      throw new BadRequestException(message);
    }
  }

  /**
   * Converts Markdown to ProseMirror JSON by way of HTML.
   *
   * @param markdownInput - Raw Markdown source.
   * @returns Whatever `htmlToJson` produces for the rendered HTML.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  async processMarkdown(markdownInput: string): Promise<any> {
    const html = await markdownToHtml(markdownInput);
    return this.processHTML(html);
  }

  /**
   * Converts HTML to ProseMirror JSON via `htmlToJson`.
   *
   * @param htmlInput - Raw HTML source.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  async processHTML(htmlInput: string): Promise<any> {
    return htmlToJson(htmlInput);
  }

  /**
   * Builds a Yjs document from ProseMirror JSON and encodes it as a single
   * state update.
   *
   * @param prosemirrorJson - Document JSON; a falsy value short-circuits.
   * @returns The encoded update as a Buffer, or `null` for falsy input.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  async createYdoc(prosemirrorJson: any): Promise<Buffer | null> {
    if (!prosemirrorJson) {
      return null;
    }

    const ydoc = TiptapTransformer.toYdoc(
      prosemirrorJson,
      'default',
      tiptapExtensions,
    );

    // Encode once; the result is the only thing callers need.
    return Buffer.from(Y.encodeStateAsUpdate(ydoc));
  }

  /**
   * Splits a leading level-1 heading off the document and returns its text
   * as the title.
   *
   * The input is not mutated: the returned `prosemirrorJson` carries a new
   * `content` array. If removing the heading leaves the document empty, an
   * empty paragraph is inserted so the document always has one block.
   *
   * @param prosemirrorState - ProseMirror document JSON.
   * @returns `title` (or `null` when there is no level-1 heading or it has
   * no text) and the document without that heading.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  extractTitleAndRemoveHeading(prosemirrorState: any) {
    // Copy so the caller's array is left untouched.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const content: any[] = [...(prosemirrorState.content ?? [])];
    let title: string | null = null;

    const firstNode = content[0];
    if (firstNode?.type === 'heading' && firstNode.attrs?.level === 1) {
      // A heading with inline marks is split into several text nodes, so
      // join all of them instead of reading only the first one.
      const headingText: string = (firstNode.content ?? [])
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        .map((child: any) => child?.text ?? '')
        .join('');
      title = headingText.length > 0 ? headingText : null;
      content.shift();
    }

    // ensure at least one paragraph
    if (content.length === 0) {
      content.push({
        type: 'paragraph',
        content: [],
      });
    }

    return {
      title,
      prosemirrorJson: {
        ...prosemirrorState,
        content,
      },
    };
  }

  /**
   * Computes a fractional-index position that sorts after the last
   * root-level page (no parent) of the given space.
   *
   * @param spaceId - Space whose root pages are inspected.
   * @returns A key after the current last position, or an initial key when
   * the space has no root-level pages.
   */
  async getNewPagePosition(spaceId: string): Promise<string> {
    const lastPage = await this.db
      .selectFrom('pages')
      .select(['id', 'position'])
      .where('spaceId', '=', spaceId)
      .where('parentPageId', 'is', null)
      .orderBy('position', 'desc')
      .limit(1)
      .executeTakeFirst();

    const previousKey = lastPage ? lastPage.position : null;
    return generateJitteredKeyBetween(previousKey, null);
  }

  /**
   * Accepts a zip upload for asynchronous import.
   *
   * The archive is uploaded to storage, a pending row is written to
   * `fileTasks`, and a job carrying only the task id is added to the file
   * task queue. Nothing is returned.
   *
   * @param filePromise - Pending multipart upload.
   * @param source - Stored verbatim in the task's `source` column.
   * @param userId - Stored as the task creator.
   * @param spaceId - Target space recorded on the task.
   * @param workspaceId - Workspace recorded on the task and used for the
   * storage folder path.
   */
  async importZip(
    filePromise: Promise<MultipartFile>,
    source: string,
    userId: string,
    spaceId: string,
    workspaceId: string,
  ): Promise<void> {
    const file = await filePromise;
    const fileBuffer = await file.toBuffer();

    const fileExtension = path.extname(file.filename).toLowerCase();
    // The stored name (and therefore the storage path) has no extension.
    const fileName = sanitize(
      path.basename(file.filename, fileExtension).slice(0, 255),
    );

    const fileTaskId = uuid7();
    const filePath = `${getFileTaskFolderPath(FileTaskType.Import, workspaceId)}/${fileTaskId}/${fileName}`;

    // upload file
    await this.storageService.upload(filePath, fileBuffer);

    // store in fileTasks table
    await this.db
      .insertInto('fileTasks')
      .values({
        id: fileTaskId,
        type: FileTaskType.Import,
        source: source,
        status: FileTaskStatus.Pending,
        fileName: fileName,
        filePath: filePath,
        // NOTE(review): size is recorded as 0 although the buffer length is
        // available here - confirm whether this placeholder is intended.
        fileSize: 0,
        fileExt: 'zip',
        creatorId: userId,
        spaceId: spaceId,
        workspaceId: workspaceId,
      })
      .execute();

    // Only the task id is sent to the queue.
    // NOTE(review): the original author's notes say the processor is meant
    // to move the task from pending to processing, then to success or
    // failed - that code is not in this file.
    await this.fileTaskQueue.add(QueueJob.IMPORT_TASK, {
      fileTaskId: fileTaskId,
    });
  }

  /**
   * Converts file content to ProseMirror JSON based on the file extension.
   *
   * @param fileContent - Raw Markdown or HTML.
   * @param fileExtension - Extension such as `.md` or `.html`; matched by
   * suffix.
   * @returns The converted document, or an empty string when the extension
   * is neither `.md` nor `.html`.
   */
  async markdownOrHtmlToProsemirror(
    fileContent: string,
    fileExtension: string,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
  ): Promise<any> {
    if (fileExtension.endsWith('.md')) {
      return this.processMarkdown(fileContent);
    }

    if (fileExtension.endsWith('.html')) {
      return this.processHTML(fileContent);
    }

    // Callers treat a falsy result as "unsupported / nothing produced".
    return '';
  }

  /**
   * Replaces linked text that points at another imported file with a page
   * mention node.
   *
   * Each text node carrying a `link` mark has its `href` resolved relative
   * to the directory of `currentFilePath`; when the resolved path is a key
   * of `filePathToPageMetaMap`, the whole text node is swapped for a
   * `mention` node. Links that do not resolve to a known file are left as
   * they are.
   *
   * @param doc - Document to scan.
   * @param currentFilePath - Path of the file `doc` was created from.
   * @param filePathToPageMetaMap - Page metadata keyed by forward-slash
   * file path.
   * @returns A new document when at least one link was replaced, otherwise
   * the `doc` that was passed in.
   */
  async convertInternalLinksToMentionsPM(
    doc: PMNode,
    currentFilePath: string,
    filePathToPageMetaMap: Map<
      string,
      { id: string; title: string; slugId: string }
    >,
  ): Promise<PMNode> {
    const schema = getSchema(tiptapExtensions);
    const state = EditorState.create({ doc, schema });
    let tr: Transaction = state.tr;

    // path.join emits backslashes on Windows; map keys use forward slashes.
    const normalizePath = (p: string) => p.replace(/\\/g, '/');
    const currentDir = path.dirname(currentFilePath);

    // Collect replacements from the original doc.
    const replacements: Array<{
      from: number;
      to: number;
      mentionNode: PMNode;
    }> = [];

    doc.descendants((node, pos) => {
      if (!node.isText || !node.marks?.length) return;

      // Look for the link mark
      const linkMark = node.marks.find(
        (mark) => mark.type.name === 'link' && mark.attrs?.href,
      );
      if (!linkMark) return;

      // Resolve the path and get page meta.
      const resolvedPath = normalizePath(
        path.join(currentDir, linkMark.attrs.href),
      );
      const pageMeta = filePathToPageMetaMap.get(resolvedPath);
      if (!pageMeta) return;

      // Create the mention node with all required attributes.
      const mentionNode = schema.nodes.mention.create({
        id: v7(),
        entityType: 'page',
        entityId: pageMeta.id,
        label: node.text || pageMeta.title,
        slugId: pageMeta.slugId,
        creatorId: 'not available', // This is required per your schema.
      });

      // The range covers the entire text node.
      replacements.push({ from: pos, to: pos + node.nodeSize, mentionNode });
    });

    // Apply back-to-front so earlier positions stay valid after each
    // replacement changes the document length.
    for (let i = replacements.length - 1; i >= 0; i--) {
      const { from, to, mentionNode } = replacements[i];
      try {
        tr = tr.replaceWith(from, to, mentionNode);
      } catch (err) {
        // Best effort: a failed replacement leaves that link untouched.
        this.logger.error('Failed to insert mention', err);
      }
    }

    if (!tr.docChanged) {
      return doc;
    }

    return state.apply(tr).doc;
  }
}