import { Injectable, Logger } from '@nestjs/common';
import * as path from 'path';
import { InjectKysely } from 'nestjs-kysely';
import { KyselyDB } from '@docmost/db/types/kysely.types';
import { cleanUrlString } from '../utils/file.utils';
import { StorageService } from '../../storage/storage.service';
import { createReadStream } from 'node:fs';
import { promises as fs } from 'fs';
import { Readable } from 'stream';
import { getMimeType, sanitizeFileName } from '../../../common/helpers';
import { v7 } from 'uuid';
import { FileTask } from '@docmost/db/types/entity.types';
import { getAttachmentFolderPath } from '../../../core/attachment/attachment.utils';
import { AttachmentType } from '../../../core/attachment/attachment.constants';
import { unwrapFromParagraph } from '../utils/import-formatter';
import { resolveRelativeAttachmentPath } from '../utils/import.utils';
import { load } from 'cheerio';
import pLimit from 'p-limit';
import { InjectQueue } from '@nestjs/bullmq';
import { Queue } from 'bullmq';
import { QueueJob, QueueName } from '../../queue/constants';

/**
 * Describes one attachment that belongs to an imported page.
 *
 * `href` is the value used as the lookup key into the `attachmentCandidates`
 * map and into the Draw.io SVG map inside `processAttachments`.
 * `fileName` and `mimeType` are not read in the portion of the file visible
 * here — presumably consumed by `analyzeAttachments`; verify there.
 */
interface AttachmentInfo {
  href: string;
  fileName: string;
  mimeType: string;
}

/**
 * A Draw.io diagram source together with its optional PNG rendering.
 *
 * Both files are optional: `processAttachments` skips a pair that has no
 * `drawioFile`, and treats a missing `pngFile` as "no PNG path".
 * `baseName` is only used to identify the pair in error log messages in the
 * code visible here.
 */
interface DrawioPair {
  drawioFile?: AttachmentInfo;
  pngFile?: AttachmentInfo;
  baseName: string;
}

@Injectable()
export class ImportAttachmentService {
  private readonly logger = new Logger(ImportAttachmentService.name);
  // Concurrency cap handed to pLimit() in processAttachments.
  private readonly CONCURRENT_UPLOADS = 3;
  // NOTE(review): MAX_RETRIES and RETRY_DELAY are not read in the visible part
  // of this file — presumably used by uploadWithRetry, with RETRY_DELAY in
  // milliseconds; confirm against that method.
  private readonly MAX_RETRIES = 2;
  private readonly RETRY_DELAY = 2000;

  constructor(
    private readonly storageService: StorageService,
    @InjectKysely() private readonly db: KyselyDB,
    @InjectQueue(QueueName.ATTACHMENT_QUEUE) private attachmentQueue: Queue,
  ) {}

  /**
   * Rewrites attachment references in an imported page's HTML and schedules
   * the referenced files for upload.
   *
   * In the part of the method visible here, the HTML is parsed with cheerio
   * and `img`, `video`, `div[data-type="attachment"]` and `a` elements whose
   * URL is non-empty, does not start with `http`, and resolves through
   * `resolveRelativeAttachmentPath` are rewritten to `/api/files/<id>/<name>`
   * URLs. Draw.io pairs are combined into a single `diagram.drawio.svg`
   * attachment. Each distinct relative path is queued for upload at most once.
   *
   * @param opts.html Page HTML; parsed with cheerio `load`.
   * @param opts.pageRelativePath Path of the page; its directory name is the
   *   base for resolving relative attachment URLs.
   * @param opts.extractDir Not read in the visible portion of the method.
   * @param opts.pageId Stored as `pageId` on inserted attachment rows.
   * @param opts.fileTask Supplies `workspaceId`, `creatorId` and `spaceId`
   *   for storage paths and attachment rows.
   * @param opts.attachmentCandidates Lookup from a relative attachment path
   *   to the path of the file to upload.
   * @param opts.pageAttachments Attachments passed to `analyzeAttachments`
   *   to detect Draw.io pairs; defaults to an empty array.
   * @param opts.isConfluenceImport Forwarded to `analyzeAttachments`.
   * @returns NOTE(review): the resolved value is not visible in this portion
   *   of the file — confirm against the end of the method.
   */
  async processAttachments(opts: {
    html: string;
    pageRelativePath: string;
    extractDir: string;
    pageId: string;
    fileTask: FileTask;
    // NOTE(review): `Map` here and `Promise` in the return type carry no type
    // arguments — they look stripped by extraction. Presumably
    // Map<string, string> (relative path -> absolute path); confirm and restore.
    attachmentCandidates: Map;
    pageAttachments?: AttachmentInfo[];
    isConfluenceImport?: boolean;
  }): Promise {
    const {
      html,
      pageRelativePath,
      extractDir,
      pageId,
      fileTask,
      attachmentCandidates,
pageAttachments = [], isConfluenceImport, } = opts; const attachmentTasks: (() => Promise)[] = []; const limit = pLimit(this.CONCURRENT_UPLOADS); const uploadStats = { total: 0, completed: 0, failed: 0, failedFiles: [] as string[], }; /** * Cache keyed by the *relative* path that appears in the HTML. * Ensures we upload (and DB-insert) each attachment at most once, * even if it's referenced multiple times on the page. */ const processed = new Map< string, { attachmentId: string; storageFilePath: string; apiFilePath: string; fileNameWithExt: string; abs: string; } >(); // Analyze attachments to identify Draw.io pairs const { drawioPairs, skipFiles } = this.analyzeAttachments( pageAttachments, isConfluenceImport, ); // Map to store processed Draw.io SVGs const drawioSvgMap = new Map< string, { attachmentId: string; apiFilePath: string; fileName: string; } >(); //this.logger.debug(`Found ${drawioPairs.size} Draw.io pairs to process`); // Process Draw.io pairs and create combined SVG files for (const [drawioHref, pair] of drawioPairs) { if (!pair.drawioFile) continue; const drawioAbsPath = attachmentCandidates.get(drawioHref); if (!drawioAbsPath) continue; const pngAbsPath = pair.pngFile ? 
attachmentCandidates.get(pair.pngFile.href) : undefined; try { // Create combined SVG with Draw.io data and PNG image const svgBuffer = await this.createDrawioSvg(drawioAbsPath, pngAbsPath); // Generate file details - always use "diagram.drawio.svg" as filename const attachmentId = v7(); const fileName = 'diagram.drawio.svg'; const storageFilePath = `${getAttachmentFolderPath( AttachmentType.File, fileTask.workspaceId, )}/${attachmentId}/${fileName}`; const apiFilePath = `/api/files/${attachmentId}/${fileName}`; // Upload the SVG file attachmentTasks.push(async () => { try { const stream = Readable.from(svgBuffer); // Upload to storage await this.storageService.uploadStream(storageFilePath, stream, { recreateClient: true, }); // Insert into database await this.db .insertInto('attachments') .values({ id: attachmentId, filePath: storageFilePath, fileName: fileName, fileSize: svgBuffer.length, mimeType: 'image/svg+xml', type: 'file', fileExt: '.svg', creatorId: fileTask.creatorId, workspaceId: fileTask.workspaceId, pageId, spaceId: fileTask.spaceId, }) .execute(); uploadStats.completed++; } catch (error) { uploadStats.failed++; uploadStats.failedFiles.push(fileName); this.logger.error( `Failed to upload Draw.io SVG ${fileName}:`, error, ); } }); // Store the mapping for both Draw.io and PNG references drawioSvgMap.set(drawioHref, { attachmentId, apiFilePath, fileName }); if (pair.pngFile) { drawioSvgMap.set(pair.pngFile.href, { attachmentId, apiFilePath, fileName, }); } } catch (error) { this.logger.error( `Failed to process Draw.io pair ${pair.baseName}:`, error, ); } } const uploadOnce = (relPath: string) => { const abs = attachmentCandidates.get(relPath)!; const attachmentId = v7(); const ext = path.extname(abs); const fileNameWithExt = sanitizeFileName(path.basename(abs, ext)) + ext.toLowerCase(); const storageFilePath = `${getAttachmentFolderPath( AttachmentType.File, fileTask.workspaceId, )}/${attachmentId}/${fileNameWithExt}`; const apiFilePath = 
`/api/files/${attachmentId}/${fileNameWithExt}`; attachmentTasks.push(() => this.uploadWithRetry({ abs, storageFilePath, attachmentId, fileNameWithExt, ext, pageId, fileTask, uploadStats, }), ); return { attachmentId, storageFilePath, apiFilePath, fileNameWithExt, abs, }; }; /** * – Returns cached data if we’ve already processed this path. * – Otherwise calls `uploadOnce`, stores the result, and returns it. */ const processFile = (relPath: string) => { const cached = processed.get(relPath); if (cached) return cached; const fresh = uploadOnce(relPath); processed.set(relPath, fresh); return fresh; }; const pageDir = path.dirname(pageRelativePath); const $ = load(html); // image for (const imgEl of $('img').toArray()) { const $img = $(imgEl); const src = cleanUrlString($img.attr('src') ?? '')!; if (!src || src.startsWith('http')) continue; const relPath = resolveRelativeAttachmentPath( src, pageDir, attachmentCandidates, ); if (!relPath) continue; // Check if this image is part of a Draw.io pair const drawioSvg = drawioSvgMap.get(relPath); if (drawioSvg) { const $drawio = $('
') .attr('data-type', 'drawio') .attr('data-src', drawioSvg.apiFilePath) .attr('data-title', 'diagram') .attr('data-width', '100%') .attr('data-align', 'center') .attr('data-attachment-id', drawioSvg.attachmentId); $img.replaceWith($drawio); unwrapFromParagraph($, $drawio); continue; } const { attachmentId, apiFilePath } = processFile(relPath); const width = $img.attr('width') ?? '100%'; const align = $img.attr('data-align') ?? 'center'; $img .attr('src', apiFilePath) .attr('data-attachment-id', attachmentId) .attr('width', width) .attr('data-align', align); unwrapFromParagraph($, $img); } // video for (const vidEl of $('video').toArray()) { const $vid = $(vidEl); const src = cleanUrlString($vid.attr('src') ?? '')!; if (!src || src.startsWith('http')) continue; const relPath = resolveRelativeAttachmentPath( src, pageDir, attachmentCandidates, ); if (!relPath) continue; const { attachmentId, apiFilePath } = processFile(relPath); const width = $vid.attr('width') ?? '100%'; const align = $vid.attr('data-align') ?? 'center'; $vid .attr('src', apiFilePath) .attr('data-attachment-id', attachmentId) .attr('width', width) .attr('data-align', align); unwrapFromParagraph($, $vid); } //
for (const el of $('div[data-type="attachment"]').toArray()) { const $oldDiv = $(el); const rawUrl = cleanUrlString($oldDiv.attr('data-attachment-url') ?? '')!; if (!rawUrl || rawUrl.startsWith('http')) continue; const relPath = resolveRelativeAttachmentPath( rawUrl, pageDir, attachmentCandidates, ); if (!relPath) continue; const { attachmentId, apiFilePath, abs } = processFile(relPath); const fileName = path.basename(abs); const mime = getMimeType(abs); const $newDiv = $('
') .attr('data-type', 'attachment') .attr('data-attachment-url', apiFilePath) .attr('data-attachment-name', fileName) .attr('data-attachment-mime', mime) .attr('data-attachment-id', attachmentId); $oldDiv.replaceWith($newDiv); unwrapFromParagraph($, $newDiv); } // rewrite other attachments via for (const aEl of $('a').toArray()) { const $a = $(aEl); const href = cleanUrlString($a.attr('href') ?? '')!; if (!href || href.startsWith('http')) continue; const relPath = resolveRelativeAttachmentPath( href, pageDir, attachmentCandidates, ); if (!relPath) continue; // Check if this is a Draw.io file const drawioSvg = drawioSvgMap.get(relPath); if (drawioSvg) { const $drawio = $('
') .attr('data-type', 'drawio') .attr('data-src', drawioSvg.apiFilePath) .attr('data-title', 'diagram') .attr('data-width', '100%') .attr('data-align', 'center') .attr('data-attachment-id', drawioSvg.attachmentId); $a.replaceWith($drawio); unwrapFromParagraph($, $drawio); continue; } // Skip files that should be ignored if (skipFiles.has(relPath)) { $a.remove(); continue; } const { attachmentId, apiFilePath, abs } = processFile(relPath); const ext = path.extname(relPath).toLowerCase(); if (ext === '.mp4') { const $video = $('