From 1f797c3d2734cccdc0b0b2be67b1e30f71b40d29 Mon Sep 17 00:00:00 2001 From: Philip Okugbe <16838612+Philipinho@users.noreply.github.com> Date: Tue, 2 Sep 2025 21:19:09 -0700 Subject: [PATCH] fix: confluence drawio import (#1518) * POC * WIP - working * WIP * WIP * sync * fix drawio preview image --- apps/server/src/ee | 2 +- .../services/import-attachment.service.ts | 764 ++++++++++++++---- 2 files changed, 595 insertions(+), 171 deletions(-) diff --git a/apps/server/src/ee b/apps/server/src/ee index 3775df60..aa33dcd2 160000 --- a/apps/server/src/ee +++ b/apps/server/src/ee @@ -1 +1 @@ -Subproject commit 3775df60137366b6953d80037f90547fe8ee4ac7 +Subproject commit aa33dcd2ba310705d6799d1e468390774e0cf3e7 diff --git a/apps/server/src/integrations/import/services/import-attachment.service.ts b/apps/server/src/integrations/import/services/import-attachment.service.ts index b9a488a9..874ff892 100644 --- a/apps/server/src/integrations/import/services/import-attachment.service.ts +++ b/apps/server/src/integrations/import/services/import-attachment.service.ts @@ -6,6 +6,7 @@ import { cleanUrlString } from '../utils/file.utils'; import { StorageService } from '../../storage/storage.service'; import { createReadStream } from 'node:fs'; import { promises as fs } from 'fs'; +import { Readable } from 'stream'; import { getMimeType, sanitizeFileName } from '../../../common/helpers'; import { v7 } from 'uuid'; import { FileTask } from '@docmost/db/types/entity.types'; @@ -16,6 +17,18 @@ import { resolveRelativeAttachmentPath } from '../utils/import.utils'; import { load } from 'cheerio'; import pLimit from 'p-limit'; +interface AttachmentInfo { + href: string; + fileName: string; + mimeType: string; +} + +interface DrawioPair { + drawioFile?: AttachmentInfo; + pngFile?: AttachmentInfo; + baseName: string; +} + @Injectable() export class ImportAttachmentService { private readonly logger = new Logger(ImportAttachmentService.name); @@ -35,6 +48,7 @@ export class ImportAttachmentService { pageId: string; fileTask: FileTask; attachmentCandidates: Map; + pageAttachments?: AttachmentInfo[]; }): Promise { const { html, @@ -43,6 +57,7 @@ export class ImportAttachmentService { pageId, fileTask, attachmentCandidates, + pageAttachments = [], } = opts; const attachmentTasks: (() => Promise)[] = []; @@ -57,7 +72,7 @@ export class ImportAttachmentService { /** * Cache keyed by the *relative* path that appears in the HTML. * Ensures we upload (and DB-insert) each attachment at most once, - * even if it’s referenced multiple times on the page. + * even if it's referenced multiple times on the page. */ const processed = new Map< string, @@ -70,6 +85,99 @@ export class ImportAttachmentService { } >(); + // Analyze attachments to identify Draw.io pairs + const { drawioPairs, skipFiles } = this.analyzeAttachments(pageAttachments); + + // Map to store processed Draw.io SVGs + const drawioSvgMap = new Map< + string, + { + attachmentId: string; + apiFilePath: string; + fileName: string; + } + >(); + + this.logger.debug(`Found ${drawioPairs.size} Draw.io pairs to process`); + + // Process Draw.io pairs and create combined SVG files + for (const [drawioHref, pair] of drawioPairs) { + if (!pair.drawioFile) continue; + + const drawioAbsPath = attachmentCandidates.get(drawioHref); + if (!drawioAbsPath) continue; + + const pngAbsPath = pair.pngFile + ? attachmentCandidates.get(pair.pngFile.href) + : undefined; + + try { + // Create combined SVG with Draw.io data and PNG image + const svgBuffer = await this.createDrawioSvg(drawioAbsPath, pngAbsPath); + + // Generate file details - always use "diagram.drawio.svg" as filename + const attachmentId = v7(); + const fileName = 'diagram.drawio.svg'; + const storageFilePath = `${getAttachmentFolderPath( + AttachmentType.File, + fileTask.workspaceId, + )}/${attachmentId}/${fileName}`; + const apiFilePath = `/api/files/${attachmentId}/${fileName}`; + + // Upload the SVG file + attachmentTasks.push(async () => { + try { + const stream = Readable.from(svgBuffer); + + // Upload to storage + await this.storageService.uploadStream(storageFilePath, stream); + + // Insert into database + await this.db + .insertInto('attachments') + .values({ + id: attachmentId, + filePath: storageFilePath, + fileName: fileName, + fileSize: svgBuffer.length, + mimeType: 'image/svg+xml', + type: 'file', + fileExt: '.svg', + creatorId: fileTask.creatorId, + workspaceId: fileTask.workspaceId, + pageId, + spaceId: fileTask.spaceId, + }) + .execute(); + + uploadStats.completed++; + } catch (error) { + uploadStats.failed++; + uploadStats.failedFiles.push(fileName); + this.logger.error( + `Failed to upload Draw.io SVG ${fileName}:`, + error, + ); + } + }); + + // Store the mapping for both Draw.io and PNG references + drawioSvgMap.set(drawioHref, { attachmentId, apiFilePath, fileName }); + if (pair.pngFile) { + drawioSvgMap.set(pair.pngFile.href, { + attachmentId, + apiFilePath, + fileName, + }); + } + } catch (error) { + this.logger.error( + `Failed to process Draw.io pair ${pair.baseName}:`, + error, + ); + } + } + const uploadOnce = (relPath: string) => { const abs = attachmentCandidates.get(relPath)!; const attachmentId = v7(); @@ -85,16 +193,18 @@ export class ImportAttachmentService { const apiFilePath = `/api/files/${attachmentId}/${fileNameWithExt}`; - attachmentTasks.push(() => this.uploadWithRetry({ - abs, - storageFilePath, - attachmentId, - fileNameWithExt, - ext, - pageId, - fileTask, - uploadStats, - })); + attachmentTasks.push(() => + this.uploadWithRetry({ + abs, + storageFilePath, + attachmentId, + fileNameWithExt, + ext, + pageId, + fileTask, + uploadStats, + }), + ); return { attachmentId, @@ -121,195 +231,301 @@ export class ImportAttachmentService { const pageDir = path.dirname(pageRelativePath); const $ = load(html); - // image - for (const imgEl of $('img').toArray()) { - const $img = $(imgEl); - const src = cleanUrlString($img.attr('src') ?? '')!; - if (!src || src.startsWith('http')) continue; + // Cache for resolved paths to avoid repeated lookups + const resolvedPathCache = new Map(); - const relPath = resolveRelativeAttachmentPath( - src, + const getCachedResolvedPath = (rawPath: string): string | null => { + if (resolvedPathCache.has(rawPath)) { + return resolvedPathCache.get(rawPath)!; + } + const resolved = resolveRelativeAttachmentPath( + rawPath, pageDir, attachmentCandidates, ); - if (!relPath) continue; + resolvedPathCache.set(rawPath, resolved); + return resolved; + }; - const { attachmentId, apiFilePath, abs } = processFile(relPath); - const stat = await fs.stat(abs); + // Cache for file stats to avoid repeated file system calls + const statCache = new Map(); - const width = $img.attr('width') ?? '100%'; - const align = $img.attr('data-align') ?? 'center'; + const getCachedStat = async (absPath: string) => { + if (statCache.has(absPath)) { + return statCache.get(absPath); + } + const stat = await fs.stat(absPath); + statCache.set(absPath, stat); + return stat; + }; - $img - .attr('src', apiFilePath) - .attr('data-attachment-id', attachmentId) - .attr('data-size', stat.size.toString()) - .attr('width', width) - .attr('data-align', align); + // Single DOM traversal for all attachment elements + const selector = + 'img, video, div[data-type="attachment"], a, div[data-type="excalidraw"], div[data-type="drawio"]'; + const elements = $(selector).toArray(); - unwrapFromParagraph($, $img); - } + for (const element of elements) { + const $el = $(element); + const tagName = element.tagName.toLowerCase(); - // video - for (const vidEl of $('video').toArray()) { - const $vid = $(vidEl); - const src = cleanUrlString($vid.attr('src') ?? '')!; - if (!src || src.startsWith('http')) continue; + // Process based on element type + if (tagName === 'img') { + const src = cleanUrlString($el.attr('src') ?? ''); + if (!src || src.startsWith('http')) continue; - const relPath = resolveRelativeAttachmentPath( - src, - pageDir, - attachmentCandidates, - ); - if (!relPath) continue; + const relPath = getCachedResolvedPath(src); + if (!relPath) continue; - const { attachmentId, apiFilePath, abs } = processFile(relPath); - const stat = await fs.stat(abs); + // Check if this image is part of a Draw.io pair + const drawioSvg = drawioSvgMap.get(relPath); + if (drawioSvg) { + const $drawio = $('
') + .attr('data-type', 'drawio') + .attr('data-src', drawioSvg.apiFilePath) + .attr('data-title', 'diagram') + .attr('data-width', '100%') + .attr('data-align', 'center') + .attr('data-attachment-id', drawioSvg.attachmentId); - const width = $vid.attr('width') ?? '100%'; - const align = $vid.attr('data-align') ?? 'center'; + $el.replaceWith($drawio); + unwrapFromParagraph($, $drawio); + continue; + } - $vid - .attr('src', apiFilePath) - .attr('data-attachment-id', attachmentId) - .attr('data-size', stat.size.toString()) - .attr('width', width) - .attr('data-align', align); + const { attachmentId, apiFilePath, abs } = processFile(relPath); + const stat = await getCachedStat(abs); - unwrapFromParagraph($, $vid); - } - - //
- for (const el of $('div[data-type="attachment"]').toArray()) { - const $oldDiv = $(el); - const rawUrl = cleanUrlString($oldDiv.attr('data-attachment-url') ?? '')!; - if (!rawUrl || rawUrl.startsWith('http')) continue; - - const relPath = resolveRelativeAttachmentPath( - rawUrl, - pageDir, - attachmentCandidates, - ); - if (!relPath) continue; - - const { attachmentId, apiFilePath, abs } = processFile(relPath); - const stat = await fs.stat(abs); - const fileName = path.basename(abs); - const mime = getMimeType(abs); - - const $newDiv = $('
') - .attr('data-type', 'attachment') - .attr('data-attachment-url', apiFilePath) - .attr('data-attachment-name', fileName) - .attr('data-attachment-mime', mime) - .attr('data-attachment-size', stat.size.toString()) - .attr('data-attachment-id', attachmentId); - - $oldDiv.replaceWith($newDiv); - unwrapFromParagraph($, $newDiv); - } - - // rewrite other attachments via - for (const aEl of $('a').toArray()) { - const $a = $(aEl); - const href = cleanUrlString($a.attr('href') ?? '')!; - if (!href || href.startsWith('http')) continue; - - const relPath = resolveRelativeAttachmentPath( - href, - pageDir, - attachmentCandidates, - ); - if (!relPath) continue; - - const { attachmentId, apiFilePath, abs } = processFile(relPath); - const stat = await fs.stat(abs); - const ext = path.extname(relPath).toLowerCase(); - - if (ext === '.mp4') { - const $video = $('