From 36a573fce9696a8feac9574d03ab8eb3e987ceeb Mon Sep 17 00:00:00 2001 From: Philipinho <16838612+Philipinho@users.noreply.github.com> Date: Mon, 26 May 2025 15:37:35 -0700 Subject: [PATCH] import embeds --- .../integrations/import/file-task.service.ts | 10 ++--- .../integrations/import/import-formatter.ts | 43 +++++++++++++++++++ 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/apps/server/src/integrations/import/file-task.service.ts b/apps/server/src/integrations/import/file-task.service.ts index 2ed96d6e..3ad92d3c 100644 --- a/apps/server/src/integrations/import/file-task.service.ts +++ b/apps/server/src/integrations/import/file-task.service.ts @@ -28,8 +28,7 @@ import { markdownToHtml } from '@docmost/editor-ext'; import { getAttachmentFolderPath } from '../../core/attachment/attachment.utils'; import { AttachmentType } from '../../core/attachment/attachment.constants'; import { getProsemirrorContent } from '../../common/helpers/prosemirror/utils'; -import { not } from 'rxjs/internal/util/not'; -import { notionFormatter } from './import-formatter'; +import { formatImportHtml, notionFormatter } from './import-formatter'; @Injectable() export class FileTaskService { @@ -68,9 +67,8 @@ export class FileTaskService { await pipeline(fileStream, createWriteStream(tmpZipPath)); await extractZip(tmpZipPath, tmpExtractDir); - console.log('extract here'); - // TODO: internal link mentions, backlinks, attachments + // TODO: backlinks try { await this.updateTaskStatus(fileTaskId, FileTaskStatus.Processing); // if type == generic @@ -127,8 +125,6 @@ export class FileTaskService { content = await markdownToHtml(content); } - //content = this.stripAllStyles(content) - content = await this.rewriteLocalFilesInHtml({ html: content, pageRelativePath: relPath, @@ -209,7 +205,7 @@ export class FileTaskService { ); const pmState = getProsemirrorContent( - await this.importService.processHTML(notionFormatter(htmlContent)), + await this.importService.processHTML(formatImportHtml(htmlContent)), ); const { title, prosemirrorJson } = diff --git a/apps/server/src/integrations/import/import-formatter.ts b/apps/server/src/integrations/import/import-formatter.ts index ef37a7e4..a9413d45 100644 --- a/apps/server/src/integrations/import/import-formatter.ts +++ b/apps/server/src/integrations/import/import-formatter.ts @@ -1,10 +1,53 @@ import { Window } from 'happy-dom'; +import { cleanUrlString } from './file.utils'; +import { getEmbedUrlAndProvider } from '@docmost/editor-ext'; + +export function formatImportHtml(html: string) { + const pmHtml = notionFormatter(html); + return defaultHtmlFormatter(pmHtml); +} + +export function defaultHtmlFormatter(html: string): string { + const window = new Window(); + const doc = window.document; + doc.body.innerHTML = html; + + // embed providers + const anchors = Array.from(doc.getElementsByTagName('a')); + for (const a of anchors) { + const href = cleanUrlString(a.getAttribute('href')) ?? ''; + if (!href) continue; + + const embedProvider = getEmbedUrlAndProvider(href); + + if (embedProvider) { + const embed = doc.createElement('div'); + embed.setAttribute('data-type', 'embed'); + embed.setAttribute('data-src', href); + embed.setAttribute('data-provider', embedProvider.provider); + embed.setAttribute('data-align', 'center'); + embed.setAttribute('data-width', '640'); + embed.setAttribute('data-height', '480'); + + a.replaceWith(embed); + } + } + + return doc.body.innerHTML; +} export function notionFormatter(html: string): string { const window = new Window(); const doc = window.document; doc.body.innerHTML = html; + // remove empty description paragraph + doc.querySelectorAll('p.page-description').forEach((p) => { + if (p.textContent?.trim() === '') { + p.remove(); + } + }); + // Block math for (const fig of Array.from(doc.querySelectorAll('figure.equation'))) { // get TeX source from the MathML