From 3bbf7c4475f31db36619d1498062e3f4dbb41320 Mon Sep 17 00:00:00 2001 From: Philipinho <16838612+Philipinho@users.noreply.github.com> Date: Sun, 25 May 2025 20:06:21 -0700 Subject: [PATCH] notion formatter --- .../integrations/import/file-task.service.ts | 8 +- .../integrations/import/import-formatter.ts | 127 ++++++++++++++++++ 2 files changed, 132 insertions(+), 3 deletions(-) create mode 100644 apps/server/src/integrations/import/import-formatter.ts diff --git a/apps/server/src/integrations/import/file-task.service.ts b/apps/server/src/integrations/import/file-task.service.ts index 6c0191a6..2ed96d6e 100644 --- a/apps/server/src/integrations/import/file-task.service.ts +++ b/apps/server/src/integrations/import/file-task.service.ts @@ -28,6 +28,8 @@ import { markdownToHtml } from '@docmost/editor-ext'; import { getAttachmentFolderPath } from '../../core/attachment/attachment.utils'; import { AttachmentType } from '../../core/attachment/attachment.constants'; import { getProsemirrorContent } from '../../common/helpers/prosemirror/utils'; +import { not } from 'rxjs/internal/util/not'; +import { notionFormatter } from './import-formatter'; @Injectable() export class FileTaskService { @@ -125,6 +127,8 @@ export class FileTaskService { content = await markdownToHtml(content); } + //content = this.stripAllStyles(content) + content = await this.rewriteLocalFilesInHtml({ html: content, pageRelativePath: relPath, @@ -204,10 +208,8 @@ export class FileTaskService { fileTask.creatorId, ); - console.log(htmlContent); - const pmState = getProsemirrorContent( - await this.importService.processHTML(htmlContent), + await this.importService.processHTML(notionFormatter(htmlContent)), ); const { title, prosemirrorJson } = diff --git a/apps/server/src/integrations/import/import-formatter.ts b/apps/server/src/integrations/import/import-formatter.ts new file mode 100644 index 00000000..ef37a7e4 --- /dev/null +++ b/apps/server/src/integrations/import/import-formatter.ts @@ -0,0 +1,127 @@ +import { Window } from 'happy-dom'; + +export function notionFormatter(html: string): string { + const window = new Window(); + const doc = window.document; + doc.body.innerHTML = html; + + // Block math + for (const fig of Array.from(doc.querySelectorAll('figure.equation'))) { + // get TeX source from the MathML + const annotation = fig.querySelector( + 'annotation[encoding="application/x-tex"]', + ); + const tex = annotation?.textContent?.trim() ?? ''; + + const mathBlock = doc.createElement('div'); + mathBlock.setAttribute('data-type', 'mathBlock'); + mathBlock.setAttribute('data-katex', 'true'); + mathBlock.textContent = tex; + + fig.replaceWith(mathBlock); + } + + // Inline math + for (const token of Array.from( + doc.querySelectorAll('span.notion-text-equation-token'), + )) { + // remove the preceding