notion formatter

This commit is contained in:
Philipinho
2025-05-25 20:06:21 -07:00
parent ca23c9a4f2
commit 3bbf7c4475
2 changed files with 132 additions and 3 deletions

View File

@ -28,6 +28,8 @@ import { markdownToHtml } from '@docmost/editor-ext';
import { getAttachmentFolderPath } from '../../core/attachment/attachment.utils';
import { AttachmentType } from '../../core/attachment/attachment.constants';
import { getProsemirrorContent } from '../../common/helpers/prosemirror/utils';
import { not } from 'rxjs/internal/util/not';
import { notionFormatter } from './import-formatter';
@Injectable()
export class FileTaskService {
@ -125,6 +127,8 @@ export class FileTaskService {
content = await markdownToHtml(content);
}
//content = this.stripAllStyles(content)
content = await this.rewriteLocalFilesInHtml({
html: content,
pageRelativePath: relPath,
@ -204,10 +208,8 @@ export class FileTaskService {
fileTask.creatorId,
);
console.log(htmlContent);
const pmState = getProsemirrorContent(
await this.importService.processHTML(htmlContent),
await this.importService.processHTML(notionFormatter(htmlContent)),
);
const { title, prosemirrorJson } =

View File

@ -0,0 +1,127 @@
import { Window } from 'happy-dom';
/**
 * Rewrites Notion-exported HTML so that Notion-specific markup is expressed
 * with the `data-type` conventions the importer consumes afterwards.
 *
 * The HTML is loaded into a throw-away happy-dom document, transformed in
 * place, and serialised back. Transformations, in order:
 *
 * 1. `figure.equation`                  -> `<div data-type="mathBlock">TeX</div>`
 * 2. `span.notion-text-equation-token`  -> `<span data-type="mathInline">TeX</span>`
 * 3. `figure.callout`                   -> `<div data-type="callout" data-callout-type="info">…</div>`
 * 4. `ul.to-do-list`                    -> `<ul data-type="taskList">` of `taskItem`s
 * 5. `ul.toggle > li > details`         -> bare `<details>` (list wrappers removed)
 *
 * @param html - HTML fragment to normalise (assigned to `body.innerHTML`).
 * @returns The transformed fragment, i.e. the resulting `body.innerHTML`.
 */
export function notionFormatter(html: string): string {
  const window = new Window();
  const doc = window.document;
  doc.body.innerHTML = html;

  // The TeX source is read from the MathML <annotation> inside the equation.
  const texAnnotation = 'annotation[encoding="application/x-tex"]';

  // Block math
  for (const fig of Array.from(doc.querySelectorAll('figure.equation'))) {
    const tex = fig.querySelector(texAnnotation)?.textContent?.trim() ?? '';

    const mathBlock = doc.createElement('div');
    mathBlock.setAttribute('data-type', 'mathBlock');
    mathBlock.setAttribute('data-katex', 'true');
    mathBlock.textContent = tex;
    fig.replaceWith(mathBlock);
  }

  // Inline math
  for (const token of Array.from(
    doc.querySelectorAll('span.notion-text-equation-token'),
  )) {
    // Drop a <style> element that immediately precedes the token. This is
    // presumably the KaTeX stylesheet import Notion emits, but note that any
    // directly preceding <style> is removed, whatever its content.
    const prev = token.previousElementSibling;
    if (prev?.tagName === 'STYLE') prev.remove();

    const tex = token.querySelector(texAnnotation)?.textContent?.trim() ?? '';

    const mathInline = doc.createElement('span');
    mathInline.setAttribute('data-type', 'mathInline');
    mathInline.setAttribute('data-katex', 'true');
    mathInline.textContent = tex;
    token.replaceWith(mathInline);
  }

  // Callouts. Iterated in reverse document order so a nested callout is
  // converted before the callout that contains it.
  for (const fig of Array.from(
    doc.querySelectorAll('figure.callout'),
  ).reverse()) {
    // The content is the second <div> that is a *direct child* of the figure.
    // A descendant query such as `div:nth-of-type(2)` could instead match a
    // div nested deeper in the figure, so the children are inspected
    // explicitly.
    const contentDiv = Array.from(fig.children).filter(
      (child) => child.tagName === 'DIV',
    )[1];
    if (!contentDiv) continue;

    const wrapper = doc.createElement('div');
    wrapper.setAttribute('data-type', 'callout');
    wrapper.setAttribute('data-callout-type', 'info');

    // Move (not copy) every node so nested structure (tables, paragraphs,
    // already-converted callouts, lists…) is preserved as-is.
    while (contentDiv.firstChild) {
      wrapper.appendChild(contentDiv.firstChild);
    }
    fig.replaceWith(wrapper);
  }

  // To-do lists
  for (const oldList of Array.from(doc.querySelectorAll('ul.to-do-list'))) {
    const newList = doc.createElement('ul');
    newList.setAttribute('data-type', 'taskList');

    // Every to-do <li> below this list (including those of nested to-do
    // lists) becomes one task item of the new, flat list.
    for (const li of Array.from(oldList.querySelectorAll('li'))) {
      // An <li> of some other list nested under a to-do item is not a task;
      // converting it would only yield an empty task item.
      if (!li.parentElement?.matches('ul.to-do-list')) continue;

      // Resolve a selector against this <li> only: a hit that belongs to a
      // nested <li> is ignored, so a checked child cannot mark its unchecked
      // parent as checked (nor lend it its text).
      const own = (selector: string) => {
        const hit = li.querySelector(selector);
        return hit !== null && hit.closest('li') === li ? hit : null;
      };

      const isChecked = own('.checkbox.checkbox-on') !== null;
      const textSpan = own(
        'span.to-do-children-unchecked, span.to-do-children-checked',
      );

      // <li data-type="taskItem" data-checked="true|false">
      const taskItem = doc.createElement('li');
      taskItem.setAttribute('data-type', 'taskItem');
      taskItem.setAttribute('data-checked', String(isChecked));

      // <label><input type="checkbox" [checked]><span></span></label>
      // Attributes (not properties) are set because the result is serialised
      // through innerHTML, and the `checked` property is not reflected there.
      const label = doc.createElement('label');
      const input = doc.createElement('input');
      input.setAttribute('type', 'checkbox');
      if (isChecked) input.setAttribute('checked', '');
      const spacer = doc.createElement('span');
      label.append(input, spacer);

      // <div><p>…item content…</p></div>
      // The span's child nodes are moved rather than flattened to text so
      // inline markup (links, emphasis, converted inline math) survives.
      const container = doc.createElement('div');
      const p = doc.createElement('p');
      if (textSpan) {
        while (textSpan.firstChild) {
          p.appendChild(textSpan.firstChild);
        }
      }
      container.appendChild(p);

      taskItem.append(label, container);
      newList.appendChild(taskItem);
    }

    oldList.replaceWith(newList);
  }

  // Toggle blocks: unwrap <details> from its `ul.toggle > li` wrappers.
  // Processed in document order and always inserted directly before the
  // wrapper list, so several toggles sharing one list keep their order.
  for (const details of Array.from(
    doc.querySelectorAll('ul.toggle details'),
  )) {
    // Only unwrap the exact `ul.toggle > li > details` shape; a <details>
    // nested elsewhere must not be hoisted out of an unrelated ancestor <li>.
    const li = details.parentElement;
    if (!li || li.tagName !== 'LI') continue;
    const ul = li.parentElement;
    if (!ul || !ul.matches('ul.toggle')) continue;
    const host = ul.parentNode;
    if (!host) continue;

    host.insertBefore(details, ul);

    // Remove the wrappers once nothing is left inside them.
    if (li.childNodes.length === 0) li.remove();
    if (ul.childNodes.length === 0) ul.remove();
  }

  return doc.body.innerHTML;
}