mirror of
https://github.com/docmost/docmost.git
synced 2025-11-15 19:51:13 +10:00
notion formatter
This commit is contained in:
@ -28,6 +28,8 @@ import { markdownToHtml } from '@docmost/editor-ext';
|
||||
import { getAttachmentFolderPath } from '../../core/attachment/attachment.utils';
|
||||
import { AttachmentType } from '../../core/attachment/attachment.constants';
|
||||
import { getProsemirrorContent } from '../../common/helpers/prosemirror/utils';
|
||||
import { not } from 'rxjs/internal/util/not';
|
||||
import { notionFormatter } from './import-formatter';
|
||||
|
||||
@Injectable()
|
||||
export class FileTaskService {
|
||||
@ -125,6 +127,8 @@ export class FileTaskService {
|
||||
content = await markdownToHtml(content);
|
||||
}
|
||||
|
||||
//content = this.stripAllStyles(content)
|
||||
|
||||
content = await this.rewriteLocalFilesInHtml({
|
||||
html: content,
|
||||
pageRelativePath: relPath,
|
||||
@ -204,10 +208,8 @@ export class FileTaskService {
|
||||
fileTask.creatorId,
|
||||
);
|
||||
|
||||
console.log(htmlContent);
|
||||
|
||||
const pmState = getProsemirrorContent(
|
||||
await this.importService.processHTML(htmlContent),
|
||||
await this.importService.processHTML(notionFormatter(htmlContent)),
|
||||
);
|
||||
|
||||
const { title, prosemirrorJson } =
|
||||
|
||||
127
apps/server/src/integrations/import/import-formatter.ts
Normal file
127
apps/server/src/integrations/import/import-formatter.ts
Normal file
@ -0,0 +1,127 @@
|
||||
import { Window } from 'happy-dom';
|
||||
|
||||
/**
 * Rewrites HTML exported from Notion so that Notion-specific markup is
 * expressed with the `data-type` attributes the importer looks for.
 *
 * The following conversions are applied, in order:
 *  - `figure.equation`                  -> `<div data-type="mathBlock" data-katex="true">TeX</div>`
 *  - `span.notion-text-equation-token`  -> `<span data-type="mathInline" data-katex="true">TeX</span>`
 *  - `figure.callout`                   -> `<div data-type="callout" data-callout-type="info">…</div>`
 *  - `ul.to-do-list`                    -> `<ul data-type="taskList">` of `<li data-type="taskItem">`
 *  - `ul.toggle > li > details`         -> bare `<details>` (list wrappers removed)
 *
 * @param html - HTML fragment to transform; it is parsed as the body of a
 *   fresh happy-dom document.
 * @returns The transformed body serialised back to an HTML string.
 */
export function notionFormatter(html: string): string {
  // NOTE(review): the happy-dom Window is never closed here; confirm against
  // the installed happy-dom version whether it should be disposed of after use.
  const window = new Window();
  const doc = window.document;
  doc.body.innerHTML = html;

  // KaTeX keeps the original TeX source in a MathML <annotation> element.
  const texSelector = 'annotation[encoding="application/x-tex"]';

  // Block math
  for (const fig of Array.from(doc.querySelectorAll('figure.equation'))) {
    const tex = fig.querySelector(texSelector)?.textContent?.trim() ?? '';

    const mathBlock = doc.createElement('div');
    mathBlock.setAttribute('data-type', 'mathBlock');
    mathBlock.setAttribute('data-katex', 'true');
    mathBlock.textContent = tex;

    fig.replaceWith(mathBlock);
  }

  // Inline math
  for (const token of Array.from(
    doc.querySelectorAll('span.notion-text-equation-token'),
  )) {
    // Drop the <style> element that precedes the token (Notion emits the
    // KaTeX stylesheet import there).
    const prev = token.previousElementSibling;
    if (prev?.tagName === 'STYLE') prev.remove();

    const tex = token.querySelector(texSelector)?.textContent?.trim() ?? '';

    const mathInline = doc.createElement('span');
    mathInline.setAttribute('data-type', 'mathInline');
    mathInline.setAttribute('data-katex', 'true');
    mathInline.textContent = tex;

    token.replaceWith(mathInline);
  }

  // Callouts — reversed so nested callouts are converted before the callout
  // that contains them.
  const callouts = Array.from(
    doc.querySelectorAll('figure.callout'),
  ).reverse();

  for (const fig of callouts) {
    // The content is the 2nd <div> *child* of the figure. Prefer direct
    // children so a <div> nested deeper in the figure is not picked by
    // mistake; fall back to the descendant query for unexpected layouts.
    const directDivs = Array.from(fig.querySelectorAll('div')).filter(
      (div) => div.parentNode === fig,
    );
    const contentDiv =
      directDivs[1] ?? fig.querySelector('div:nth-of-type(2)');
    if (!contentDiv) continue;

    const wrapper = doc.createElement('div');
    wrapper.setAttribute('data-type', 'callout');
    wrapper.setAttribute('data-callout-type', 'info');

    // Move (not clone) every child node so nested structure is preserved.
    wrapper.append(...Array.from(contentDiv.childNodes));
    fig.replaceWith(wrapper);
  }

  // Todolist
  const todoLists = Array.from(doc.querySelectorAll('ul.to-do-list'));

  for (const oldList of todoLists) {
    const newList = doc.createElement('ul');
    newList.setAttribute('data-type', 'taskList');

    for (const li of Array.from(oldList.querySelectorAll('li'))) {
      // Only convert items that belong to a to-do list. An <li> of an
      // ordinary list nested under a to-do would otherwise become an
      // empty task item.
      const owner = li.closest('ul, ol');
      if (!owner || !todoLists.includes(owner)) continue;

      const isChecked = li.querySelector('.checkbox.checkbox-on') != null;
      const textSpan = li.querySelector(
        'span.to-do-children-unchecked, span.to-do-children-checked',
      );

      // <li data-type="taskItem" data-checked="true|false">
      const taskItem = doc.createElement('li');
      taskItem.setAttribute('data-type', 'taskItem');
      taskItem.setAttribute('data-checked', String(isChecked));

      // <label><input type="checkbox" [checked]><span></span></label>
      const label = doc.createElement('label');
      const input = doc.createElement('input');
      input.type = 'checkbox';
      if (isChecked) input.checked = true;
      const spacer = doc.createElement('span');
      label.append(input, spacer);

      // <div><p>…item content…</p></div>
      // Move the span's child nodes instead of copying textContent so that
      // inline formatting (links, bold, inline math) is kept.
      const container = doc.createElement('div');
      const p = doc.createElement('p');
      if (textSpan) p.append(...Array.from(textSpan.childNodes));
      container.appendChild(p);

      taskItem.append(label, container);
      newList.appendChild(taskItem);
    }

    oldList.replaceWith(newList);
  }

  // Fix toggle blocks — reversed so inner toggles are unwrapped before the
  // toggles that contain them.
  const detailsList = Array.from(
    doc.querySelectorAll('ul.toggle details'),
  ).reverse();

  // unwrap from ul and li tags
  for (const details of detailsList) {
    const toggleList = details.closest('ul.toggle');
    const li = details.closest('li');

    // Only unwrap an <li> that is an item of this toggle list; an outer
    // <li> that merely contains the whole toggle list must be left alone.
    if (li && li.parentNode && li.parentNode === toggleList) {
      li.parentNode.insertBefore(details, li);
      // Remove the wrapper once nothing but whitespace is left in it.
      if (
        li.querySelector('*') === null &&
        (li.textContent ?? '').trim() === ''
      ) {
        li.remove();
      }
    }

    if (toggleList && toggleList.parentNode) {
      toggleList.parentNode.insertBefore(details, toggleList);
      if (
        toggleList.querySelector('*') === null &&
        (toggleList.textContent ?? '').trim() === ''
      ) {
        toggleList.remove();
      }
    }
  }

  return doc.body.innerHTML;
}
|
||||
Reference in New Issue
Block a user