Files
docmost/apps/server/src/integrations/export/turndown-utils.ts
2025-08-04 08:01:18 +01:00

166 lines
4.9 KiB
TypeScript

import * as TurndownService from '@joplin/turndown';
import * as TurndownPluginGfm from '@joplin/turndown-plugin-gfm';
import * as path from 'path';
export function turndown(html: string): string {
const turndownService = new TurndownService({
headingStyle: 'atx',
codeBlockStyle: 'fenced',
hr: '---',
bulletListMarker: '-',
});
const tables = TurndownPluginGfm.tables;
const strikethrough = TurndownPluginGfm.strikethrough;
const highlightedCodeBlock = TurndownPluginGfm.highlightedCodeBlock;
turndownService.use([
tables,
strikethrough,
highlightedCodeBlock,
taskList,
callout,
preserveDetail,
listParagraph,
mathInline,
mathBlock,
iframeEmbed,
video,
]);
return turndownService.turndown(html).replaceAll('<br>', ' ');
}
function listParagraph(turndownService: TurndownService) {
turndownService.addRule('paragraph', {
filter: ['p'],
replacement: (content: any, node: HTMLInputElement) => {
if (node.parentElement?.nodeName === 'LI') {
return content;
}
return `\n\n${content}\n\n`;
},
});
}
function callout(turndownService: TurndownService) {
turndownService.addRule('callout', {
filter: function (node: HTMLInputElement) {
return (
node.nodeName === 'DIV' && node.getAttribute('data-type') === 'callout'
);
},
replacement: function (content: any, node: HTMLInputElement) {
const calloutType = node.getAttribute('data-callout-type');
return `\n\n:::${calloutType}\n${content.trim()}\n:::\n\n`;
},
});
}
function taskList(turndownService: TurndownService) {
turndownService.addRule('taskListItem', {
filter: function (node: HTMLInputElement) {
return (
node.getAttribute('data-type') === 'taskItem' &&
node.parentNode.nodeName === 'UL'
);
},
replacement: function (content: any, node: HTMLInputElement) {
const checkbox = node.querySelector(
'input[type="checkbox"]',
) as HTMLInputElement;
const isChecked = checkbox.checked;
// Process content like regular list items
content = content
.replace(/^\n+/, '') // remove leading newlines
.replace(/\n+$/, '\n') // replace trailing newlines with just a single one
.replace(/\n/gm, '\n '); // indent nested content with 2 spaces
// Create the checkbox prefix
const prefix = `- ${isChecked ? '[x]' : '[ ]'} `;
return prefix + content + (node.nextSibling && !/\n$/.test(content) ? '\n' : '');
},
});
}
function preserveDetail(turndownService: TurndownService) {
turndownService.addRule('preserveDetail', {
filter: function (node: HTMLInputElement) {
return node.nodeName === 'DETAILS';
},
replacement: function (content: any, node: HTMLInputElement) {
const summary = node.querySelector(':scope > summary');
let detailSummary = '';
if (summary) {
detailSummary = `<summary>${turndownService.turndown(summary.innerHTML)}</summary>`;
}
const detailsContent = Array.from(node.childNodes)
.filter((child) => child.nodeName !== 'SUMMARY')
.map((child) =>
child.nodeType === 1
? turndownService.turndown((child as HTMLElement).outerHTML)
: child.textContent,
)
.join('');
return `\n<details>\n${detailSummary}\n\n${detailsContent}\n\n</details>\n`;
},
});
}
function mathInline(turndownService: TurndownService) {
turndownService.addRule('mathInline', {
filter: function (node: HTMLInputElement) {
return (
node.nodeName === 'SPAN' &&
node.getAttribute('data-type') === 'mathInline'
);
},
replacement: function (content: any, node: HTMLInputElement) {
return `$${content}$`;
},
});
}
function mathBlock(turndownService: TurndownService) {
turndownService.addRule('mathBlock', {
filter: function (node: HTMLInputElement) {
return (
node.nodeName === 'DIV' &&
node.getAttribute('data-type') === 'mathBlock'
);
},
replacement: function (content: any, node: HTMLInputElement) {
return `\n$$\n${content}\n$$\n`;
},
});
}
function iframeEmbed(turndownService: TurndownService) {
turndownService.addRule('iframeEmbed', {
filter: function (node: HTMLInputElement) {
return node.nodeName === 'IFRAME';
},
replacement: function (content: any, node: HTMLInputElement) {
const src = node.getAttribute('src');
return '[' + src + '](' + src + ')';
},
});
}
function video(turndownService: TurndownService) {
turndownService.addRule('video', {
filter: function (node: HTMLInputElement) {
return node.tagName === 'VIDEO';
},
replacement: function (content: any, node: HTMLInputElement) {
const src = node.getAttribute('src') || '';
const name = path.basename(src);
return '[' + name + '](' + src + ')';
},
});
}