* Make page import handling better

This commit is contained in:
Philipinho
2024-07-21 20:48:33 +01:00
parent e5a97d2a26
commit 89f6311e46
8 changed files with 124 additions and 111 deletions

View File

@ -28,7 +28,9 @@ import {
TrailingNode,
} from '@docmost/editor-ext';
import { generateText, JSONContent } from '@tiptap/core';
import { generateHTML, generateJSON } from '../common/helpers/prosemirror/html';
import { generateHTML } from '../common/helpers/prosemirror/html';
// the default @tiptap/html generateJSON works best for generating ProseMirror JSON state
import { generateJSON } from '@tiptap/html';
export const tiptapExtensions = [
StarterKit,

View File

@ -20,6 +20,7 @@ import {
} from '../../core/casl/interfaces/space-ability.type';
import { FileInterceptor } from '../../common/interceptors/file.interceptor';
import * as bytes from 'bytes';
import * as path from 'path';
import { MAX_FILE_SIZE } from '../../core/attachment/attachment.constants';
import { ImportService } from './import.service';
import { AuthWorkspace } from '../../common/decorators/auth-workspace.decorator';
@ -42,6 +43,8 @@ export class ImportController {
@AuthUser() user: User,
@AuthWorkspace() workspace: Workspace,
) {
const validFileExtensions = ['.md', '.html'];
const maxFileSize = bytes(MAX_FILE_SIZE);
let file = null;
@ -62,6 +65,12 @@ export class ImportController {
throw new BadRequestException('Failed to upload file');
}
if (
!validFileExtensions.includes(path.extname(file.filename).toLowerCase())
) {
throw new BadRequestException('Invalid import file type.');
}
const spaceId = file.fields?.spaceId?.value;
if (!spaceId) {

View File

@ -4,12 +4,11 @@ import { MultipartFile } from '@fastify/multipart';
import { sanitize } from 'sanitize-filename-ts';
import * as path from 'path';
import { htmlToJson } from '../../collaboration/collaboration.util';
import { marked } from 'marked';
import { InjectKysely } from 'nestjs-kysely';
import { KyselyDB } from '@docmost/db/types/kysely.types';
import { generateSlugId } from '../../common/helpers';
import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
import { transformHTML } from './utils/html.utils';
import { markdownToHtml } from './utils/marked.utils';
@Injectable()
export class ImportService {
@ -36,16 +35,23 @@ export class ImportService {
let prosemirrorState = null;
let createdPage = null;
if (fileExtension.endsWith('.md') && fileMimeType === 'text/markdown') {
prosemirrorState = await this.processMarkdown(fileContent);
}
if (fileExtension.endsWith('.html') && fileMimeType === 'text/html') {
prosemirrorState = await this.processHTML(fileContent);
try {
if (fileExtension.endsWith('.md') && fileMimeType === 'text/markdown') {
prosemirrorState = await this.processMarkdown(fileContent);
} else if (
fileExtension.endsWith('.html') &&
fileMimeType === 'text/html'
) {
prosemirrorState = await this.processHTML(fileContent);
}
} catch (err) {
const message = 'Error processing file content';
this.logger.error(message, err);
throw new BadRequestException(message);
}
if (!prosemirrorState) {
const message = 'Unsupported file format or mime type';
const message = 'Failed to create ProseMirror state';
this.logger.error(message);
throw new BadRequestException(message);
}
@ -69,8 +75,12 @@ export class ImportService {
workspaceId: workspaceId,
lastUpdatedById: userId,
});
this.logger.debug(
`Successfully imported "${title}${fileExtension}. ID: ${createdPage.id} - SlugId: ${createdPage.slugId}"`,
);
} catch (err) {
const message = 'Failed to create page';
const message = 'Failed to create imported page';
this.logger.error(message, err);
throw new BadRequestException(message);
}
@ -80,14 +90,20 @@ export class ImportService {
}
async processMarkdown(markdownInput: string): Promise<any> {
// turn markdown to html
const html = await marked.parse(markdownInput);
return await this.processHTML(html);
try {
const html = await markdownToHtml(markdownInput);
return this.processHTML(html);
} catch (err) {
throw err;
}
}
async processHTML(htmlInput: string): Promise<any> {
// turn html to prosemirror state
return htmlToJson(transformHTML(htmlInput));
try {
return htmlToJson(htmlInput);
} catch (err) {
throw err;
}
}
extractTitleAndRemoveHeading(prosemirrorState: any) {

View File

@ -1,80 +0,0 @@
import { Window, DOMParser } from 'happy-dom';
/**
 * Rewrites checkbox-style list markup into task-list markup.
 *
 * For every `<ul>`, each direct `<li>` child that owns an
 * `<input type="checkbox">` is tagged with `data-type="taskItem"` and
 * `data-checked="true" | "false"`, its checkbox is removed and its own `<p>`
 * wrappers are unwrapped in place. A `<ul>` with at least one such item is
 * tagged with `data-type="taskList"`.
 *
 * Nested lists are handled when the outer loop reaches the nested `<ul>`, so an
 * item never claims a checkbox or paragraph that belongs to a nested item.
 *
 * @param html HTML fragment to transform.
 * @returns The serialized `<body>` content of the transformed document.
 */
function transformTaskList(html: string): string {
  const window = new Window();
  const doc = new DOMParser(window).parseFromString(html, 'text/html');

  doc.querySelectorAll('ul').forEach((ul) => {
    let isTaskList = false;

    ul.querySelectorAll('li').forEach((li) => {
      // Items of nested lists are processed with their own <ul>; letting them
      // count here would mark the outer list as a task list by mistake.
      if (li.parentNode !== ul) {
        return;
      }

      // Only accept a checkbox whose nearest <li> is this item, so a plain
      // bullet wrapping a nested task list is left alone.
      const checkbox = Array.from(
        li.querySelectorAll('input[type="checkbox"]'),
      ).find((candidate) => candidate.closest('li') === li);
      if (!checkbox) {
        return;
      }

      isTaskList = true;
      li.setAttribute('data-type', 'taskItem');
      // On a freshly parsed document the checked state is exactly the presence
      // of the `checked` attribute.
      li.setAttribute(
        'data-checked',
        checkbox.hasAttribute('checked') ? 'true' : 'false',
      );
      checkbox.remove();

      // Unwrap this item's own paragraphs where they stand. Appending their
      // children to the end of the <li> would move the text after any nested
      // list and would also pull in paragraphs of nested items.
      Array.from(li.querySelectorAll('p'))
        .filter((p) => p.closest('li') === li)
        .forEach((p) => {
          const parent = p.parentNode;
          if (!parent) {
            return;
          }
          while (p.firstChild) {
            parent.insertBefore(p.firstChild, p);
          }
          p.remove();
        });
    });

    if (isTaskList) {
      ul.setAttribute('data-type', 'taskList');
    }
  });

  return doc.body.innerHTML;
}
/**
 * Converts `:::type content :::` paragraphs into callout blocks.
 *
 * Every `<p>` outside of a `<ul>`/`<ol>` whose inner HTML contains a
 * `:::type ... :::` span is replaced by
 * `<div data-type="callout" data-callout-type="type"><p>content</p></div>`.
 * Only the first span in a paragraph is used, and the whole paragraph is
 * replaced by the callout.
 *
 * @param html HTML fragment to transform.
 * @returns The serialized `<body>` content of the transformed document.
 */
function transformCallouts(html: string): string {
  const window = new Window();
  const doc = new DOMParser(window).parseFromString(html, 'text/html');

  // Deliberately not global: a `g` regex keeps `lastIndex` between calls, so a
  // match inside a skipped list paragraph could make the next paragraph's
  // callout go undetected.
  const calloutRegex = /:::(\w+)\s*([\s\S]*?)\s*:::/;

  doc.querySelectorAll('p').forEach((p) => {
    if (p.closest('ul, ol')) {
      return;
    }

    const match = calloutRegex.exec(p.innerHTML);
    if (!match) {
      return;
    }
    const [, type, content] = match;

    const calloutDiv = doc.createElement('div');
    calloutDiv.setAttribute('data-type', 'callout');
    calloutDiv.setAttribute('data-callout-type', type);

    const body = doc.createElement('p');
    // `content` was read from innerHTML, so it is already serialized markup.
    // Writing it through textContent would escape it a second time and show
    // entities and inline tags literally.
    body.innerHTML = content.trim();
    calloutDiv.appendChild(body);

    p.replaceWith(calloutDiv);
  });

  return doc.body.innerHTML;
}
/**
 * Applies the HTML rewrites in order: callouts first, then task lists.
 *
 * @param html HTML fragment to transform.
 * @returns The transformed HTML.
 */
export function transformHTML(html: string): string {
  const withCallouts = transformCallouts(html);
  return transformTaskList(withCallouts);
}

View File

@ -0,0 +1,36 @@
import { marked } from 'marked';
// Register custom list renderers so that lists containing checkboxes are
// emitted with `data-type="taskList"` / `data-type="taskItem"` attributes.
// NOTE(review): the `@ts-ignore` directives presumably hide a mismatch between
// these signatures and the installed marked typings — confirm against the
// marked version in use.
marked.use({
  renderer: {
    /**
     * Renders a list. Ordered lists get a `start` attribute unless they start
     * at 1; unordered lists whose rendered body contains `<input` are tagged
     * as task lists.
     */
    // @ts-ignore
    list(body: string, isOrdered: boolean, start: number) {
      if (!isOrdered) {
        const taskListAttr = body.includes(`<input`)
          ? ' data-type="taskList"'
          : '';
        return `<ul${taskListAttr}>\n${body}</ul>\n`;
      }

      let startAttr = '';
      if (start !== 1) {
        startAttr = ` start="${start}"`;
      }
      return `<ol ${startAttr}>\n${body}</ol>\n`;
    },
    /**
     * Renders a list item. Task items carry `data-type="taskItem"` and a
     * `data-checked` attribute that reflects their checked state.
     */
    // @ts-ignore
    listitem({ text, raw, task: isTask, checked: isChecked }): string {
      if (isTask) {
        const checkedValue = isChecked ? 'true' : 'false';
        return `<li data-type="taskItem" data-checked="${checkedValue}">${text}</li>\n`;
      }
      return `<li>${text}</li>\n`;
    },
  },
});
/**
 * Converts Markdown to HTML, dropping a leading YAML front matter block first.
 *
 * A front matter block is recognised only when both `---` delimiters sit on
 * lines of their own. A `---` in the middle of a line (inside a front matter
 * value, or in body text after an opening horizontal rule) does not end the
 * block, so content is not cut at an arbitrary dash run.
 *
 * @param markdownInput Raw Markdown, optionally starting with front matter.
 * @returns The HTML produced by `marked.parse`.
 */
export async function markdownToHtml(markdownInput: string): Promise<string> {
  // Opening `---` line, optional body lines, closing `---` line (or end of input).
  const YAML_FRONT_MATTER_REGEX =
    /^\s*---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/;

  const markdown = markdownInput
    .replace(YAML_FRONT_MATTER_REGEX, '')
    .trimStart();

  return marked.parse(markdown);
}