feat: implement Markdown and HTML page imports (#85)

* page import feature
* make file interceptor common

* replace @tiptap/html
* update tiptap version

* reduce table margin

* update tiptap version

* switch to upstream drag handle lib (fixes table dragging)

* WIP

* Page import module and other fixes

* working page imports

* extract page title from h1 heading

* finalize page imports

* cleanup unused imports

* add menu arrow
This commit is contained in:
Philip Okugbe
2024-07-20 17:59:04 +01:00
committed by GitHub
parent 227ac30d5e
commit 937a07059a
35 changed files with 1163 additions and 1038 deletions

View File

@ -58,8 +58,10 @@
"fastify": "^4.28.0",
"fix-esm": "^1.0.1",
"fs-extra": "^11.2.0",
"happy-dom": "^14.12.3",
"kysely": "^0.27.3",
"kysely-migration-cli": "^0.4.2",
"marked": "^13.0.2",
"mime-types": "^2.1.35",
"nanoid": "^5.0.7",
"nestjs-kysely": "^1.0.0",

View File

@ -13,6 +13,7 @@ import { StaticModule } from './integrations/static/static.module';
import { EventEmitterModule } from '@nestjs/event-emitter';
import { HealthModule } from './integrations/health/health.module';
import { ExportModule } from './integrations/export/export.module';
import { ImportModule } from './integrations/import/import.module';
@Module({
imports: [
@ -24,6 +25,7 @@ import { ExportModule } from './integrations/export/export.module';
QueueModule,
StaticModule,
HealthModule,
ImportModule,
ExportModule,
StorageModule.forRootAsync({
imports: [EnvironmentModule],

View File

@ -27,8 +27,8 @@ import {
TiptapVideo,
TrailingNode,
} from '@docmost/editor-ext';
import { generateHTML, generateJSON } from '@tiptap/html';
import { generateText, JSONContent } from '@tiptap/core';
import { generateHTML, generateJSON } from '../common/helpers/prosemirror/html';
export const tiptapExtensions = [
StarterKit,

View File

@ -0,0 +1,21 @@
import { Extensions, getSchema, JSONContent } from '@tiptap/core';
import { DOMSerializer, Node } from '@tiptap/pm/model';
import { Window } from 'happy-dom';
/**
 * Renders a Tiptap JSON document to an HTML string on the server.
 *
 * The schema is derived from the supplied extensions, the JSON is turned
 * into a ProseMirror node, and its content is serialized into a happy-dom
 * document, which supplies the DOM implementation for the serializer.
 *
 * @param doc - Tiptap/ProseMirror document in JSON form.
 * @param extensions - Tiptap extensions used to build the schema.
 * @returns The serialized markup of the document's content.
 */
export function generateHTML(doc: JSONContent, extensions: Extensions): string {
  const schema = getSchema(extensions);
  const { content } = Node.fromJSON(schema, doc);

  const domWindow = new Window();
  const domSerializer = DOMSerializer.fromSchema(schema);
  const rendered = domSerializer.serializeFragment(content, {
    document: domWindow.document as unknown as Document,
  });

  const xmlSerializer = new domWindow.XMLSerializer();
  // @ts-ignore
  return xmlSerializer.serializeToString(rendered as unknown as Node);
}

View File

@ -0,0 +1,20 @@
import { Extensions, getSchema } from '@tiptap/core';
import { DOMParser, ParseOptions } from '@tiptap/pm/model';
import { Window, DOMParser as HappyDomParser } from 'happy-dom';
/**
 * Parses an HTML string into Tiptap/ProseMirror JSON on the server.
 *
 * The markup is parsed with happy-dom and the resulting `<body>` element is
 * handed to the ProseMirror DOM parser built from the extensions' schema.
 *
 * @param html - Markup to convert.
 * @param extensions - Tiptap extensions used to build the schema.
 * @param options - Optional ProseMirror parse options, passed through as-is.
 * @returns The parsed document as plain JSON.
 */
export function generateJSON(
  html: string,
  extensions: Extensions,
  options?: ParseOptions,
): Record<string, any> {
  const schema = getSchema(extensions);

  const htmlParser = new HappyDomParser(new Window());
  const { body } = htmlParser.parseFromString(html, 'text/html');

  // @ts-ignore
  return DOMParser.fromSchema(schema).parse(body, options).toJSON();
}

View File

@ -0,0 +1,2 @@
export * from './generateHTML.js';
export * from './generateJSON.js';

View File

@ -9,7 +9,7 @@ import { Observable } from 'rxjs';
import { FastifyRequest } from 'fastify';
@Injectable()
export class AttachmentInterceptor implements NestInterceptor {
export class FileInterceptor implements NestInterceptor {
public intercept(
context: ExecutionContext,
next: CallHandler,

View File

@ -16,7 +16,7 @@ import {
} from '@nestjs/common';
import { AttachmentService } from './services/attachment.service';
import { FastifyReply } from 'fastify';
import { AttachmentInterceptor } from './interceptors/attachment.interceptor';
import { FileInterceptor } from '../../common/interceptors/file.interceptor';
import * as bytes from 'bytes';
import { AuthUser } from '../../common/decorators/auth-user.decorator';
import { AuthWorkspace } from '../../common/decorators/auth-workspace.decorator';
@ -63,7 +63,7 @@ export class AttachmentController {
@UseGuards(JwtAuthGuard)
@HttpCode(HttpStatus.OK)
@Post('files/upload')
@UseInterceptors(AttachmentInterceptor)
@UseInterceptors(FileInterceptor)
async uploadFile(
@Req() req: any,
@Res() res: FastifyReply,
@ -176,7 +176,7 @@ export class AttachmentController {
@UseGuards(JwtAuthGuard)
@HttpCode(HttpStatus.OK)
@Post('attachments/upload-image')
@UseInterceptors(AttachmentInterceptor)
@UseInterceptors(FileInterceptor)
async uploadAvatarOrLogo(
@Req() req: any,
@Res() res: FastifyReply,

View File

@ -28,7 +28,7 @@ import { getMimeType } from '../../common/helpers';
@Controller()
export class ImportController {
constructor(
private readonly importService: ExportService,
private readonly exportService: ExportService,
private readonly pageRepo: PageRepo,
private readonly spaceAbility: SpaceAbilityFactory,
) {}
@ -54,7 +54,7 @@ export class ImportController {
throw new ForbiddenException();
}
const rawContent = await this.importService.exportPage(dto.format, page);
const rawContent = await this.exportService.exportPage(dto.format, page);
const fileExt = getExportExtension(dto.format);
const fileName = sanitize(page.title || 'Untitled') + fileExt;

View File

@ -0,0 +1,4 @@
/**
 * File formats named for page import.
 * The string values are the identifiers used at runtime.
 */
export enum ImportFormat {
  /** HTML document. */
  HTML = 'html',
  /** Markdown document. */
  Markdown = 'markdown',
}

View File

@ -0,0 +1,78 @@
import {
BadRequestException,
Controller,
ForbiddenException,
HttpCode,
HttpStatus,
Logger,
Post,
Req,
UseGuards,
UseInterceptors,
} from '@nestjs/common';
import SpaceAbilityFactory from '../../core/casl/abilities/space-ability.factory';
import { JwtAuthGuard } from '../../common/guards/jwt-auth.guard';
import { AuthUser } from '../../common/decorators/auth-user.decorator';
import { User, Workspace } from '@docmost/db/types/entity.types';
import {
SpaceCaslAction,
SpaceCaslSubject,
} from '../../core/casl/interfaces/space-ability.type';
import { FileInterceptor } from '../../common/interceptors/file.interceptor';
import * as bytes from 'bytes';
import { MAX_FILE_SIZE } from '../../core/attachment/attachment.constants';
import { ImportService } from './import.service';
import { AuthWorkspace } from '../../common/decorators/auth-workspace.decorator';
/**
 * HTTP entry point for importing an uploaded file as a new page.
 */
@Controller()
export class ImportController {
  private readonly logger = new Logger(ImportController.name);

  constructor(
    private readonly importService: ImportService,
    private readonly spaceAbility: SpaceAbilityFactory,
  ) {}

  /**
   * Handles `POST pages/import`.
   *
   * Reads a single multipart file (size capped at MAX_FILE_SIZE), takes the
   * target space from the `spaceId` multipart field, verifies that the user
   * may edit pages in that space and then delegates to the import service.
   *
   * @throws BadRequestException when the file is missing, too large, or no
   *         `spaceId` field was sent.
   * @throws ForbiddenException when the user cannot edit pages in the space.
   */
  @UseInterceptors(FileInterceptor)
  @UseGuards(JwtAuthGuard)
  @HttpCode(HttpStatus.OK)
  @Post('pages/import')
  async importPage(
    @Req() req: any,
    @AuthUser() user: User,
    @AuthWorkspace() workspace: Workspace,
  ) {
    const maxFileSize = bytes(MAX_FILE_SIZE);

    let file = null;
    try {
      file = await req.file({
        limits: { fileSize: maxFileSize, fields: 3, files: 1 },
      });
    } catch (err: any) {
      // Guard the dereference: a nullish rejection value must not turn the
      // upload failure into an unrelated TypeError inside the handler.
      this.logger.error(err?.message);
      if (err?.statusCode === 413) {
        throw new BadRequestException(
          `File too large. Exceeds the ${MAX_FILE_SIZE} limit`,
        );
      }
      // Any other failure leaves `file` null and is reported just below.
    }

    if (!file) {
      throw new BadRequestException('Failed to upload file');
    }

    const spaceId = file.fields?.spaceId?.value;
    if (!spaceId) {
      // Only spaceId is read from the form, so the message names only it.
      throw new BadRequestException('spaceId not found');
    }

    const ability = await this.spaceAbility.createForUser(user, spaceId);
    if (ability.cannot(SpaceCaslAction.Edit, SpaceCaslSubject.Page)) {
      throw new ForbiddenException();
    }

    return this.importService.importPage(file, user.id, spaceId, workspace.id);
  }
}

View File

@ -0,0 +1,9 @@
import { Module } from '@nestjs/common';
import { ImportService } from './import.service';
import { ImportController } from './import.controller';
/**
 * Nest module that wires the page import endpoint to its service.
 */
@Module({
  controllers: [ImportController],
  providers: [ImportService],
})
export class ImportModule {}

View File

@ -0,0 +1,126 @@
import { BadRequestException, Injectable, Logger } from '@nestjs/common';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
import { MultipartFile } from '@fastify/multipart';
import { sanitize } from 'sanitize-filename-ts';
import * as path from 'path';
import { htmlToJson } from '../../collaboration/collaboration.util';
import { marked } from 'marked';
import { InjectKysely } from 'nestjs-kysely';
import { KyselyDB } from '@docmost/db/types/kysely.types';
import { generateSlugId } from '../../common/helpers';
import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
import { transformHTML } from './utils/html.utils';
/**
 * Converts uploaded Markdown or HTML files into pages.
 */
@Injectable()
export class ImportService {
  private readonly logger = new Logger(ImportService.name);

  constructor(
    private readonly pageRepo: PageRepo,
    @InjectKysely() private readonly db: KyselyDB,
  ) {}

  /**
   * Imports one uploaded file as a new root-level page of a space.
   *
   * The file must have a `.md` extension with the `text/markdown` mime type
   * or a `.html` extension with the `text/html` mime type. A leading level-1
   * heading becomes the page title; otherwise the file name (without its
   * extension) is used.
   *
   * NOTE(review): the declared return type is `Promise<void>`, yet the
   * inserted page is returned; the signature is kept as-is for callers.
   *
   * @param filePromise - The uploaded multipart file (awaited here).
   * @param userId - Stored as creator and last updater of the page.
   * @param spaceId - Space that receives the page.
   * @param workspaceId - Workspace that owns the page.
   * @throws BadRequestException when the format is unsupported or the page
   *         cannot be inserted.
   */
  async importPage(
    filePromise: Promise<MultipartFile>,
    userId: string,
    spaceId: string,
    workspaceId: string,
  ): Promise<void> {
    const file = await filePromise;
    const fileBuffer = await file.toBuffer();

    // Strip only the final extension: "release.notes.md" must fall back to
    // "release.notes", not to the text before the first dot.
    const fileName = path.parse(sanitize(file.filename)).name.slice(0, 255);
    const fileExtension = path.extname(file.filename).toLowerCase();
    const fileMimeType = file.mimetype;
    const fileContent = fileBuffer.toString();

    let prosemirrorState = null;
    let createdPage = null;

    if (fileExtension.endsWith('.md') && fileMimeType === 'text/markdown') {
      prosemirrorState = await this.processMarkdown(fileContent);
    }

    if (fileExtension.endsWith('.html') && fileMimeType === 'text/html') {
      prosemirrorState = await this.processHTML(fileContent);
    }

    if (!prosemirrorState) {
      const message = 'Unsupported file format or mime type';
      this.logger.error(message);
      throw new BadRequestException(message);
    }

    const { title, prosemirrorJson } =
      this.extractTitleAndRemoveHeading(prosemirrorState);

    const pageTitle = title || fileName;

    if (prosemirrorJson) {
      try {
        const pagePosition = await this.getNewPagePosition(spaceId);

        createdPage = await this.pageRepo.insertPage({
          slugId: generateSlugId(),
          title: pageTitle,
          content: prosemirrorJson,
          position: pagePosition,
          spaceId: spaceId,
          creatorId: userId,
          workspaceId: workspaceId,
          lastUpdatedById: userId,
        });
      } catch (err) {
        const message = 'Failed to create page';
        this.logger.error(message, err);
        throw new BadRequestException(message);
      }
    }

    return createdPage;
  }

  /**
   * Converts Markdown to HTML with `marked`, then runs the HTML pipeline.
   */
  async processMarkdown(markdownInput: string): Promise<any> {
    // turn markdown to html
    const html = await marked.parse(markdownInput);
    return await this.processHTML(html);
  }

  /**
   * Normalizes the HTML with `transformHTML` and converts it to
   * ProseMirror JSON via `htmlToJson`.
   */
  async processHTML(htmlInput: string): Promise<any> {
    // turn html to prosemirror state
    return htmlToJson(transformHTML(htmlInput));
  }

  /**
   * Uses a leading level-1 heading as the page title and removes it from
   * the document (the passed state is mutated in place).
   *
   * The title is the concatenated text of all inline children of the
   * heading, so formatted headings are kept whole. An empty heading is
   * still removed and yields a `null` title instead of throwing.
   *
   * @returns The extracted title (or `null`) and the same state object.
   */
  extractTitleAndRemoveHeading(prosemirrorState: any) {
    let title = null;

    const firstNode = prosemirrorState?.content?.[0];
    if (firstNode?.type === 'heading' && firstNode.attrs?.level === 1) {
      const inlineNodes: any[] = Array.isArray(firstNode.content)
        ? firstNode.content
        : [];
      const headingText = inlineNodes
        .map((node) => (typeof node?.text === 'string' ? node.text : ''))
        .join('');
      title = headingText || null;

      // remove h1 header node from state
      prosemirrorState.content.shift();
    }

    return { title, prosemirrorJson: prosemirrorState };
  }

  /**
   * Computes a position key that sorts after the last root-level page
   * (no parent page) of the space, or an initial key when there is none.
   */
  async getNewPagePosition(spaceId: string): Promise<string> {
    const lastPage = await this.db
      .selectFrom('pages')
      .select(['id', 'position'])
      .where('spaceId', '=', spaceId)
      .orderBy('position', 'desc')
      .limit(1)
      .where('parentPageId', 'is', null)
      .executeTakeFirst();

    if (lastPage) {
      return generateJitteredKeyBetween(lastPage.position, null);
    } else {
      return generateJitteredKeyBetween(null, null);
    }
  }
}

View File

@ -0,0 +1,80 @@
import { Window, DOMParser } from 'happy-dom';
/**
 * Rewrites checkbox lists into task-list markup.
 *
 * A `<li>` that owns a checkbox gets `data-type="taskItem"` and a
 * `data-checked` attribute, the checkbox is removed and the item's direct
 * `<p>` wrappers are unwrapped. A `<ul>` with at least one such item gets
 * `data-type="taskList"`.
 *
 * Each list only inspects its own direct items and each item only its own
 * checkbox, so a task list nested inside another list is tagged on the
 * nested `<ul>` rather than being absorbed by the outer one.
 *
 * @param html - Markup to transform.
 * @returns The transformed body markup.
 */
function transformTaskList(html: string): string {
  const window = new Window();
  const doc = new DOMParser(window).parseFromString(html, 'text/html');

  const ulElements = doc.querySelectorAll('ul');
  ulElements.forEach((ul) => {
    let isTaskList = false;

    const liElements = ul.querySelectorAll('li');
    liElements.forEach((li) => {
      // Items of nested lists are handled when their own <ul> is visited.
      if (li.parentElement !== ul) {
        return;
      }

      const checkbox = li.querySelector('input[type="checkbox"]');
      // Ignore checkboxes that belong to an item of a nested list.
      if (!checkbox || checkbox.closest('li') !== li) {
        return;
      }

      isTaskList = true;
      // Add taskItem data type
      li.setAttribute('data-type', 'taskItem');
      // Set data-checked attribute based on the checkbox state
      // @ts-ignore
      li.setAttribute('data-checked', checkbox.checked ? 'true' : 'false');
      // Remove the checkbox from the li
      checkbox.remove();

      // Unwrap the item's own <p> elements in place so their content keeps
      // its position relative to siblings such as a nested list.
      const pElements = li.querySelectorAll('p');
      pElements.forEach((p) => {
        if (p.parentElement !== li) {
          return;
        }
        while (p.firstChild) {
          li.insertBefore(p.firstChild, p);
        }
        // Remove the now empty <p> element
        p.remove();
      });
    });

    // If any direct <li> owns a checkbox, mark the <ul> as a task list
    if (isTaskList) {
      ul.setAttribute('data-type', 'taskList');
    }
  });

  return doc.body.innerHTML;
}
/**
 * Replaces paragraphs of the form `:::type content :::` with callout
 * containers (`<div data-type="callout" data-callout-type="type">`).
 * Paragraphs inside `<ul>`/`<ol>` are left untouched.
 *
 * @param html - Markup to transform.
 * @returns The transformed body markup.
 */
function transformCallouts(html: string): string {
  const window = new Window();
  const doc = new DOMParser(window).parseFromString(html, 'text/html');

  // Deliberately not global: a `g` regex keeps `lastIndex` between calls,
  // which made the search on the next paragraph start mid-string and skip
  // a callout that directly follows another one.
  const calloutRegex = /:::(\w+)\s*([\s\S]*?)\s*:::/;

  const createCalloutDiv = (type: string, content: string): HTMLElement => {
    const div = doc.createElement('div');
    div.setAttribute('data-type', 'callout');
    div.setAttribute('data-callout-type', type);
    const p = doc.createElement('p');
    // The captured markup is inserted as plain text.
    p.textContent = content.trim();
    div.appendChild(p);
    return div as unknown as HTMLElement;
  };

  const pElements = doc.querySelectorAll('p');
  pElements.forEach((p) => {
    if (p.closest('ul, ol')) {
      return;
    }

    const match = calloutRegex.exec(p.innerHTML);
    if (!match) {
      return;
    }

    const [, type, content] = match;
    const calloutDiv = createCalloutDiv(type, content);
    // @ts-ignore
    p.replaceWith(calloutDiv);
  });

  return doc.body.innerHTML;
}
/**
 * Prepares imported HTML for conversion: callout blocks are rewritten
 * first, then checkbox lists are rewritten into task lists.
 *
 * @param html - Markup to transform.
 * @returns The transformed markup.
 */
export function transformHTML(html: string): string {
  const withCallouts = transformCallouts(html);
  const withTaskLists = transformTaskList(withCallouts);
  return withTaskLists;
}