mirror of
https://github.com/docmost/docmost.git
synced 2025-11-10 03:42:05 +10:00
WIP
This commit is contained in:
Submodule apps/server/src/ee updated: b312008b4b...554b8ea023
@ -1,513 +0,0 @@
|
|||||||
import { Injectable, Logger } from '@nestjs/common';
|
|
||||||
import * as path from 'path';
|
|
||||||
import { jsonToText } from '../../collaboration/collaboration.util';
|
|
||||||
import { InjectKysely } from 'nestjs-kysely';
|
|
||||||
import { KyselyDB } from '@docmost/db/types/kysely.types';
|
|
||||||
import { cleanUrlString, extractZip, FileTaskStatus } from './file.utils';
|
|
||||||
import { StorageService } from '../storage/storage.service';
|
|
||||||
import * as tmp from 'tmp-promise';
|
|
||||||
import { pipeline } from 'node:stream/promises';
|
|
||||||
import { createReadStream, createWriteStream } from 'node:fs';
|
|
||||||
import { ImportService } from './import.service';
|
|
||||||
import { promises as fs } from 'fs';
|
|
||||||
import {
|
|
||||||
generateSlugId,
|
|
||||||
getMimeType,
|
|
||||||
sanitizeFileName,
|
|
||||||
} from '../../common/helpers';
|
|
||||||
import { v7 } from 'uuid';
|
|
||||||
import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
|
|
||||||
import { FileTask, InsertablePage } from '@docmost/db/types/entity.types';
|
|
||||||
import { markdownToHtml } from '@docmost/editor-ext';
|
|
||||||
import { getAttachmentFolderPath } from '../../core/attachment/attachment.utils';
|
|
||||||
import { AttachmentType } from '../../core/attachment/attachment.constants';
|
|
||||||
import { getProsemirrorContent } from '../../common/helpers/prosemirror/utils';
|
|
||||||
import { formatImportHtml, unwrapFromParagraph } from './import-formatter';
|
|
||||||
import {
|
|
||||||
buildAttachmentCandidates,
|
|
||||||
collectMarkdownAndHtmlFiles,
|
|
||||||
resolveRelativeAttachmentPath,
|
|
||||||
} from './import.utils';
|
|
||||||
import { executeTx } from '@docmost/db/utils';
|
|
||||||
import { BacklinkRepo } from '@docmost/db/repos/backlink/backlink.repo';
|
|
||||||
import { load } from 'cheerio';
|
|
||||||
|
|
||||||
@Injectable()
|
|
||||||
export class FileTaskService {
|
|
||||||
private readonly logger = new Logger(FileTaskService.name);
|
|
||||||
|
|
||||||
constructor(
|
|
||||||
private readonly storageService: StorageService,
|
|
||||||
private readonly importService: ImportService,
|
|
||||||
private readonly backlinkRepo: BacklinkRepo,
|
|
||||||
@InjectKysely() private readonly db: KyselyDB,
|
|
||||||
) {}
|
|
||||||
|
|
||||||
async processZIpImport(fileTaskId: string): Promise<void> {
|
|
||||||
const fileTask = await this.db
|
|
||||||
.selectFrom('fileTasks')
|
|
||||||
.selectAll()
|
|
||||||
.where('id', '=', fileTaskId)
|
|
||||||
.executeTakeFirst();
|
|
||||||
|
|
||||||
if (!fileTask) {
|
|
||||||
this.logger.log(`Import file task with ID ${fileTaskId} not found`);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fileTask.status === FileTaskStatus.Success) {
|
|
||||||
this.logger.log('Imported task already processed.');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const { path: tmpZipPath, cleanup: cleanupTmpFile } = await tmp.file({
|
|
||||||
prefix: 'docmost-import',
|
|
||||||
postfix: '.zip',
|
|
||||||
discardDescriptor: true,
|
|
||||||
});
|
|
||||||
|
|
||||||
const { path: tmpExtractDir, cleanup: cleanupTmpDir } = await tmp.dir({
|
|
||||||
prefix: 'docmost-extract-',
|
|
||||||
unsafeCleanup: true,
|
|
||||||
});
|
|
||||||
|
|
||||||
const fileStream = await this.storageService.readStream(fileTask.filePath);
|
|
||||||
await pipeline(fileStream, createWriteStream(tmpZipPath));
|
|
||||||
|
|
||||||
await extractZip(tmpZipPath, tmpExtractDir);
|
|
||||||
|
|
||||||
try {
|
|
||||||
await this.updateTaskStatus(fileTaskId, FileTaskStatus.Processing);
|
|
||||||
// if type == generic
|
|
||||||
if (fileTask.source === 'generic') {
|
|
||||||
await this.processGenericImport({
|
|
||||||
extractDir: tmpExtractDir,
|
|
||||||
fileTask,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
if (fileTask.source === 'confluence') {
|
|
||||||
await this.processConfluenceImport({
|
|
||||||
extractDir: tmpExtractDir,
|
|
||||||
fileTask,
|
|
||||||
});
|
|
||||||
}*/
|
|
||||||
await this.updateTaskStatus(fileTaskId, FileTaskStatus.Success);
|
|
||||||
} catch (error) {
|
|
||||||
await this.updateTaskStatus(fileTaskId, FileTaskStatus.Failed);
|
|
||||||
this.logger.error(error);
|
|
||||||
} finally {
|
|
||||||
await cleanupTmpFile();
|
|
||||||
await cleanupTmpDir();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async processGenericImport(opts: {
|
|
||||||
extractDir: string;
|
|
||||||
fileTask: FileTask;
|
|
||||||
}): Promise<void> {
|
|
||||||
const { extractDir, fileTask } = opts;
|
|
||||||
const allFiles = await collectMarkdownAndHtmlFiles(extractDir);
|
|
||||||
const attachmentCandidates = await buildAttachmentCandidates(extractDir);
|
|
||||||
|
|
||||||
const pagesMap = new Map<
|
|
||||||
string,
|
|
||||||
{
|
|
||||||
id: string;
|
|
||||||
slugId: string;
|
|
||||||
name: string;
|
|
||||||
content: string;
|
|
||||||
position?: string | null;
|
|
||||||
parentPageId: string | null;
|
|
||||||
fileExtension: string;
|
|
||||||
filePath: string;
|
|
||||||
}
|
|
||||||
>();
|
|
||||||
|
|
||||||
for (const absPath of allFiles) {
|
|
||||||
const relPath = path
|
|
||||||
.relative(extractDir, absPath)
|
|
||||||
.split(path.sep)
|
|
||||||
.join('/'); // normalize to forward-slashes
|
|
||||||
const ext = path.extname(relPath).toLowerCase();
|
|
||||||
let content = await fs.readFile(absPath, 'utf-8');
|
|
||||||
|
|
||||||
if (ext.toLowerCase() === '.md') {
|
|
||||||
content = await markdownToHtml(content);
|
|
||||||
}
|
|
||||||
|
|
||||||
pagesMap.set(relPath, {
|
|
||||||
id: v7(),
|
|
||||||
slugId: generateSlugId(),
|
|
||||||
name: path.basename(relPath, ext),
|
|
||||||
content,
|
|
||||||
parentPageId: null,
|
|
||||||
fileExtension: ext,
|
|
||||||
filePath: relPath,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// parent/child linking
|
|
||||||
pagesMap.forEach((page, filePath) => {
|
|
||||||
const segments = filePath.split('/');
|
|
||||||
segments.pop();
|
|
||||||
let parentPage = null;
|
|
||||||
while (segments.length) {
|
|
||||||
const tryMd = segments.join('/') + '.md';
|
|
||||||
const tryHtml = segments.join('/') + '.html';
|
|
||||||
if (pagesMap.has(tryMd)) {
|
|
||||||
parentPage = pagesMap.get(tryMd)!;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (pagesMap.has(tryHtml)) {
|
|
||||||
parentPage = pagesMap.get(tryHtml)!;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
segments.pop();
|
|
||||||
}
|
|
||||||
if (parentPage) page.parentPageId = parentPage.id;
|
|
||||||
});
|
|
||||||
|
|
||||||
// generate position keys
|
|
||||||
const siblingsMap = new Map<string | null, typeof Array.prototype>();
|
|
||||||
pagesMap.forEach((page) => {
|
|
||||||
const sibs = siblingsMap.get(page.parentPageId) || [];
|
|
||||||
sibs.push(page);
|
|
||||||
siblingsMap.set(page.parentPageId, sibs);
|
|
||||||
});
|
|
||||||
siblingsMap.forEach((sibs) => {
|
|
||||||
sibs.sort((a, b) => a.name.localeCompare(b.name));
|
|
||||||
let prevPos: string | null = null;
|
|
||||||
for (const page of sibs) {
|
|
||||||
page.position = generateJitteredKeyBetween(prevPos, null);
|
|
||||||
prevPos = page.position;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
const filePathToPageMetaMap = new Map<
|
|
||||||
string,
|
|
||||||
{ id: string; title: string; slugId: string }
|
|
||||||
>();
|
|
||||||
pagesMap.forEach((page) => {
|
|
||||||
filePathToPageMetaMap.set(page.filePath, {
|
|
||||||
id: page.id,
|
|
||||||
title: page.name,
|
|
||||||
slugId: page.slugId,
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
const pageResults = await Promise.all(
|
|
||||||
Array.from(pagesMap.values()).map(async (page) => {
|
|
||||||
const htmlContent = await this.rewriteLocalFilesInHtml({
|
|
||||||
html: page.content,
|
|
||||||
pageRelativePath: page.filePath,
|
|
||||||
extractDir,
|
|
||||||
pageId: page.id,
|
|
||||||
fileTask,
|
|
||||||
attachmentCandidates,
|
|
||||||
});
|
|
||||||
|
|
||||||
const { html, backlinks } = await formatImportHtml({
|
|
||||||
html: htmlContent,
|
|
||||||
currentFilePath: page.filePath,
|
|
||||||
filePathToPageMetaMap: filePathToPageMetaMap,
|
|
||||||
creatorId: fileTask.creatorId,
|
|
||||||
sourcePageId: page.id,
|
|
||||||
workspaceId: fileTask.workspaceId,
|
|
||||||
});
|
|
||||||
|
|
||||||
const pmState = getProsemirrorContent(
|
|
||||||
await this.importService.processHTML(html),
|
|
||||||
);
|
|
||||||
|
|
||||||
const { title, prosemirrorJson } =
|
|
||||||
this.importService.extractTitleAndRemoveHeading(pmState);
|
|
||||||
|
|
||||||
const insertablePage: InsertablePage = {
|
|
||||||
id: page.id,
|
|
||||||
slugId: page.slugId,
|
|
||||||
title: title || page.name,
|
|
||||||
content: prosemirrorJson,
|
|
||||||
textContent: jsonToText(prosemirrorJson),
|
|
||||||
ydoc: await this.importService.createYdoc(prosemirrorJson),
|
|
||||||
position: page.position!,
|
|
||||||
spaceId: fileTask.spaceId,
|
|
||||||
workspaceId: fileTask.workspaceId,
|
|
||||||
creatorId: fileTask.creatorId,
|
|
||||||
lastUpdatedById: fileTask.creatorId,
|
|
||||||
parentPageId: page.parentPageId,
|
|
||||||
};
|
|
||||||
|
|
||||||
return { insertablePage, backlinks };
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
|
|
||||||
const insertablePages = pageResults.map((r) => r.insertablePage);
|
|
||||||
const insertableBacklinks = pageResults.flatMap((r) => r.backlinks);
|
|
||||||
|
|
||||||
if (insertablePages.length < 1) return;
|
|
||||||
const validPageIds = new Set(insertablePages.map((row) => row.id));
|
|
||||||
const filteredBacklinks = insertableBacklinks.filter(
|
|
||||||
({ sourcePageId, targetPageId }) =>
|
|
||||||
validPageIds.has(sourcePageId) && validPageIds.has(targetPageId),
|
|
||||||
);
|
|
||||||
|
|
||||||
await executeTx(this.db, async (trx) => {
|
|
||||||
await trx.insertInto('pages').values(insertablePages).execute();
|
|
||||||
|
|
||||||
if (filteredBacklinks.length > 0) {
|
|
||||||
await this.backlinkRepo.insertBacklink(filteredBacklinks, trx);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
async rewriteLocalFilesInHtml(opts: {
|
|
||||||
html: string;
|
|
||||||
pageRelativePath: string;
|
|
||||||
extractDir: string;
|
|
||||||
pageId: string;
|
|
||||||
fileTask: FileTask;
|
|
||||||
attachmentCandidates: Map<string, string>;
|
|
||||||
}): Promise<string> {
|
|
||||||
const {
|
|
||||||
html,
|
|
||||||
pageRelativePath,
|
|
||||||
extractDir,
|
|
||||||
pageId,
|
|
||||||
fileTask,
|
|
||||||
attachmentCandidates,
|
|
||||||
} = opts;
|
|
||||||
|
|
||||||
const attachmentTasks: Promise<void>[] = [];
|
|
||||||
|
|
||||||
const processFile = (relPath: string) => {
|
|
||||||
const abs = attachmentCandidates.get(relPath)!;
|
|
||||||
const attachmentId = v7();
|
|
||||||
const ext = path.extname(abs);
|
|
||||||
|
|
||||||
const fileNameWithExt =
|
|
||||||
sanitizeFileName(path.basename(abs, ext)) + ext.toLowerCase();
|
|
||||||
|
|
||||||
const storageFilePath = `${getAttachmentFolderPath(AttachmentType.File, fileTask.workspaceId)}/${attachmentId}/${fileNameWithExt}`;
|
|
||||||
|
|
||||||
const apiFilePath = `/api/files/${attachmentId}/${fileNameWithExt}`;
|
|
||||||
|
|
||||||
attachmentTasks.push(
|
|
||||||
(async () => {
|
|
||||||
const fileStream = createReadStream(abs);
|
|
||||||
await this.storageService.uploadStream(storageFilePath, fileStream);
|
|
||||||
const stat = await fs.stat(abs);
|
|
||||||
|
|
||||||
await this.db
|
|
||||||
.insertInto('attachments')
|
|
||||||
.values({
|
|
||||||
id: attachmentId,
|
|
||||||
filePath: storageFilePath,
|
|
||||||
fileName: fileNameWithExt,
|
|
||||||
fileSize: stat.size,
|
|
||||||
mimeType: getMimeType(fileNameWithExt),
|
|
||||||
type: 'file',
|
|
||||||
fileExt: ext,
|
|
||||||
creatorId: fileTask.creatorId,
|
|
||||||
workspaceId: fileTask.workspaceId,
|
|
||||||
pageId,
|
|
||||||
spaceId: fileTask.spaceId,
|
|
||||||
})
|
|
||||||
.execute();
|
|
||||||
})(),
|
|
||||||
);
|
|
||||||
|
|
||||||
return {
|
|
||||||
attachmentId,
|
|
||||||
storageFilePath,
|
|
||||||
apiFilePath,
|
|
||||||
fileNameWithExt,
|
|
||||||
abs,
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
const pageDir = path.dirname(pageRelativePath);
|
|
||||||
const $ = load(html);
|
|
||||||
|
|
||||||
// image
|
|
||||||
for (const imgEl of $('img').toArray()) {
|
|
||||||
const $img = $(imgEl);
|
|
||||||
const src = cleanUrlString($img.attr('src') ?? '')!;
|
|
||||||
if (!src || src.startsWith('http')) continue;
|
|
||||||
|
|
||||||
const relPath = resolveRelativeAttachmentPath(
|
|
||||||
src,
|
|
||||||
pageDir,
|
|
||||||
attachmentCandidates,
|
|
||||||
);
|
|
||||||
if (!relPath) continue;
|
|
||||||
|
|
||||||
const { attachmentId, apiFilePath, abs } = processFile(relPath);
|
|
||||||
const stat = await fs.stat(abs);
|
|
||||||
|
|
||||||
const width = $img.attr('width') ?? '100%';
|
|
||||||
const align = $img.attr('data-align') ?? 'center';
|
|
||||||
|
|
||||||
$img
|
|
||||||
.attr('src', apiFilePath)
|
|
||||||
.attr('data-attachment-id', attachmentId)
|
|
||||||
.attr('data-size', stat.size.toString())
|
|
||||||
.attr('width', width)
|
|
||||||
.attr('data-align', align);
|
|
||||||
|
|
||||||
unwrapFromParagraph($, $img);
|
|
||||||
}
|
|
||||||
|
|
||||||
// video
|
|
||||||
for (const vidEl of $('video').toArray()) {
|
|
||||||
const $vid = $(vidEl);
|
|
||||||
const src = cleanUrlString($vid.attr('src') ?? '')!;
|
|
||||||
if (!src || src.startsWith('http')) continue;
|
|
||||||
|
|
||||||
const relPath = resolveRelativeAttachmentPath(
|
|
||||||
src,
|
|
||||||
pageDir,
|
|
||||||
attachmentCandidates,
|
|
||||||
);
|
|
||||||
if (!relPath) continue;
|
|
||||||
|
|
||||||
const { attachmentId, apiFilePath, abs } = processFile(relPath);
|
|
||||||
const stat = await fs.stat(abs);
|
|
||||||
|
|
||||||
const width = $vid.attr('width') ?? '100%';
|
|
||||||
const align = $vid.attr('data-align') ?? 'center';
|
|
||||||
|
|
||||||
$vid
|
|
||||||
.attr('src', apiFilePath)
|
|
||||||
.attr('data-attachment-id', attachmentId)
|
|
||||||
.attr('data-size', stat.size.toString())
|
|
||||||
.attr('width', width)
|
|
||||||
.attr('data-align', align);
|
|
||||||
|
|
||||||
unwrapFromParagraph($, $vid);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const el of $('div[data-type="attachment"]').toArray()) {
|
|
||||||
const $oldDiv = $(el);
|
|
||||||
const rawUrl = cleanUrlString($oldDiv.attr('data-attachment-url') ?? '')!;
|
|
||||||
if (!rawUrl || rawUrl.startsWith('http')) continue;
|
|
||||||
|
|
||||||
const relPath = resolveRelativeAttachmentPath(
|
|
||||||
rawUrl,
|
|
||||||
pageDir,
|
|
||||||
attachmentCandidates,
|
|
||||||
);
|
|
||||||
if (!relPath) continue;
|
|
||||||
|
|
||||||
const { attachmentId, apiFilePath, abs } = processFile(relPath);
|
|
||||||
const stat = await fs.stat(abs);
|
|
||||||
const fileName = path.basename(abs);
|
|
||||||
const mime = getMimeType(abs);
|
|
||||||
|
|
||||||
const $newDiv = $('<div>')
|
|
||||||
.attr('data-type', 'attachment')
|
|
||||||
.attr('data-attachment-url', apiFilePath)
|
|
||||||
.attr('data-attachment-name', fileName)
|
|
||||||
.attr('data-attachment-mime', mime)
|
|
||||||
.attr('data-attachment-size', stat.size.toString())
|
|
||||||
.attr('data-attachment-id', attachmentId);
|
|
||||||
|
|
||||||
$oldDiv.replaceWith($newDiv);
|
|
||||||
unwrapFromParagraph($, $newDiv);
|
|
||||||
}
|
|
||||||
|
|
||||||
// rewrite other attachments via <a>
|
|
||||||
for (const aEl of $('a').toArray()) {
|
|
||||||
const $a = $(aEl);
|
|
||||||
const href = cleanUrlString($a.attr('href') ?? '')!;
|
|
||||||
if (!href || href.startsWith('http')) continue;
|
|
||||||
|
|
||||||
const relPath = resolveRelativeAttachmentPath(
|
|
||||||
href,
|
|
||||||
pageDir,
|
|
||||||
attachmentCandidates,
|
|
||||||
);
|
|
||||||
if (!relPath) continue;
|
|
||||||
|
|
||||||
const { attachmentId, apiFilePath, abs } = processFile(relPath);
|
|
||||||
const stat = await fs.stat(abs);
|
|
||||||
const ext = path.extname(relPath).toLowerCase();
|
|
||||||
|
|
||||||
if (ext === '.mp4') {
|
|
||||||
const $video = $('<video>')
|
|
||||||
.attr('src', apiFilePath)
|
|
||||||
.attr('data-attachment-id', attachmentId)
|
|
||||||
.attr('data-size', stat.size.toString())
|
|
||||||
.attr('width', '100%')
|
|
||||||
.attr('data-align', 'center');
|
|
||||||
$a.replaceWith($video);
|
|
||||||
unwrapFromParagraph($, $video);
|
|
||||||
} else {
|
|
||||||
// build attachment <div>
|
|
||||||
const confAliasName = $a.attr('data-linked-resource-default-alias');
|
|
||||||
let attachmentName = path.basename(abs);
|
|
||||||
if (confAliasName) attachmentName = confAliasName;
|
|
||||||
|
|
||||||
const $div = $('<div>')
|
|
||||||
.attr('data-type', 'attachment')
|
|
||||||
.attr('data-attachment-url', apiFilePath)
|
|
||||||
.attr('data-attachment-name', attachmentName)
|
|
||||||
.attr('data-attachment-mime', getMimeType(abs))
|
|
||||||
.attr('data-attachment-size', stat.size.toString())
|
|
||||||
.attr('data-attachment-id', attachmentId);
|
|
||||||
|
|
||||||
$a.replaceWith($div);
|
|
||||||
unwrapFromParagraph($, $div);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// excalidraw and drawio
|
|
||||||
for (const type of ['excalidraw', 'drawio'] as const) {
|
|
||||||
for (const el of $(`div[data-type="${type}"]`).toArray()) {
|
|
||||||
const $oldDiv = $(el);
|
|
||||||
const rawSrc = cleanUrlString($oldDiv.attr('data-src') ?? '')!;
|
|
||||||
if (!rawSrc || rawSrc.startsWith('http')) continue;
|
|
||||||
|
|
||||||
const relPath = resolveRelativeAttachmentPath(
|
|
||||||
rawSrc,
|
|
||||||
pageDir,
|
|
||||||
attachmentCandidates,
|
|
||||||
);
|
|
||||||
if (!relPath) continue;
|
|
||||||
|
|
||||||
const { attachmentId, apiFilePath, abs } = processFile(relPath);
|
|
||||||
const stat = await fs.stat(abs);
|
|
||||||
const fileName = path.basename(abs);
|
|
||||||
|
|
||||||
const width = $oldDiv.attr('data-width') || '100%';
|
|
||||||
const align = $oldDiv.attr('data-align') || 'center';
|
|
||||||
|
|
||||||
const $newDiv = $('<div>')
|
|
||||||
.attr('data-type', type)
|
|
||||||
.attr('data-src', apiFilePath)
|
|
||||||
.attr('data-title', fileName)
|
|
||||||
.attr('data-width', width)
|
|
||||||
.attr('data-size', stat.size.toString())
|
|
||||||
.attr('data-align', align)
|
|
||||||
.attr('data-attachment-id', attachmentId);
|
|
||||||
|
|
||||||
$oldDiv.replaceWith($newDiv);
|
|
||||||
unwrapFromParagraph($, $newDiv);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// wait for all uploads & DB inserts
|
|
||||||
await Promise.all(attachmentTasks);
|
|
||||||
|
|
||||||
return $.root().html() || '';
|
|
||||||
}
|
|
||||||
|
|
||||||
async updateTaskStatus(fileTaskId: string, status: FileTaskStatus) {
|
|
||||||
await this.db
|
|
||||||
.updateTable('fileTasks')
|
|
||||||
.set({ status: status })
|
|
||||||
.where('id', '=', fileTaskId)
|
|
||||||
.execute();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@ -21,7 +21,7 @@ import {
|
|||||||
import { FileInterceptor } from '../../common/interceptors/file.interceptor';
|
import { FileInterceptor } from '../../common/interceptors/file.interceptor';
|
||||||
import * as bytes from 'bytes';
|
import * as bytes from 'bytes';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
import { ImportService } from './import.service';
|
import { ImportService } from './services/import.service';
|
||||||
import { AuthWorkspace } from '../../common/decorators/auth-workspace.decorator';
|
import { AuthWorkspace } from '../../common/decorators/auth-workspace.decorator';
|
||||||
|
|
||||||
@Controller()
|
@Controller()
|
||||||
|
|||||||
@ -1,12 +1,19 @@
|
|||||||
import { Module } from '@nestjs/common';
|
import { Module } from '@nestjs/common';
|
||||||
import { ImportService } from './import.service';
|
import { ImportService } from './services/import.service';
|
||||||
import { ImportController } from './import.controller';
|
import { ImportController } from './import.controller';
|
||||||
import { StorageModule } from '../storage/storage.module';
|
import { StorageModule } from '../storage/storage.module';
|
||||||
import { FileTaskService } from './file-task.service';
|
import { FileTaskService } from './services/file-task.service';
|
||||||
import { FileTaskProcessor } from './processors/file-task.processor';
|
import { FileTaskProcessor } from './processors/file-task.processor';
|
||||||
|
import { ImportAttachmentService } from './services/import-attachment.service';
|
||||||
|
|
||||||
@Module({
|
@Module({
|
||||||
providers: [ImportService, FileTaskService, FileTaskProcessor],
|
providers: [
|
||||||
|
ImportService,
|
||||||
|
FileTaskService,
|
||||||
|
FileTaskProcessor,
|
||||||
|
ImportAttachmentService,
|
||||||
|
],
|
||||||
|
exports: [ImportService, ImportAttachmentService],
|
||||||
controllers: [ImportController],
|
controllers: [ImportController],
|
||||||
imports: [StorageModule],
|
imports: [StorageModule],
|
||||||
})
|
})
|
||||||
|
|||||||
@ -2,7 +2,7 @@ import { Logger, OnModuleDestroy } from '@nestjs/common';
|
|||||||
import { OnWorkerEvent, Processor, WorkerHost } from '@nestjs/bullmq';
|
import { OnWorkerEvent, Processor, WorkerHost } from '@nestjs/bullmq';
|
||||||
import { Job } from 'bullmq';
|
import { Job } from 'bullmq';
|
||||||
import { QueueJob, QueueName } from 'src/integrations/queue/constants';
|
import { QueueJob, QueueName } from 'src/integrations/queue/constants';
|
||||||
import { FileTaskService } from '../file-task.service';
|
import { FileTaskService } from '../services/file-task.service';
|
||||||
|
|
||||||
@Processor(QueueName.FILE_TASK_QUEUE)
|
@Processor(QueueName.FILE_TASK_QUEUE)
|
||||||
export class FileTaskProcessor extends WorkerHost implements OnModuleDestroy {
|
export class FileTaskProcessor extends WorkerHost implements OnModuleDestroy {
|
||||||
|
|||||||
@ -0,0 +1,289 @@
|
|||||||
|
import { Injectable, Logger } from '@nestjs/common';
|
||||||
|
import * as path from 'path';
|
||||||
|
import { jsonToText } from '../../../collaboration/collaboration.util';
|
||||||
|
import { InjectKysely } from 'nestjs-kysely';
|
||||||
|
import { KyselyDB } from '@docmost/db/types/kysely.types';
|
||||||
|
import {
|
||||||
|
extractZip,
|
||||||
|
FileImportType,
|
||||||
|
FileTaskStatus,
|
||||||
|
} from '../utils/file.utils';
|
||||||
|
import { StorageService } from '../../storage/storage.service';
|
||||||
|
import * as tmp from 'tmp-promise';
|
||||||
|
import { pipeline } from 'node:stream/promises';
|
||||||
|
import { createWriteStream } from 'node:fs';
|
||||||
|
import { ImportService } from './import.service';
|
||||||
|
import { promises as fs } from 'fs';
|
||||||
|
import { generateSlugId } from '../../../common/helpers';
|
||||||
|
import { v7 } from 'uuid';
|
||||||
|
import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
|
||||||
|
import { FileTask, InsertablePage } from '@docmost/db/types/entity.types';
|
||||||
|
import { markdownToHtml } from '@docmost/editor-ext';
|
||||||
|
import { getProsemirrorContent } from '../../../common/helpers/prosemirror/utils';
|
||||||
|
import { formatImportHtml } from '../utils/import-formatter';
|
||||||
|
import {
|
||||||
|
buildAttachmentCandidates,
|
||||||
|
collectMarkdownAndHtmlFiles,
|
||||||
|
} from '../utils/import.utils';
|
||||||
|
import { executeTx } from '@docmost/db/utils';
|
||||||
|
import { BacklinkRepo } from '@docmost/db/repos/backlink/backlink.repo';
|
||||||
|
import { ImportAttachmentService } from './import-attachment.service';
|
||||||
|
import { ModuleRef } from '@nestjs/core';
|
||||||
|
|
||||||
|
@Injectable()
|
||||||
|
export class FileTaskService {
|
||||||
|
private readonly logger = new Logger(FileTaskService.name);
|
||||||
|
|
||||||
|
constructor(
|
||||||
|
private readonly storageService: StorageService,
|
||||||
|
private readonly importService: ImportService,
|
||||||
|
private readonly backlinkRepo: BacklinkRepo,
|
||||||
|
@InjectKysely() private readonly db: KyselyDB,
|
||||||
|
private readonly importAttachmentService: ImportAttachmentService,
|
||||||
|
// private readonly confluenceTaskService: ConfluenceImportService,
|
||||||
|
private moduleRef: ModuleRef,
|
||||||
|
) {}
|
||||||
|
|
||||||
|
async processZIpImport(fileTaskId: string): Promise<void> {
|
||||||
|
const fileTask = await this.db
|
||||||
|
.selectFrom('fileTasks')
|
||||||
|
.selectAll()
|
||||||
|
.where('id', '=', fileTaskId)
|
||||||
|
.executeTakeFirst();
|
||||||
|
|
||||||
|
if (!fileTask) {
|
||||||
|
this.logger.log(`Import file task with ID ${fileTaskId} not found`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fileTask.status === FileTaskStatus.Success) {
|
||||||
|
this.logger.log('Imported task already processed.');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const { path: tmpZipPath, cleanup: cleanupTmpFile } = await tmp.file({
|
||||||
|
prefix: 'docmost-import',
|
||||||
|
postfix: '.zip',
|
||||||
|
discardDescriptor: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
const { path: tmpExtractDir, cleanup: cleanupTmpDir } = await tmp.dir({
|
||||||
|
prefix: 'docmost-extract-',
|
||||||
|
unsafeCleanup: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
const fileStream = await this.storageService.readStream(fileTask.filePath);
|
||||||
|
await pipeline(fileStream, createWriteStream(tmpZipPath));
|
||||||
|
|
||||||
|
await extractZip(tmpZipPath, tmpExtractDir);
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (
|
||||||
|
fileTask.source === FileImportType.Generic ||
|
||||||
|
fileTask.source === FileImportType.Notion
|
||||||
|
) {
|
||||||
|
await this.processGenericImport({
|
||||||
|
extractDir: tmpExtractDir,
|
||||||
|
fileTask,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fileTask.source === FileImportType.Confluence) {
|
||||||
|
let ConfluenceModule: any;
|
||||||
|
try {
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
||||||
|
ConfluenceModule = require('./../../../ee/confluence-import/confluence-import.service');
|
||||||
|
} catch (err) {
|
||||||
|
this.logger.error(
|
||||||
|
'Confluence import requested but EE module not bundled in this build',
|
||||||
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const confluenceImportService = this.moduleRef.get(
|
||||||
|
ConfluenceModule.ConfluenceImportService,
|
||||||
|
{ strict: false },
|
||||||
|
);
|
||||||
|
|
||||||
|
await confluenceImportService.processConfluenceImport({
|
||||||
|
extractDir: tmpExtractDir,
|
||||||
|
fileTask,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
await this.updateTaskStatus(fileTaskId, FileTaskStatus.Success);
|
||||||
|
} catch (error) {
|
||||||
|
await this.updateTaskStatus(fileTaskId, FileTaskStatus.Failed);
|
||||||
|
this.logger.error(error);
|
||||||
|
} finally {
|
||||||
|
await cleanupTmpFile();
|
||||||
|
await cleanupTmpDir();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async processGenericImport(opts: {
|
||||||
|
extractDir: string;
|
||||||
|
fileTask: FileTask;
|
||||||
|
}): Promise<void> {
|
||||||
|
const { extractDir, fileTask } = opts;
|
||||||
|
const allFiles = await collectMarkdownAndHtmlFiles(extractDir);
|
||||||
|
const attachmentCandidates = await buildAttachmentCandidates(extractDir);
|
||||||
|
|
||||||
|
const pagesMap = new Map<
|
||||||
|
string,
|
||||||
|
{
|
||||||
|
id: string;
|
||||||
|
slugId: string;
|
||||||
|
name: string;
|
||||||
|
content: string;
|
||||||
|
position?: string | null;
|
||||||
|
parentPageId: string | null;
|
||||||
|
fileExtension: string;
|
||||||
|
filePath: string;
|
||||||
|
}
|
||||||
|
>();
|
||||||
|
|
||||||
|
for (const absPath of allFiles) {
|
||||||
|
const relPath = path
|
||||||
|
.relative(extractDir, absPath)
|
||||||
|
.split(path.sep)
|
||||||
|
.join('/'); // normalize to forward-slashes
|
||||||
|
const ext = path.extname(relPath).toLowerCase();
|
||||||
|
let content = await fs.readFile(absPath, 'utf-8');
|
||||||
|
|
||||||
|
if (ext.toLowerCase() === '.md') {
|
||||||
|
content = await markdownToHtml(content);
|
||||||
|
}
|
||||||
|
|
||||||
|
pagesMap.set(relPath, {
|
||||||
|
id: v7(),
|
||||||
|
slugId: generateSlugId(),
|
||||||
|
name: path.basename(relPath, ext),
|
||||||
|
content,
|
||||||
|
parentPageId: null,
|
||||||
|
fileExtension: ext,
|
||||||
|
filePath: relPath,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// parent/child linking
|
||||||
|
pagesMap.forEach((page, filePath) => {
|
||||||
|
const segments = filePath.split('/');
|
||||||
|
segments.pop();
|
||||||
|
let parentPage = null;
|
||||||
|
while (segments.length) {
|
||||||
|
const tryMd = segments.join('/') + '.md';
|
||||||
|
const tryHtml = segments.join('/') + '.html';
|
||||||
|
if (pagesMap.has(tryMd)) {
|
||||||
|
parentPage = pagesMap.get(tryMd)!;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (pagesMap.has(tryHtml)) {
|
||||||
|
parentPage = pagesMap.get(tryHtml)!;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
segments.pop();
|
||||||
|
}
|
||||||
|
if (parentPage) page.parentPageId = parentPage.id;
|
||||||
|
});
|
||||||
|
|
||||||
|
// generate position keys
|
||||||
|
const siblingsMap = new Map<string | null, typeof Array.prototype>();
|
||||||
|
pagesMap.forEach((page) => {
|
||||||
|
const sibs = siblingsMap.get(page.parentPageId) || [];
|
||||||
|
sibs.push(page);
|
||||||
|
siblingsMap.set(page.parentPageId, sibs);
|
||||||
|
});
|
||||||
|
siblingsMap.forEach((sibs) => {
|
||||||
|
sibs.sort((a, b) => a.name.localeCompare(b.name));
|
||||||
|
let prevPos: string | null = null;
|
||||||
|
for (const page of sibs) {
|
||||||
|
page.position = generateJitteredKeyBetween(prevPos, null);
|
||||||
|
prevPos = page.position;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
const filePathToPageMetaMap = new Map<
|
||||||
|
string,
|
||||||
|
{ id: string; title: string; slugId: string }
|
||||||
|
>();
|
||||||
|
pagesMap.forEach((page) => {
|
||||||
|
filePathToPageMetaMap.set(page.filePath, {
|
||||||
|
id: page.id,
|
||||||
|
title: page.name,
|
||||||
|
slugId: page.slugId,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
const pageResults = await Promise.all(
|
||||||
|
Array.from(pagesMap.values()).map(async (page) => {
|
||||||
|
const htmlContent =
|
||||||
|
await this.importAttachmentService.processAttachments({
|
||||||
|
html: page.content,
|
||||||
|
pageRelativePath: page.filePath,
|
||||||
|
extractDir,
|
||||||
|
pageId: page.id,
|
||||||
|
fileTask,
|
||||||
|
attachmentCandidates,
|
||||||
|
});
|
||||||
|
|
||||||
|
const { html, backlinks } = await formatImportHtml({
|
||||||
|
html: htmlContent,
|
||||||
|
currentFilePath: page.filePath,
|
||||||
|
filePathToPageMetaMap: filePathToPageMetaMap,
|
||||||
|
creatorId: fileTask.creatorId,
|
||||||
|
sourcePageId: page.id,
|
||||||
|
workspaceId: fileTask.workspaceId,
|
||||||
|
});
|
||||||
|
|
||||||
|
const pmState = getProsemirrorContent(
|
||||||
|
await this.importService.processHTML(html),
|
||||||
|
);
|
||||||
|
|
||||||
|
const { title, prosemirrorJson } =
|
||||||
|
this.importService.extractTitleAndRemoveHeading(pmState);
|
||||||
|
|
||||||
|
const insertablePage: InsertablePage = {
|
||||||
|
id: page.id,
|
||||||
|
slugId: page.slugId,
|
||||||
|
title: title || page.name,
|
||||||
|
content: prosemirrorJson,
|
||||||
|
textContent: jsonToText(prosemirrorJson),
|
||||||
|
ydoc: await this.importService.createYdoc(prosemirrorJson),
|
||||||
|
position: page.position!,
|
||||||
|
spaceId: fileTask.spaceId,
|
||||||
|
workspaceId: fileTask.workspaceId,
|
||||||
|
creatorId: fileTask.creatorId,
|
||||||
|
lastUpdatedById: fileTask.creatorId,
|
||||||
|
parentPageId: page.parentPageId,
|
||||||
|
};
|
||||||
|
|
||||||
|
return { insertablePage, backlinks };
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
const insertablePages = pageResults.map((r) => r.insertablePage);
|
||||||
|
const insertableBacklinks = pageResults.flatMap((r) => r.backlinks);
|
||||||
|
|
||||||
|
if (insertablePages.length < 1) return;
|
||||||
|
const validPageIds = new Set(insertablePages.map((row) => row.id));
|
||||||
|
const filteredBacklinks = insertableBacklinks.filter(
|
||||||
|
({ sourcePageId, targetPageId }) =>
|
||||||
|
validPageIds.has(sourcePageId) && validPageIds.has(targetPageId),
|
||||||
|
);
|
||||||
|
|
||||||
|
await executeTx(this.db, async (trx) => {
|
||||||
|
await trx.insertInto('pages').values(insertablePages).execute();
|
||||||
|
|
||||||
|
if (filteredBacklinks.length > 0) {
|
||||||
|
await this.backlinkRepo.insertBacklink(filteredBacklinks, trx);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Persist a new lifecycle status (pending/processing/success/failed)
 * for the file task identified by `fileTaskId`.
 */
async updateTaskStatus(fileTaskId: string, status: FileTaskStatus) {
  await this.db
    .updateTable('fileTasks')
    .set({ status })
    .where('id', '=', fileTaskId)
    .execute();
}
|
||||||
|
}
|
||||||
@ -0,0 +1,267 @@
|
|||||||
|
import { Injectable, Logger } from '@nestjs/common';
|
||||||
|
import * as path from 'path';
|
||||||
|
import { InjectKysely } from 'nestjs-kysely';
|
||||||
|
import { KyselyDB } from '@docmost/db/types/kysely.types';
|
||||||
|
import { cleanUrlString } from '../utils/file.utils';
|
||||||
|
import { StorageService } from '../../storage/storage.service';
|
||||||
|
import { createReadStream } from 'node:fs';
|
||||||
|
import { promises as fs } from 'fs';
|
||||||
|
import { getMimeType, sanitizeFileName } from '../../../common/helpers';
|
||||||
|
import { v7 } from 'uuid';
|
||||||
|
import { FileTask } from '@docmost/db/types/entity.types';
|
||||||
|
import { getAttachmentFolderPath } from '../../../core/attachment/attachment.utils';
|
||||||
|
import { AttachmentType } from '../../../core/attachment/attachment.constants';
|
||||||
|
import { unwrapFromParagraph } from '../utils/import-formatter';
|
||||||
|
import { resolveRelativeAttachmentPath } from '../utils/import.utils';
|
||||||
|
import { load } from 'cheerio';
|
||||||
|
|
||||||
|
@Injectable()
|
||||||
|
export class ImportAttachmentService {
|
||||||
|
private readonly logger = new Logger(ImportAttachmentService.name);
|
||||||
|
|
||||||
|
constructor(
|
||||||
|
private readonly storageService: StorageService,
|
||||||
|
@InjectKysely() private readonly db: KyselyDB,
|
||||||
|
) {}
|
||||||
|
|
||||||
|
async processAttachments(opts: {
|
||||||
|
html: string;
|
||||||
|
pageRelativePath: string;
|
||||||
|
extractDir: string;
|
||||||
|
pageId: string;
|
||||||
|
fileTask: FileTask;
|
||||||
|
attachmentCandidates: Map<string, string>;
|
||||||
|
}): Promise<string> {
|
||||||
|
const {
|
||||||
|
html,
|
||||||
|
pageRelativePath,
|
||||||
|
extractDir,
|
||||||
|
pageId,
|
||||||
|
fileTask,
|
||||||
|
attachmentCandidates,
|
||||||
|
} = opts;
|
||||||
|
|
||||||
|
const attachmentTasks: Promise<void>[] = [];
|
||||||
|
|
||||||
|
const processFile = (relPath: string) => {
|
||||||
|
const abs = attachmentCandidates.get(relPath)!;
|
||||||
|
const attachmentId = v7();
|
||||||
|
const ext = path.extname(abs);
|
||||||
|
|
||||||
|
const fileNameWithExt =
|
||||||
|
sanitizeFileName(path.basename(abs, ext)) + ext.toLowerCase();
|
||||||
|
|
||||||
|
const storageFilePath = `${getAttachmentFolderPath(AttachmentType.File, fileTask.workspaceId)}/${attachmentId}/${fileNameWithExt}`;
|
||||||
|
|
||||||
|
const apiFilePath = `/api/files/${attachmentId}/${fileNameWithExt}`;
|
||||||
|
|
||||||
|
attachmentTasks.push(
|
||||||
|
(async () => {
|
||||||
|
const fileStream = createReadStream(abs);
|
||||||
|
await this.storageService.uploadStream(storageFilePath, fileStream);
|
||||||
|
const stat = await fs.stat(abs);
|
||||||
|
|
||||||
|
await this.db
|
||||||
|
.insertInto('attachments')
|
||||||
|
.values({
|
||||||
|
id: attachmentId,
|
||||||
|
filePath: storageFilePath,
|
||||||
|
fileName: fileNameWithExt,
|
||||||
|
fileSize: stat.size,
|
||||||
|
mimeType: getMimeType(fileNameWithExt),
|
||||||
|
type: 'file',
|
||||||
|
fileExt: ext,
|
||||||
|
creatorId: fileTask.creatorId,
|
||||||
|
workspaceId: fileTask.workspaceId,
|
||||||
|
pageId,
|
||||||
|
spaceId: fileTask.spaceId,
|
||||||
|
})
|
||||||
|
.execute();
|
||||||
|
})(),
|
||||||
|
);
|
||||||
|
|
||||||
|
return {
|
||||||
|
attachmentId,
|
||||||
|
storageFilePath,
|
||||||
|
apiFilePath,
|
||||||
|
fileNameWithExt,
|
||||||
|
abs,
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
const pageDir = path.dirname(pageRelativePath);
|
||||||
|
const $ = load(html);
|
||||||
|
|
||||||
|
// image
|
||||||
|
for (const imgEl of $('img').toArray()) {
|
||||||
|
const $img = $(imgEl);
|
||||||
|
const src = cleanUrlString($img.attr('src') ?? '')!;
|
||||||
|
if (!src || src.startsWith('http')) continue;
|
||||||
|
|
||||||
|
const relPath = resolveRelativeAttachmentPath(
|
||||||
|
src,
|
||||||
|
pageDir,
|
||||||
|
attachmentCandidates,
|
||||||
|
);
|
||||||
|
if (!relPath) continue;
|
||||||
|
|
||||||
|
const { attachmentId, apiFilePath, abs } = processFile(relPath);
|
||||||
|
const stat = await fs.stat(abs);
|
||||||
|
|
||||||
|
const width = $img.attr('width') ?? '100%';
|
||||||
|
const align = $img.attr('data-align') ?? 'center';
|
||||||
|
|
||||||
|
$img
|
||||||
|
.attr('src', apiFilePath)
|
||||||
|
.attr('data-attachment-id', attachmentId)
|
||||||
|
.attr('data-size', stat.size.toString())
|
||||||
|
.attr('width', width)
|
||||||
|
.attr('data-align', align);
|
||||||
|
|
||||||
|
unwrapFromParagraph($, $img);
|
||||||
|
}
|
||||||
|
|
||||||
|
// video
|
||||||
|
for (const vidEl of $('video').toArray()) {
|
||||||
|
const $vid = $(vidEl);
|
||||||
|
const src = cleanUrlString($vid.attr('src') ?? '')!;
|
||||||
|
if (!src || src.startsWith('http')) continue;
|
||||||
|
|
||||||
|
const relPath = resolveRelativeAttachmentPath(
|
||||||
|
src,
|
||||||
|
pageDir,
|
||||||
|
attachmentCandidates,
|
||||||
|
);
|
||||||
|
if (!relPath) continue;
|
||||||
|
|
||||||
|
const { attachmentId, apiFilePath, abs } = processFile(relPath);
|
||||||
|
const stat = await fs.stat(abs);
|
||||||
|
|
||||||
|
const width = $vid.attr('width') ?? '100%';
|
||||||
|
const align = $vid.attr('data-align') ?? 'center';
|
||||||
|
|
||||||
|
$vid
|
||||||
|
.attr('src', apiFilePath)
|
||||||
|
.attr('data-attachment-id', attachmentId)
|
||||||
|
.attr('data-size', stat.size.toString())
|
||||||
|
.attr('width', width)
|
||||||
|
.attr('data-align', align);
|
||||||
|
|
||||||
|
unwrapFromParagraph($, $vid);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const el of $('div[data-type="attachment"]').toArray()) {
|
||||||
|
const $oldDiv = $(el);
|
||||||
|
const rawUrl = cleanUrlString($oldDiv.attr('data-attachment-url') ?? '')!;
|
||||||
|
if (!rawUrl || rawUrl.startsWith('http')) continue;
|
||||||
|
|
||||||
|
const relPath = resolveRelativeAttachmentPath(
|
||||||
|
rawUrl,
|
||||||
|
pageDir,
|
||||||
|
attachmentCandidates,
|
||||||
|
);
|
||||||
|
if (!relPath) continue;
|
||||||
|
|
||||||
|
const { attachmentId, apiFilePath, abs } = processFile(relPath);
|
||||||
|
const stat = await fs.stat(abs);
|
||||||
|
const fileName = path.basename(abs);
|
||||||
|
const mime = getMimeType(abs);
|
||||||
|
|
||||||
|
const $newDiv = $('<div>')
|
||||||
|
.attr('data-type', 'attachment')
|
||||||
|
.attr('data-attachment-url', apiFilePath)
|
||||||
|
.attr('data-attachment-name', fileName)
|
||||||
|
.attr('data-attachment-mime', mime)
|
||||||
|
.attr('data-attachment-size', stat.size.toString())
|
||||||
|
.attr('data-attachment-id', attachmentId);
|
||||||
|
|
||||||
|
$oldDiv.replaceWith($newDiv);
|
||||||
|
unwrapFromParagraph($, $newDiv);
|
||||||
|
}
|
||||||
|
|
||||||
|
// rewrite other attachments via <a>
|
||||||
|
for (const aEl of $('a').toArray()) {
|
||||||
|
const $a = $(aEl);
|
||||||
|
const href = cleanUrlString($a.attr('href') ?? '')!;
|
||||||
|
if (!href || href.startsWith('http')) continue;
|
||||||
|
|
||||||
|
const relPath = resolveRelativeAttachmentPath(
|
||||||
|
href,
|
||||||
|
pageDir,
|
||||||
|
attachmentCandidates,
|
||||||
|
);
|
||||||
|
if (!relPath) continue;
|
||||||
|
|
||||||
|
const { attachmentId, apiFilePath, abs } = processFile(relPath);
|
||||||
|
const stat = await fs.stat(abs);
|
||||||
|
const ext = path.extname(relPath).toLowerCase();
|
||||||
|
|
||||||
|
if (ext === '.mp4') {
|
||||||
|
const $video = $('<video>')
|
||||||
|
.attr('src', apiFilePath)
|
||||||
|
.attr('data-attachment-id', attachmentId)
|
||||||
|
.attr('data-size', stat.size.toString())
|
||||||
|
.attr('width', '100%')
|
||||||
|
.attr('data-align', 'center');
|
||||||
|
$a.replaceWith($video);
|
||||||
|
unwrapFromParagraph($, $video);
|
||||||
|
} else {
|
||||||
|
// build attachment <div>
|
||||||
|
const confAliasName = $a.attr('data-linked-resource-default-alias');
|
||||||
|
let attachmentName = path.basename(abs);
|
||||||
|
if (confAliasName) attachmentName = confAliasName;
|
||||||
|
|
||||||
|
const $div = $('<div>')
|
||||||
|
.attr('data-type', 'attachment')
|
||||||
|
.attr('data-attachment-url', apiFilePath)
|
||||||
|
.attr('data-attachment-name', attachmentName)
|
||||||
|
.attr('data-attachment-mime', getMimeType(abs))
|
||||||
|
.attr('data-attachment-size', stat.size.toString())
|
||||||
|
.attr('data-attachment-id', attachmentId);
|
||||||
|
|
||||||
|
$a.replaceWith($div);
|
||||||
|
unwrapFromParagraph($, $div);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// excalidraw and drawio
|
||||||
|
for (const type of ['excalidraw', 'drawio'] as const) {
|
||||||
|
for (const el of $(`div[data-type="${type}"]`).toArray()) {
|
||||||
|
const $oldDiv = $(el);
|
||||||
|
const rawSrc = cleanUrlString($oldDiv.attr('data-src') ?? '')!;
|
||||||
|
if (!rawSrc || rawSrc.startsWith('http')) continue;
|
||||||
|
|
||||||
|
const relPath = resolveRelativeAttachmentPath(
|
||||||
|
rawSrc,
|
||||||
|
pageDir,
|
||||||
|
attachmentCandidates,
|
||||||
|
);
|
||||||
|
if (!relPath) continue;
|
||||||
|
|
||||||
|
const { attachmentId, apiFilePath, abs } = processFile(relPath);
|
||||||
|
const stat = await fs.stat(abs);
|
||||||
|
const fileName = path.basename(abs);
|
||||||
|
|
||||||
|
const width = $oldDiv.attr('data-width') || '100%';
|
||||||
|
const align = $oldDiv.attr('data-align') || 'center';
|
||||||
|
|
||||||
|
const $newDiv = $('<div>')
|
||||||
|
.attr('data-type', type)
|
||||||
|
.attr('data-src', apiFilePath)
|
||||||
|
.attr('data-title', fileName)
|
||||||
|
.attr('data-width', width)
|
||||||
|
.attr('data-size', stat.size.toString())
|
||||||
|
.attr('data-align', align)
|
||||||
|
.attr('data-attachment-id', attachmentId);
|
||||||
|
|
||||||
|
$oldDiv.replaceWith($newDiv);
|
||||||
|
unwrapFromParagraph($, $newDiv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// wait for all uploads & DB inserts
|
||||||
|
await Promise.all(attachmentTasks);
|
||||||
|
|
||||||
|
return $.root().html() || '';
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -7,10 +7,10 @@ import {
|
|||||||
htmlToJson,
|
htmlToJson,
|
||||||
jsonToText,
|
jsonToText,
|
||||||
tiptapExtensions,
|
tiptapExtensions,
|
||||||
} from '../../collaboration/collaboration.util';
|
} from '../../../collaboration/collaboration.util';
|
||||||
import { InjectKysely } from 'nestjs-kysely';
|
import { InjectKysely } from 'nestjs-kysely';
|
||||||
import { KyselyDB } from '@docmost/db/types/kysely.types';
|
import { KyselyDB } from '@docmost/db/types/kysely.types';
|
||||||
import { generateSlugId } from '../../common/helpers';
|
import { generateSlugId } from '../../../common/helpers';
|
||||||
import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
|
import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
|
||||||
import { TiptapTransformer } from '@hocuspocus/transformer';
|
import { TiptapTransformer } from '@hocuspocus/transformer';
|
||||||
import * as Y from 'yjs';
|
import * as Y from 'yjs';
|
||||||
@ -19,15 +19,16 @@ import {
|
|||||||
FileTaskStatus,
|
FileTaskStatus,
|
||||||
FileTaskType,
|
FileTaskType,
|
||||||
getFileTaskFolderPath,
|
getFileTaskFolderPath,
|
||||||
} from './file.utils';
|
} from '../utils/file.utils';
|
||||||
import { v7, v7 as uuid7 } from 'uuid';
|
import { v7, v7 as uuid7 } from 'uuid';
|
||||||
import { StorageService } from '../storage/storage.service';
|
import { StorageService } from '../../storage/storage.service';
|
||||||
import { InjectQueue } from '@nestjs/bullmq';
|
import { InjectQueue } from '@nestjs/bullmq';
|
||||||
import { Queue } from 'bullmq';
|
import { Queue } from 'bullmq';
|
||||||
import { QueueJob, QueueName } from '../queue/constants';
|
import { QueueJob, QueueName } from '../../queue/constants';
|
||||||
import { Node as PMNode } from '@tiptap/pm/model';
|
import { Node as PMNode } from '@tiptap/pm/model';
|
||||||
import { EditorState, Transaction } from '@tiptap/pm/state';
|
import { EditorState, Transaction } from '@tiptap/pm/state';
|
||||||
import { getSchema } from '@tiptap/core';
|
import { getSchema } from '@tiptap/core';
|
||||||
|
import { FileTask } from '@docmost/db/types/entity.types';
|
||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class ImportService {
|
export class ImportService {
|
||||||
@ -199,13 +200,14 @@ export class ImportService {
|
|||||||
userId: string,
|
userId: string,
|
||||||
spaceId: string,
|
spaceId: string,
|
||||||
workspaceId: string,
|
workspaceId: string,
|
||||||
): Promise<void> {
|
) {
|
||||||
const file = await filePromise;
|
const file = await filePromise;
|
||||||
const fileBuffer = await file.toBuffer();
|
const fileBuffer = await file.toBuffer();
|
||||||
const fileExtension = path.extname(file.filename).toLowerCase();
|
const fileExtension = path.extname(file.filename).toLowerCase();
|
||||||
const fileName = sanitize(
|
const fileName = sanitize(
|
||||||
path.basename(file.filename, fileExtension).slice(0, 255),
|
path.basename(file.filename, fileExtension).slice(0, 255),
|
||||||
);
|
);
|
||||||
|
const fileSize = fileBuffer.length;
|
||||||
|
|
||||||
const fileTaskId = uuid7();
|
const fileTaskId = uuid7();
|
||||||
const filePath = `${getFileTaskFolderPath(FileTaskType.Import, workspaceId)}/${fileTaskId}/${fileName}`;
|
const filePath = `${getFileTaskFolderPath(FileTaskType.Import, workspaceId)}/${fileTaskId}/${fileName}`;
|
||||||
@ -213,36 +215,29 @@ export class ImportService {
|
|||||||
// upload file
|
// upload file
|
||||||
await this.storageService.upload(filePath, fileBuffer);
|
await this.storageService.upload(filePath, fileBuffer);
|
||||||
|
|
||||||
// store in fileTasks table
|
const fileTask = await this.db
|
||||||
await this.db
|
|
||||||
.insertInto('fileTasks')
|
.insertInto('fileTasks')
|
||||||
.values({
|
.values({
|
||||||
id: fileTaskId,
|
id: fileTaskId,
|
||||||
type: FileTaskType.Import,
|
type: FileTaskType.Import,
|
||||||
source: source,
|
source: source,
|
||||||
status: FileTaskStatus.Pending,
|
status: FileTaskStatus.Processing,
|
||||||
fileName: fileName,
|
fileName: fileName,
|
||||||
filePath: filePath,
|
filePath: filePath,
|
||||||
fileSize: 0,
|
fileSize: fileSize,
|
||||||
fileExt: 'zip',
|
fileExt: 'zip',
|
||||||
creatorId: userId,
|
creatorId: userId,
|
||||||
spaceId: spaceId,
|
spaceId: spaceId,
|
||||||
workspaceId: workspaceId,
|
workspaceId: workspaceId,
|
||||||
})
|
})
|
||||||
|
.returningAll()
|
||||||
.execute();
|
.execute();
|
||||||
|
|
||||||
// what to send to queue
|
|
||||||
// pass the task ID
|
|
||||||
await this.fileTaskQueue.add(QueueJob.IMPORT_TASK, {
|
await this.fileTaskQueue.add(QueueJob.IMPORT_TASK, {
|
||||||
fileTaskId: fileTaskId,
|
fileTaskId: fileTaskId,
|
||||||
});
|
});
|
||||||
// return tasks info
|
|
||||||
|
|
||||||
// when the processor picks it up
|
return fileTask;
|
||||||
// we change the status to processing
|
|
||||||
// if it gets processed successfully,
|
|
||||||
// we change the status to success
|
|
||||||
// else failed
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async markdownOrHtmlToProsemirror(
|
async markdownOrHtmlToProsemirror(
|
||||||
@ -14,7 +14,6 @@ export enum FileImportType {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export enum FileTaskStatus {
|
export enum FileTaskStatus {
|
||||||
Pending = 'pending',
|
|
||||||
Processing = 'processing',
|
Processing = 'processing',
|
||||||
Success = 'success',
|
Success = 'success',
|
||||||
Failed = 'failed',
|
Failed = 'failed',
|
||||||
Reference in New Issue
Block a user