feat: bulk page imports (#1219)

* refactor imports - WIP

* Add readstream

* WIP

* fix attachmentId render

* fix attachmentId render

* turndown video tag

* feat: add stream upload support and improve file handling

- Add stream upload functionality to storage drivers
- Improve ZIP file extraction with better encoding handling
- Fix attachment ID rendering issues
- Add AWS S3 upload stream support
- Update dependencies for better compatibility

* WIP

* notion formatter

* move embed parser to editor-ext package

* import embeds

* utility files

* cleanup

* Switch from happy-dom to cheerio
* Refine code

* WIP

* bug fixes and UI

* sync

* WIP

* sync

* keep import modal mounted

* Show modal during upload

* WIP

* WIP
This commit is contained in:
Philip Okugbe
2025-06-09 04:29:27 +01:00
committed by GitHub
parent ce1503af85
commit 6d024fc3de
45 changed files with 2362 additions and 149 deletions

View File

@@ -67,6 +67,10 @@ export class EnvironmentService {
return this.configService.get<string>('FILE_UPLOAD_SIZE_LIMIT', '50mb');
}
getFileImportSizeLimit(): string {
return this.configService.get<string>('FILE_IMPORT_SIZE_LIMIT', '200mb');
}
getAwsS3AccessKeyId(): string {
return this.configService.get<string>('AWS_S3_ACCESS_KEY_ID');
}
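The new FILE_IMPORT_SIZE_LIMIT setting takes the same human-readable size strings as FILE_UPLOAD_SIZE_LIMIT. A minimal sketch (not part of the diff) of how such a value is parsed by the bytes package the import controller relies on:

import * as bytes from 'bytes';

// FILE_IMPORT_SIZE_LIMIT is assumed to be set in the environment;
// '200mb' mirrors the default above.
const limit = process.env.FILE_IMPORT_SIZE_LIMIT ?? '200mb';
const maxFileSize = bytes(limit); // '200mb' -> 209715200 bytes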

View File

@@ -1,5 +1,6 @@
import * as TurndownService from '@joplin/turndown';
import * as TurndownPluginGfm from '@joplin/turndown-plugin-gfm';
import * as path from 'path';
export function turndown(html: string): string {
const turndownService = new TurndownService({
@@ -23,6 +24,7 @@ export function turndown(html: string): string {
mathInline,
mathBlock,
iframeEmbed,
video,
]);
return turndownService.turndown(html).replaceAll('<br>', ' ');
}
@@ -87,8 +89,12 @@ function preserveDetail(turndownService: TurndownService) {
}
const detailsContent = Array.from(node.childNodes)
.filter(child => child.nodeName !== 'SUMMARY')
.map(child => (child.nodeType === 1 ? turndownService.turndown((child as HTMLElement).outerHTML) : child.textContent))
.filter((child) => child.nodeName !== 'SUMMARY')
.map((child) =>
child.nodeType === 1
? turndownService.turndown((child as HTMLElement).outerHTML)
: child.textContent,
)
.join('');
return `\n<details>\n${detailSummary}\n\n${detailsContent}\n\n</details>\n`;
@@ -135,3 +141,16 @@ function iframeEmbed(turndownService: TurndownService) {
},
});
}
function video(turndownService: TurndownService) {
turndownService.addRule('video', {
filter: function (node: HTMLVideoElement) {
return node.tagName === 'VIDEO';
},
replacement: function (content: any, node: HTMLVideoElement) {
const src = node.getAttribute('src') || '';
const name = path.basename(src);
return '[' + name + '](' + src + ')';
},
});
}
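A hedged usage sketch of the new rule (the relative import path is an assumption): a video tag becomes a plain Markdown link named after its file.

import { turndown } from './turndown';

turndown('<video src="media/demo.mp4"></video>');
// -> '[demo.mp4](media/demo.mp4)'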

View File

@@ -0,0 +1,18 @@
import { IsNotEmpty, IsUUID } from 'class-validator';
export class FileTaskIdDto {
@IsNotEmpty()
@IsUUID()
fileTaskId: string;
}
export type ImportPageNode = {
id: string;
slugId: string;
name: string;
content: string;
position?: string | null;
parentPageId: string | null;
fileExtension: string;
filePath: string;
};

View File

@@ -0,0 +1,79 @@
import {
Body,
Controller,
ForbiddenException,
HttpCode,
HttpStatus,
NotFoundException,
Post,
UseGuards,
} from '@nestjs/common';
import SpaceAbilityFactory from '../../core/casl/abilities/space-ability.factory';
import { JwtAuthGuard } from '../../common/guards/jwt-auth.guard';
import { User } from '@docmost/db/types/entity.types';
import {
SpaceCaslAction,
SpaceCaslSubject,
} from '../../core/casl/interfaces/space-ability.type';
import { InjectKysely } from 'nestjs-kysely';
import { KyselyDB } from '@docmost/db/types/kysely.types';
import { AuthUser } from '../../common/decorators/auth-user.decorator';
import { FileTaskIdDto } from './dto/file-task-dto';
import { SpaceMemberRepo } from '@docmost/db/repos/space/space-member.repo';
@Controller('file-tasks')
export class FileTaskController {
constructor(
private readonly spaceMemberRepo: SpaceMemberRepo,
private readonly spaceAbility: SpaceAbilityFactory,
@InjectKysely() private readonly db: KyselyDB,
) {}
@UseGuards(JwtAuthGuard)
@HttpCode(HttpStatus.OK)
@Post()
async getFileTasks(@AuthUser() user: User) {
const userSpaceIds = await this.spaceMemberRepo.getUserSpaceIds(user.id);
if (!userSpaceIds || userSpaceIds.length === 0) {
return [];
}
const fileTasks = await this.db
.selectFrom('fileTasks')
.selectAll()
.where('spaceId', 'in', userSpaceIds)
.execute();
if (!fileTasks) {
throw new NotFoundException('File task not found');
}
return fileTasks;
}
@UseGuards(JwtAuthGuard)
@HttpCode(HttpStatus.OK)
@Post('info')
async getFileTask(@Body() dto: FileTaskIdDto, @AuthUser() user: User) {
const fileTask = await this.db
.selectFrom('fileTasks')
.selectAll()
.where('id', '=', dto.fileTaskId)
.executeTakeFirst();
if (!fileTask || !fileTask.spaceId) {
throw new NotFoundException('File task not found');
}
const ability = await this.spaceAbility.createForUser(
user,
fileTask.spaceId,
);
if (ability.cannot(SpaceCaslAction.Read, SpaceCaslSubject.Page)) {
throw new ForbiddenException();
}
return fileTask;
}
}
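A client-side sketch of the two endpoints (the /api prefix is an assumption, not part of the diff):

async function listFileTasks() {
  // requires an authenticated session (JwtAuthGuard)
  const res = await fetch('/api/file-tasks', { method: 'POST' });
  return res.json(); // tasks across every space the user belongs to
}

async function getFileTask(fileTaskId: string) {
  const res = await fetch('/api/file-tasks/info', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ fileTaskId }),
  });
  return res.json(); // server answers 404/403 if missing or not readable
}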

View File

@@ -21,8 +21,9 @@ import {
import { FileInterceptor } from '../../common/interceptors/file.interceptor';
import * as bytes from 'bytes';
import * as path from 'path';
import { ImportService } from './import.service';
import { ImportService } from './services/import.service';
import { AuthWorkspace } from '../../common/decorators/auth-workspace.decorator';
import { EnvironmentService } from '../environment/environment.service';
@Controller()
export class ImportController {
@@ -31,6 +32,7 @@ export class ImportController {
constructor(
private readonly importService: ImportService,
private readonly spaceAbility: SpaceAbilityFactory,
private readonly environmentService: EnvironmentService,
) {}
@UseInterceptors(FileInterceptor)
@@ -44,18 +46,18 @@ export class ImportController {
) {
const validFileExtensions = ['.md', '.html'];
const maxFileSize = bytes('100mb');
const maxFileSize = bytes('10mb');
let file = null;
try {
file = await req.file({
limits: { fileSize: maxFileSize, fields: 3, files: 1 },
limits: { fileSize: maxFileSize, fields: 4, files: 1 },
});
} catch (err: any) {
this.logger.error(err.message);
if (err?.statusCode === 413) {
throw new BadRequestException(
`File too large. Exceeds the 100mb import limit`,
`File too large. Exceeds the 10mb import limit`,
);
}
}
@@ -73,7 +75,7 @@
const spaceId = file.fields?.spaceId?.value;
if (!spaceId) {
throw new BadRequestException('spaceId or format not found');
throw new BadRequestException('spaceId is required');
}
const ability = await this.spaceAbility.createForUser(user, spaceId);
@@ -83,4 +85,69 @@
return this.importService.importPage(file, user.id, spaceId, workspace.id);
}
@UseInterceptors(FileInterceptor)
@UseGuards(JwtAuthGuard)
@HttpCode(HttpStatus.OK)
@Post('pages/import-zip')
async importZip(
@Req() req: any,
@AuthUser() user: User,
@AuthWorkspace() workspace: Workspace,
) {
const validFileExtensions = ['.zip'];
const maxFileSize = bytes(this.environmentService.getFileImportSizeLimit());
let file = null;
try {
file = await req.file({
limits: { fileSize: maxFileSize, fields: 3, files: 1 },
});
} catch (err: any) {
this.logger.error(err.message);
if (err?.statusCode === 413) {
throw new BadRequestException(
`File too large. Exceeds the ${this.environmentService.getFileImportSizeLimit()} import limit`,
);
}
}
if (!file) {
throw new BadRequestException('Failed to upload file');
}
if (
!validFileExtensions.includes(path.extname(file.filename).toLowerCase())
) {
throw new BadRequestException('Invalid import file extension.');
}
const spaceId = file.fields?.spaceId?.value;
const source = file.fields?.source?.value;
const validZipSources = ['generic', 'notion', 'confluence'];
if (!validZipSources.includes(source)) {
throw new BadRequestException(
'Invalid import source. Import source must be either generic, notion, or confluence.',
);
}
if (!spaceId) {
throw new BadRequestException('spaceId is required');
}
const ability = await this.spaceAbility.createForUser(user, spaceId);
if (ability.cannot(SpaceCaslAction.Edit, SpaceCaslSubject.Page)) {
throw new ForbiddenException();
}
return this.importService.importZip(
file,
source,
user.id,
spaceId,
workspace.id,
);
}
}
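The new endpoint expects a multipart form. A hedged browser-side sketch (field names taken from the handler above; the /api prefix is an assumption):

async function uploadImportZip(zipFile: File, spaceId: string) {
  const form = new FormData();
  form.append('spaceId', spaceId);
  form.append('source', 'notion'); // one of: generic, notion, confluence
  form.append('file', zipFile); // must carry a .zip extension
  const res = await fetch('/api/pages/import-zip', { method: 'POST', body: form });
  return res.json(); // the created fileTasks row, queued for processing
}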

View File

@@ -1,9 +1,22 @@
import { Module } from '@nestjs/common';
import { ImportService } from './import.service';
import { ImportService } from './services/import.service';
import { ImportController } from './import.controller';
import { StorageModule } from '../storage/storage.module';
import { FileTaskService } from './services/file-task.service';
import { FileTaskProcessor } from './processors/file-task.processor';
import { ImportAttachmentService } from './services/import-attachment.service';
import { FileTaskController } from './file-task.controller';
import { PageModule } from '../../core/page/page.module';
@Module({
providers: [ImportService],
controllers: [ImportController],
providers: [
ImportService,
FileTaskService,
FileTaskProcessor,
ImportAttachmentService,
],
exports: [ImportService, ImportAttachmentService],
controllers: [ImportController, FileTaskController],
imports: [StorageModule, PageModule],
})
export class ImportModule {}

View File

@@ -0,0 +1,76 @@
import { Logger, OnModuleDestroy } from '@nestjs/common';
import { OnWorkerEvent, Processor, WorkerHost } from '@nestjs/bullmq';
import { Job } from 'bullmq';
import { QueueJob, QueueName } from 'src/integrations/queue/constants';
import { FileTaskService } from '../services/file-task.service';
import { FileTaskStatus } from '../utils/file.utils';
import { StorageService } from '../../storage/storage.service';
@Processor(QueueName.FILE_TASK_QUEUE)
export class FileTaskProcessor extends WorkerHost implements OnModuleDestroy {
private readonly logger = new Logger(FileTaskProcessor.name);
constructor(
private readonly fileTaskService: FileTaskService,
private readonly storageService: StorageService,
) {
super();
}
async process(job: Job<any, void>): Promise<void> {
try {
switch (job.name) {
case QueueJob.IMPORT_TASK:
await this.fileTaskService.processZipImport(job.data.fileTaskId);
break;
case QueueJob.EXPORT_TASK:
// TODO: export task
break;
}
} catch (err) {
this.logger.error('File task failed', err);
throw err;
}
}
@OnWorkerEvent('active')
onActive(job: Job) {
this.logger.debug(`Processing ${job.name} job`);
}
@OnWorkerEvent('failed')
async onFailed(job: Job) {
this.logger.error(
`Error processing ${job.name} job. Reason: ${job.failedReason}`,
);
try {
const fileTaskId = job.data.fileTaskId;
await this.fileTaskService.updateTaskStatus(
fileTaskId,
FileTaskStatus.Failed,
job.failedReason,
);
const fileTask = await this.fileTaskService.getFileTask(fileTaskId);
if (fileTask) {
await this.storageService.delete(fileTask.filePath);
}
} catch (err) {
this.logger.error(err);
}
}
@OnWorkerEvent('completed')
onCompleted(job: Job) {
this.logger.log(
`Completed ${job.name} job for File task ID ${job.data.fileTaskId}`,
);
}
async onModuleDestroy(): Promise<void> {
if (this.worker) {
await this.worker.close();
}
}
}

View File

@@ -0,0 +1,346 @@
import { Injectable, Logger } from '@nestjs/common';
import * as path from 'path';
import { jsonToText } from '../../../collaboration/collaboration.util';
import { InjectKysely } from 'nestjs-kysely';
import { KyselyDB } from '@docmost/db/types/kysely.types';
import {
extractZip,
FileImportSource,
FileTaskStatus,
} from '../utils/file.utils';
import { StorageService } from '../../storage/storage.service';
import * as tmp from 'tmp-promise';
import { pipeline } from 'node:stream/promises';
import { createWriteStream } from 'node:fs';
import { ImportService } from './import.service';
import { promises as fs } from 'fs';
import { generateSlugId } from '../../../common/helpers';
import { v7 } from 'uuid';
import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
import { FileTask, InsertablePage } from '@docmost/db/types/entity.types';
import { markdownToHtml } from '@docmost/editor-ext';
import { getProsemirrorContent } from '../../../common/helpers/prosemirror/utils';
import { formatImportHtml } from '../utils/import-formatter';
import {
buildAttachmentCandidates,
collectMarkdownAndHtmlFiles,
} from '../utils/import.utils';
import { executeTx } from '@docmost/db/utils';
import { BacklinkRepo } from '@docmost/db/repos/backlink/backlink.repo';
import { ImportAttachmentService } from './import-attachment.service';
import { ModuleRef } from '@nestjs/core';
import { PageService } from '../../../core/page/services/page.service';
import { ImportPageNode } from '../dto/file-task-dto';
@Injectable()
export class FileTaskService {
private readonly logger = new Logger(FileTaskService.name);
constructor(
private readonly storageService: StorageService,
private readonly importService: ImportService,
private readonly pageService: PageService,
private readonly backlinkRepo: BacklinkRepo,
@InjectKysely() private readonly db: KyselyDB,
private readonly importAttachmentService: ImportAttachmentService,
private moduleRef: ModuleRef,
) {}
async processZipImport(fileTaskId: string): Promise<void> {
const fileTask = await this.db
.selectFrom('fileTasks')
.selectAll()
.where('id', '=', fileTaskId)
.executeTakeFirst();
if (!fileTask) {
this.logger.log(`Import file task with ID ${fileTaskId} not found`);
return;
}
if (fileTask.status === FileTaskStatus.Failed) {
return;
}
if (fileTask.status === FileTaskStatus.Success) {
this.logger.log('Import task already processed.');
return;
}
const { path: tmpZipPath, cleanup: cleanupTmpFile } = await tmp.file({
prefix: 'docmost-import',
postfix: '.zip',
discardDescriptor: true,
});
const { path: tmpExtractDir, cleanup: cleanupTmpDir } = await tmp.dir({
prefix: 'docmost-extract-',
unsafeCleanup: true,
});
try {
const fileStream = await this.storageService.readStream(
fileTask.filePath,
);
await pipeline(fileStream, createWriteStream(tmpZipPath));
await extractZip(tmpZipPath, tmpExtractDir);
} catch (err) {
await cleanupTmpFile();
await cleanupTmpDir();
throw err;
}
try {
if (
fileTask.source === FileImportSource.Generic ||
fileTask.source === FileImportSource.Notion
) {
await this.processGenericImport({
extractDir: tmpExtractDir,
fileTask,
});
}
if (fileTask.source === FileImportSource.Confluence) {
let ConfluenceModule: any;
try {
// eslint-disable-next-line @typescript-eslint/no-require-imports
ConfluenceModule = require('./../../../ee/confluence-import/confluence-import.service');
} catch (err) {
this.logger.error(
'Confluence import requested but EE module not bundled in this build',
);
return;
}
const confluenceImportService = this.moduleRef.get(
ConfluenceModule.ConfluenceImportService,
{ strict: false },
);
await confluenceImportService.processConfluenceImport({
extractDir: tmpExtractDir,
fileTask,
});
}
try {
await this.updateTaskStatus(fileTaskId, FileTaskStatus.Success, null);
await cleanupTmpFile();
await cleanupTmpDir();
// delete stored file on success
await this.storageService.delete(fileTask.filePath);
} catch (err) {
this.logger.error(
`Failed to delete import file from storage. Task ID: ${fileTaskId}`,
err,
);
}
} catch (err) {
await cleanupTmpFile();
await cleanupTmpDir();
throw err;
}
}
async processGenericImport(opts: {
extractDir: string;
fileTask: FileTask;
}): Promise<void> {
const { extractDir, fileTask } = opts;
const allFiles = await collectMarkdownAndHtmlFiles(extractDir);
const attachmentCandidates = await buildAttachmentCandidates(extractDir);
const pagesMap = new Map<string, ImportPageNode>();
for (const absPath of allFiles) {
const relPath = path
.relative(extractDir, absPath)
.split(path.sep)
.join('/'); // normalize to forward-slashes
const ext = path.extname(relPath).toLowerCase();
let content = await fs.readFile(absPath, 'utf-8');
if (ext.toLowerCase() === '.md') {
content = await markdownToHtml(content);
}
pagesMap.set(relPath, {
id: v7(),
slugId: generateSlugId(),
name: path.basename(relPath, ext),
content,
parentPageId: null,
fileExtension: ext,
filePath: relPath,
});
}
// parent/child linking
pagesMap.forEach((page, filePath) => {
const segments = filePath.split('/');
segments.pop();
let parentPage = null;
while (segments.length) {
const tryMd = segments.join('/') + '.md';
const tryHtml = segments.join('/') + '.html';
if (pagesMap.has(tryMd)) {
parentPage = pagesMap.get(tryMd)!;
break;
}
if (pagesMap.has(tryHtml)) {
parentPage = pagesMap.get(tryHtml)!;
break;
}
segments.pop();
}
if (parentPage) page.parentPageId = parentPage.id;
});
// generate position keys
const siblingsMap = new Map<string | null, ImportPageNode[]>();
pagesMap.forEach((page) => {
const group = siblingsMap.get(page.parentPageId) ?? [];
group.push(page);
siblingsMap.set(page.parentPageId, group);
});
// get root pages
const rootSibs = siblingsMap.get(null);
if (rootSibs?.length) {
rootSibs.sort((a, b) => a.name.localeCompare(b.name));
// get first position key from the server
const nextPosition = await this.pageService.nextPagePosition(
fileTask.spaceId,
);
let prevPos: string | null = null;
rootSibs.forEach((page, idx) => {
if (idx === 0) {
page.position = nextPosition;
} else {
page.position = generateJitteredKeyBetween(prevPos, null);
}
prevPos = page.position;
});
}
// non-root buckets (children & deeper levels)
siblingsMap.forEach((sibs, parentId) => {
if (parentId === null) return; // root already done
sibs.sort((a, b) => a.name.localeCompare(b.name));
let prevPos: string | null = null;
for (const page of sibs) {
page.position = generateJitteredKeyBetween(prevPos, null);
prevPos = page.position;
}
});
// internal page links
const filePathToPageMetaMap = new Map<
string,
{ id: string; title: string; slugId: string }
>();
pagesMap.forEach((page) => {
filePathToPageMetaMap.set(page.filePath, {
id: page.id,
title: page.name,
slugId: page.slugId,
});
});
const pageResults = await Promise.all(
Array.from(pagesMap.values()).map(async (page) => {
const htmlContent =
await this.importAttachmentService.processAttachments({
html: page.content,
pageRelativePath: page.filePath,
extractDir,
pageId: page.id,
fileTask,
attachmentCandidates,
});
const { html, backlinks } = await formatImportHtml({
html: htmlContent,
currentFilePath: page.filePath,
filePathToPageMetaMap: filePathToPageMetaMap,
creatorId: fileTask.creatorId,
sourcePageId: page.id,
workspaceId: fileTask.workspaceId,
});
const pmState = getProsemirrorContent(
await this.importService.processHTML(html),
);
const { title, prosemirrorJson } =
this.importService.extractTitleAndRemoveHeading(pmState);
const insertablePage: InsertablePage = {
id: page.id,
slugId: page.slugId,
title: title || page.name,
content: prosemirrorJson,
textContent: jsonToText(prosemirrorJson),
ydoc: await this.importService.createYdoc(prosemirrorJson),
position: page.position!,
spaceId: fileTask.spaceId,
workspaceId: fileTask.workspaceId,
creatorId: fileTask.creatorId,
lastUpdatedById: fileTask.creatorId,
parentPageId: page.parentPageId,
};
return { insertablePage, backlinks };
}),
);
const insertablePages = pageResults.map((r) => r.insertablePage);
const insertableBacklinks = pageResults.flatMap((r) => r.backlinks);
if (insertablePages.length < 1) return;
const validPageIds = new Set(insertablePages.map((row) => row.id));
const filteredBacklinks = insertableBacklinks.filter(
({ sourcePageId, targetPageId }) =>
validPageIds.has(sourcePageId) && validPageIds.has(targetPageId),
);
await executeTx(this.db, async (trx) => {
await trx.insertInto('pages').values(insertablePages).execute();
if (filteredBacklinks.length > 0) {
await this.backlinkRepo.insertBacklink(filteredBacklinks, trx);
}
});
}
async getFileTask(fileTaskId: string) {
return this.db
.selectFrom('fileTasks')
.selectAll()
.where('id', '=', fileTaskId)
.executeTakeFirst();
}
async updateTaskStatus(
fileTaskId: string,
status: FileTaskStatus,
errorMessage?: string,
) {
try {
await this.db
.updateTable('fileTasks')
.set({ status: status, errorMessage, updatedAt: new Date() })
.where('id', '=', fileTaskId)
.execute();
} catch (err) {
this.logger.error(err);
}
}
}
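The parent/child linking above derives the hierarchy purely from paths: for each page the loop drops the file name, then walks up the remaining directory segments, attaching the page to the first ancestor that also exists as a .md or .html file. A toy illustration with hypothetical archive contents:

const pagesMap = new Map<string, { id: string }>([
  ['docs.md', { id: 'a' }],
  ['docs/guide.md', { id: 'b' }],
  ['docs/guide/intro.md', { id: 'c' }],
]);
// 'docs/guide/intro.md' tries 'docs/guide.md' -> found, parent is 'b'
// 'docs/guide.md' tries 'docs.md' -> found, parent is 'a'
// 'docs.md' has no segments left -> remains a root page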

View File

@@ -0,0 +1,303 @@
import { Injectable, Logger } from '@nestjs/common';
import * as path from 'path';
import { InjectKysely } from 'nestjs-kysely';
import { KyselyDB } from '@docmost/db/types/kysely.types';
import { cleanUrlString } from '../utils/file.utils';
import { StorageService } from '../../storage/storage.service';
import { createReadStream } from 'node:fs';
import { promises as fs } from 'fs';
import { getMimeType, sanitizeFileName } from '../../../common/helpers';
import { v7 } from 'uuid';
import { FileTask } from '@docmost/db/types/entity.types';
import { getAttachmentFolderPath } from '../../../core/attachment/attachment.utils';
import { AttachmentType } from '../../../core/attachment/attachment.constants';
import { unwrapFromParagraph } from '../utils/import-formatter';
import { resolveRelativeAttachmentPath } from '../utils/import.utils';
import { load } from 'cheerio';
@Injectable()
export class ImportAttachmentService {
private readonly logger = new Logger(ImportAttachmentService.name);
constructor(
private readonly storageService: StorageService,
@InjectKysely() private readonly db: KyselyDB,
) {}
async processAttachments(opts: {
html: string;
pageRelativePath: string;
extractDir: string;
pageId: string;
fileTask: FileTask;
attachmentCandidates: Map<string, string>;
}): Promise<string> {
const {
html,
pageRelativePath,
extractDir,
pageId,
fileTask,
attachmentCandidates,
} = opts;
const attachmentTasks: Promise<void>[] = [];
/**
* Cache keyed by the *relative* path that appears in the HTML.
* Ensures we upload (and DB-insert) each attachment at most once,
* even if it's referenced multiple times on the page.
*/
const processed = new Map<
string,
{
attachmentId: string;
storageFilePath: string;
apiFilePath: string;
fileNameWithExt: string;
abs: string;
}
>();
const uploadOnce = (relPath: string) => {
const abs = attachmentCandidates.get(relPath)!;
const attachmentId = v7();
const ext = path.extname(abs);
const fileNameWithExt =
sanitizeFileName(path.basename(abs, ext)) + ext.toLowerCase();
const storageFilePath = `${getAttachmentFolderPath(
AttachmentType.File,
fileTask.workspaceId,
)}/${attachmentId}/${fileNameWithExt}`;
const apiFilePath = `/api/files/${attachmentId}/${fileNameWithExt}`;
attachmentTasks.push(
(async () => {
const fileStream = createReadStream(abs);
await this.storageService.uploadStream(storageFilePath, fileStream);
const stat = await fs.stat(abs);
await this.db
.insertInto('attachments')
.values({
id: attachmentId,
filePath: storageFilePath,
fileName: fileNameWithExt,
fileSize: stat.size,
mimeType: getMimeType(fileNameWithExt),
type: 'file',
fileExt: ext,
creatorId: fileTask.creatorId,
workspaceId: fileTask.workspaceId,
pageId,
spaceId: fileTask.spaceId,
})
.execute();
})(),
);
return {
attachmentId,
storageFilePath,
apiFilePath,
fileNameWithExt,
abs,
};
};
/**
* Returns cached data if we've already processed this path.
* Otherwise calls `uploadOnce`, stores the result, and returns it.
*/
const processFile = (relPath: string) => {
const cached = processed.get(relPath);
if (cached) return cached;
const fresh = uploadOnce(relPath);
processed.set(relPath, fresh);
return fresh;
};
const pageDir = path.dirname(pageRelativePath);
const $ = load(html);
// image
for (const imgEl of $('img').toArray()) {
const $img = $(imgEl);
const src = cleanUrlString($img.attr('src') ?? '')!;
if (!src || src.startsWith('http')) continue;
const relPath = resolveRelativeAttachmentPath(
src,
pageDir,
attachmentCandidates,
);
if (!relPath) continue;
const { attachmentId, apiFilePath, abs } = processFile(relPath);
const stat = await fs.stat(abs);
const width = $img.attr('width') ?? '100%';
const align = $img.attr('data-align') ?? 'center';
$img
.attr('src', apiFilePath)
.attr('data-attachment-id', attachmentId)
.attr('data-size', stat.size.toString())
.attr('width', width)
.attr('data-align', align);
unwrapFromParagraph($, $img);
}
// video
for (const vidEl of $('video').toArray()) {
const $vid = $(vidEl);
const src = cleanUrlString($vid.attr('src') ?? '')!;
if (!src || src.startsWith('http')) continue;
const relPath = resolveRelativeAttachmentPath(
src,
pageDir,
attachmentCandidates,
);
if (!relPath) continue;
const { attachmentId, apiFilePath, abs } = processFile(relPath);
const stat = await fs.stat(abs);
const width = $vid.attr('width') ?? '100%';
const align = $vid.attr('data-align') ?? 'center';
$vid
.attr('src', apiFilePath)
.attr('data-attachment-id', attachmentId)
.attr('data-size', stat.size.toString())
.attr('width', width)
.attr('data-align', align);
unwrapFromParagraph($, $vid);
}
// <div data-type="attachment">
for (const el of $('div[data-type="attachment"]').toArray()) {
const $oldDiv = $(el);
const rawUrl = cleanUrlString($oldDiv.attr('data-attachment-url') ?? '')!;
if (!rawUrl || rawUrl.startsWith('http')) continue;
const relPath = resolveRelativeAttachmentPath(
rawUrl,
pageDir,
attachmentCandidates,
);
if (!relPath) continue;
const { attachmentId, apiFilePath, abs } = processFile(relPath);
const stat = await fs.stat(abs);
const fileName = path.basename(abs);
const mime = getMimeType(abs);
const $newDiv = $('<div>')
.attr('data-type', 'attachment')
.attr('data-attachment-url', apiFilePath)
.attr('data-attachment-name', fileName)
.attr('data-attachment-mime', mime)
.attr('data-attachment-size', stat.size.toString())
.attr('data-attachment-id', attachmentId);
$oldDiv.replaceWith($newDiv);
unwrapFromParagraph($, $newDiv);
}
// rewrite other attachments via <a>
for (const aEl of $('a').toArray()) {
const $a = $(aEl);
const href = cleanUrlString($a.attr('href') ?? '')!;
if (!href || href.startsWith('http')) continue;
const relPath = resolveRelativeAttachmentPath(
href,
pageDir,
attachmentCandidates,
);
if (!relPath) continue;
const { attachmentId, apiFilePath, abs } = processFile(relPath);
const stat = await fs.stat(abs);
const ext = path.extname(relPath).toLowerCase();
if (ext === '.mp4') {
const $video = $('<video>')
.attr('src', apiFilePath)
.attr('data-attachment-id', attachmentId)
.attr('data-size', stat.size.toString())
.attr('width', '100%')
.attr('data-align', 'center');
$a.replaceWith($video);
unwrapFromParagraph($, $video);
} else {
const confAliasName = $a.attr('data-linked-resource-default-alias');
let attachmentName = path.basename(abs);
if (confAliasName) attachmentName = confAliasName;
const $div = $('<div>')
.attr('data-type', 'attachment')
.attr('data-attachment-url', apiFilePath)
.attr('data-attachment-name', attachmentName)
.attr('data-attachment-mime', getMimeType(abs))
.attr('data-attachment-size', stat.size.toString())
.attr('data-attachment-id', attachmentId);
$a.replaceWith($div);
unwrapFromParagraph($, $div);
}
}
// excalidraw and drawio
for (const type of ['excalidraw', 'drawio'] as const) {
for (const el of $(`div[data-type="${type}"]`).toArray()) {
const $oldDiv = $(el);
const rawSrc = cleanUrlString($oldDiv.attr('data-src') ?? '')!;
if (!rawSrc || rawSrc.startsWith('http')) continue;
const relPath = resolveRelativeAttachmentPath(
rawSrc,
pageDir,
attachmentCandidates,
);
if (!relPath) continue;
const { attachmentId, apiFilePath, abs } = processFile(relPath);
const stat = await fs.stat(abs);
const fileName = path.basename(abs);
const width = $oldDiv.attr('data-width') || '100%';
const align = $oldDiv.attr('data-align') || 'center';
const $newDiv = $('<div>')
.attr('data-type', type)
.attr('data-src', apiFilePath)
.attr('data-title', fileName)
.attr('data-width', width)
.attr('data-size', stat.size.toString())
.attr('data-align', align)
.attr('data-attachment-id', attachmentId);
$oldDiv.replaceWith($newDiv);
unwrapFromParagraph($, $newDiv);
}
}
// wait for all uploads & DB inserts
try {
await Promise.all(attachmentTasks);
} catch (err) {
this.logger.error('Import attachment upload error', err);
}
return $.root().html() || '';
}
}
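The net effect on a page's HTML, as a hedged before/after sketch (ids and sizes are placeholders):

// before: <p><img src="assets/diagram.png"></p>
// after:  <img src="/api/files/<attachment-id>/diagram.png"
//              data-attachment-id="<attachment-id>" data-size="1234"
//              width="100%" data-align="center">
// The wrapping <p> is removed by unwrapFromParagraph, and each unique
// relative path triggers exactly one upload and one attachments insert.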

View File

@@ -4,16 +4,27 @@ import { MultipartFile } from '@fastify/multipart';
import { sanitize } from 'sanitize-filename-ts';
import * as path from 'path';
import {
htmlToJson, jsonToText,
htmlToJson,
jsonToText,
tiptapExtensions,
} from '../../collaboration/collaboration.util';
} from '../../../collaboration/collaboration.util';
import { InjectKysely } from 'nestjs-kysely';
import { KyselyDB } from '@docmost/db/types/kysely.types';
import { generateSlugId } from '../../common/helpers';
import { generateSlugId, sanitizeFileName } from '../../../common/helpers';
import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
import { TiptapTransformer } from '@hocuspocus/transformer';
import * as Y from 'yjs';
import { markdownToHtml } from "@docmost/editor-ext";
import { markdownToHtml } from '@docmost/editor-ext';
import {
FileTaskStatus,
FileTaskType,
getFileTaskFolderPath,
} from '../utils/file.utils';
import { v7 as uuid7 } from 'uuid';
import { StorageService } from '../../storage/storage.service';
import { InjectQueue } from '@nestjs/bullmq';
import { Queue } from 'bullmq';
import { QueueJob, QueueName } from '../../queue/constants';
@Injectable()
export class ImportService {
@@ -21,7 +32,10 @@ export class ImportService {
constructor(
private readonly pageRepo: PageRepo,
private readonly storageService: StorageService,
@InjectKysely() private readonly db: KyselyDB,
@InjectQueue(QueueName.FILE_TASK_QUEUE)
private readonly fileTaskQueue: Queue,
) {}
async importPage(
@@ -113,7 +127,7 @@
async createYdoc(prosemirrorJson: any): Promise<Buffer | null> {
if (prosemirrorJson) {
this.logger.debug(`Converting prosemirror json state to ydoc`);
// this.logger.debug(`Converting prosemirror json state to ydoc`);
const ydoc = TiptapTransformer.toYdoc(
prosemirrorJson,
@@ -129,20 +143,34 @@
}
extractTitleAndRemoveHeading(prosemirrorState: any) {
let title = null;
let title: string | null = null;
const content = prosemirrorState.content ?? [];
if (
prosemirrorState?.content?.length > 0 &&
prosemirrorState.content[0].type === 'heading' &&
prosemirrorState.content[0].attrs?.level === 1
content.length > 0 &&
content[0].type === 'heading' &&
content[0].attrs?.level === 1
) {
title = prosemirrorState.content[0].content[0].text;
// remove h1 header node from state
prosemirrorState.content.shift();
title = content[0].content?.[0]?.text ?? null;
content.shift();
}
return { title, prosemirrorJson: prosemirrorState };
// ensure at least one paragraph
if (content.length === 0) {
content.push({
type: 'paragraph',
content: [],
});
}
return {
title,
prosemirrorJson: {
...prosemirrorState,
content,
},
};
}
async getNewPagePosition(spaceId: string): Promise<string> {
@@ -161,4 +189,52 @@
return generateJitteredKeyBetween(null, null);
}
}
async importZip(
filePromise: Promise<MultipartFile>,
source: string,
userId: string,
spaceId: string,
workspaceId: string,
) {
const file = await filePromise;
const fileBuffer = await file.toBuffer();
const fileExtension = path.extname(file.filename).toLowerCase();
const fileName = sanitizeFileName(
path.basename(file.filename, fileExtension),
);
const fileSize = fileBuffer.length;
const fileNameWithExt = fileName + fileExtension;
const fileTaskId = uuid7();
const filePath = `${getFileTaskFolderPath(FileTaskType.Import, workspaceId)}/${fileTaskId}/${fileNameWithExt}`;
// upload file
await this.storageService.upload(filePath, fileBuffer);
const fileTask = await this.db
.insertInto('fileTasks')
.values({
id: fileTaskId,
type: FileTaskType.Import,
source: source,
status: FileTaskStatus.Processing,
fileName: fileNameWithExt,
filePath: filePath,
fileSize: fileSize,
fileExt: 'zip',
creatorId: userId,
spaceId: spaceId,
workspaceId: workspaceId,
})
.returningAll()
.executeTakeFirst();
await this.fileTaskQueue.add(QueueJob.IMPORT_TASK, {
fileTaskId: fileTaskId,
});
return fileTask;
}
}
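extractTitleAndRemoveHeading now also guarantees the document is never left empty. A minimal sketch of the new behavior (an importService instance is assumed):

const { title, prosemirrorJson } = importService.extractTitleAndRemoveHeading({
  type: 'doc',
  content: [
    {
      type: 'heading',
      attrs: { level: 1 },
      content: [{ type: 'text', text: 'My Page' }],
    },
  ],
});
// title === 'My Page'
// prosemirrorJson.content -> [{ type: 'paragraph', content: [] }]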

View File

@@ -0,0 +1,187 @@
import * as yauzl from 'yauzl';
import * as path from 'path';
import * as fs from 'node:fs';
export enum FileTaskType {
Import = 'import',
Export = 'export',
}
export enum FileImportSource {
Generic = 'generic',
Notion = 'notion',
Confluence = 'confluence',
}
export enum FileTaskStatus {
Processing = 'processing',
Success = 'success',
Failed = 'failed',
}
export function getFileTaskFolderPath(
type: FileTaskType,
workspaceId: string,
): string {
switch (type) {
case FileTaskType.Import:
return `${workspaceId}/imports`;
case FileTaskType.Export:
return `${workspaceId}/exports`;
}
}
/**
* Extracts a ZIP archive.
*/
export async function extractZip(
source: string,
target: string,
): Promise<void> {
return extractZipInternal(source, target, true);
}
/**
* Internal helper to extract a ZIP, with optional single-nested-ZIP handling.
* @param source Path to the ZIP file
* @param target Directory to extract into
* @param allowNested Whether to check and unwrap one level of nested ZIP
*/
function extractZipInternal(
source: string,
target: string,
allowNested: boolean,
): Promise<void> {
return new Promise((resolve, reject) => {
yauzl.open(
source,
{ lazyEntries: true, decodeStrings: false, autoClose: true },
(err, zipfile) => {
if (err) return reject(err);
// Handle one level of nested ZIP if allowed
if (allowNested && zipfile.entryCount === 1) {
zipfile.readEntry();
zipfile.once('entry', (entry) => {
const name = entry.fileName.toString('utf8').replace(/^\/+/, '');
const isZip =
!/\/$/.test(entry.fileName) &&
name.toLowerCase().endsWith('.zip');
if (isZip) {
// temporary name to avoid overwriting file
const nestedPath = source.endsWith('.zip')
? source.slice(0, -4) + '.inner.zip'
: source + '.inner.zip';
zipfile.openReadStream(entry, (openErr, rs) => {
if (openErr) return reject(openErr);
const ws = fs.createWriteStream(nestedPath);
rs.on('error', reject);
ws.on('error', reject);
ws.on('finish', () => {
zipfile.close();
extractZipInternal(nestedPath, target, false)
.then(() => {
fs.unlinkSync(nestedPath);
resolve();
})
.catch(reject);
});
rs.pipe(ws);
});
} else {
zipfile.close();
extractZipInternal(source, target, false).then(resolve, reject);
}
});
zipfile.once('error', reject);
return;
}
// Normal extraction
zipfile.readEntry();
zipfile.on('entry', (entry) => {
const name = entry.fileName.toString('utf8');
const safe = name.replace(/^\/+/, '');
if (safe.startsWith('__MACOSX/')) {
zipfile.readEntry();
return;
}
const fullPath = path.join(target, safe);
// Handle directories
if (/\/$/.test(name)) {
try {
fs.mkdirSync(fullPath, { recursive: true });
} catch (mkdirErr: any) {
if (mkdirErr.code === 'ENAMETOOLONG') {
console.warn(`Skipping directory (path too long): ${fullPath}`);
zipfile.readEntry();
return;
}
return reject(mkdirErr);
}
zipfile.readEntry();
return;
}
// Handle files
try {
fs.mkdirSync(path.dirname(fullPath), { recursive: true });
} catch (mkdirErr: any) {
if (mkdirErr.code === 'ENAMETOOLONG') {
console.warn(
`Skipping file directory creation (path too long): ${fullPath}`,
);
zipfile.readEntry();
return;
}
return reject(mkdirErr);
}
zipfile.openReadStream(entry, (openErr, rs) => {
if (openErr) return reject(openErr);
let ws: fs.WriteStream;
try {
ws = fs.createWriteStream(fullPath);
} catch (openWsErr: any) {
if (openWsErr.code === 'ENAMETOOLONG') {
console.warn(
`Skipping file write (path too long): ${fullPath}`,
);
zipfile.readEntry();
return;
}
return reject(openWsErr);
}
rs.on('error', (err) => reject(err));
ws.on('error', (err) => {
if ((err as any).code === 'ENAMETOOLONG') {
console.warn(
`Skipping file write on stream (path too long): ${fullPath}`,
);
zipfile.readEntry();
} else {
reject(err);
}
});
ws.on('finish', () => zipfile.readEntry());
rs.pipe(ws);
});
});
zipfile.on('end', () => resolve());
zipfile.on('error', (err) => reject(err));
},
);
});
}
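Usage sketch (paths hypothetical): a single archive nested inside the uploaded ZIP is unwrapped transparently before normal extraction.

await extractZip('/tmp/docmost-import-x1y2.zip', '/tmp/docmost-extract-x1y2');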
export function cleanUrlString(url: string): string | null {
if (!url) return null;
const [mainUrl] = url.split('?', 1);
return mainUrl;
}
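For example (a sketch):

cleanUrlString('assets/photo.png?X-Amz-Expires=3600'); // -> 'assets/photo.png'
cleanUrlString(''); // -> null, hence the string | null return type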

View File

@@ -0,0 +1,254 @@
import { getEmbedUrlAndProvider } from '@docmost/editor-ext';
import * as path from 'path';
import { v7 } from 'uuid';
import { InsertableBacklink } from '@docmost/db/types/entity.types';
import { Cheerio, CheerioAPI, load } from 'cheerio';
export async function formatImportHtml(opts: {
html: string;
currentFilePath: string;
filePathToPageMetaMap: Map<
string,
{ id: string; title: string; slugId: string }
>;
creatorId: string;
sourcePageId: string;
workspaceId: string;
pageDir?: string;
attachmentCandidates?: string[];
}): Promise<{ html: string; backlinks: InsertableBacklink[] }> {
const {
html,
currentFilePath,
filePathToPageMetaMap,
creatorId,
sourcePageId,
workspaceId,
} = opts;
const $: CheerioAPI = load(html);
const $root: Cheerio<any> = $.root();
notionFormatter($, $root);
defaultHtmlFormatter($, $root);
const backlinks = await rewriteInternalLinksToMentionHtml(
$,
$root,
currentFilePath,
filePathToPageMetaMap,
creatorId,
sourcePageId,
workspaceId,
);
return {
html: $root.html() || '',
backlinks,
};
}
export function defaultHtmlFormatter($: CheerioAPI, $root: Cheerio<any>) {
$root.find('a[href]').each((_, el) => {
const $el = $(el);
const url = $el.attr('href')!;
const { provider } = getEmbedUrlAndProvider(url);
if (provider === 'iframe') return;
const embed = `<div data-type=\"embed\" data-src=\"${url}\" data-provider=\"${provider}\" data-align=\"center\" data-width=\"640\" data-height=\"480\"></div>`;
$el.replaceWith(embed);
});
$root.find('iframe[src]').each((_, el) => {
const $el = $(el);
const url = $el.attr('src')!;
const { provider } = getEmbedUrlAndProvider(url);
const embed = `<div data-type=\"embed\" data-src=\"${url}\" data-provider=\"${provider}\" data-align=\"center\" data-width=\"640\" data-height=\"480\"></div>`;
$el.replaceWith(embed);
});
}
export function notionFormatter($: CheerioAPI, $root: Cheerio<any>) {
// remove empty description paragraphs
$root.find('p.page-description').each((_, el) => {
if (!$(el).text().trim()) $(el).remove();
});
// block math → mathBlock
$root.find('figure.equation').each((_: any, fig: any) => {
const $fig = $(fig);
const tex = $fig
.find('annotation[encoding="application/x-tex"]')
.text()
.trim();
const $math = $('<div>')
.attr('data-type', 'mathBlock')
.attr('data-katex', 'true')
.text(tex);
$fig.replaceWith($math);
});
// inline math → mathInline
$root.find('span.notion-text-equation-token').each((_, tok) => {
const $tok = $(tok);
const $prev = $tok.prev('style');
if ($prev.length) $prev.remove();
const tex = $tok
.find('annotation[encoding="application/x-tex"]')
.text()
.trim();
const $inline = $('<span>')
.attr('data-type', 'mathInline')
.attr('data-katex', 'true')
.text(tex);
$tok.replaceWith($inline);
});
// callouts
$root
.find('figure.callout')
.get()
.reverse()
.forEach((fig) => {
const $fig = $(fig);
const $content = $fig.find('div').eq(1);
if (!$content.length) return;
const $wrapper = $('<div>')
.attr('data-type', 'callout')
.attr('data-callout-type', 'info');
// @ts-ignore
$content.children().each((_, child) => $wrapper.append(child));
$fig.replaceWith($wrapper);
});
// to-do lists
$root.find('ul.to-do-list').each((_, list) => {
const $old = $(list);
const $new = $('<ul>').attr('data-type', 'taskList');
$old.find('li').each((_, li) => {
const $li = $(li);
const isChecked = $li.find('.checkbox.checkbox-on').length > 0;
const text =
$li
.find('span.to-do-children-unchecked, span.to-do-children-checked')
.first()
.text()
.trim() || '';
const $taskItem = $('<li>')
.attr('data-type', 'taskItem')
.attr('data-checked', String(isChecked));
const $label = $('<label>');
const $input = $('<input>').attr('type', 'checkbox');
if (isChecked) $input.attr('checked', '');
$label.append($input, $('<span>'));
const $container = $('<div>').append($('<p>').text(text));
$taskItem.append($label, $container);
$new.append($taskItem);
});
$old.replaceWith($new);
});
// toggle blocks
$root
.find('ul.toggle details')
.get()
.reverse()
.forEach((det) => {
const $det = $(det);
const $li = $det.closest('li');
if ($li.length) {
$li.before($det);
if (!$li.children().length) $li.remove();
}
const $ul = $det.closest('ul.toggle');
if ($ul.length) {
$ul.before($det);
if (!$ul.children().length) $ul.remove();
}
});
// bookmarks
$root
.find('figure')
.filter((_, fig) => $(fig).find('a.bookmark.source').length > 0)
.get()
.reverse()
.forEach((fig) => {
const $fig = $(fig);
const $link = $fig.find('a.bookmark.source').first();
if (!$link.length) return;
const href = $link.attr('href')!;
const title = $link.find('.bookmark-title').text().trim() || href;
const $newAnchor = $('<a>')
.addClass('bookmark source')
.attr('href', href)
.append($('<div>').addClass('bookmark-info').text(title));
$fig.replaceWith($newAnchor);
});
// remove toc
$root.find('nav.table_of_contents').remove();
}
export function unwrapFromParagraph($: CheerioAPI, $node: Cheerio<any>) {
// find the nearest <p> or <a> ancestor
let $wrapper = $node.closest('p, a');
while ($wrapper.length) {
// if the wrapper has only our node inside, replace it entirely
if ($wrapper.contents().length === 1) {
$wrapper.replaceWith($node);
} else {
// otherwise just move the node to before the wrapper
$wrapper.before($node);
}
// look again for any new wrapper around $node
$wrapper = $node.closest('p, a');
}
}
export async function rewriteInternalLinksToMentionHtml(
$: CheerioAPI,
$root: Cheerio<any>,
currentFilePath: string,
filePathToPageMetaMap: Map<
string,
{ id: string; title: string; slugId: string }
>,
creatorId: string,
sourcePageId: string,
workspaceId: string,
): Promise<InsertableBacklink[]> {
const normalize = (p: string) => p.replace(/\\/g, '/');
const backlinks: InsertableBacklink[] = [];
$root.find('a[href]').each((_, el) => {
const $a = $(el);
const raw = $a.attr('href')!;
if (raw.startsWith('http') || raw.startsWith('/api/')) return;
const resolved = normalize(
path.join(path.dirname(currentFilePath), decodeURIComponent(raw)),
);
const meta = filePathToPageMetaMap.get(resolved);
if (!meta) return;
const mentionId = v7();
const $mention = $('<span>')
.attr({
'data-type': 'mention',
'data-id': mentionId,
'data-entity-type': 'page',
'data-entity-id': meta.id,
'data-label': meta.title,
'data-slug-id': meta.slugId,
'data-creator-id': creatorId,
})
.text(meta.title);
$a.replaceWith($mention);
backlinks.push({ sourcePageId, targetPageId: meta.id, workspaceId });
});
return backlinks;
}
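A hedged before/after sketch of the internal-link rewrite (file names and ids are hypothetical):

// before, inside docs/index.html:
//   <a href="Sub%20Page.html">Sub Page</a>
// after, when docs/Sub Page.html maps to an imported page:
//   <span data-type="mention" data-id="<mention-id>" data-entity-type="page"
//         data-entity-id="<page-id>" data-label="Sub Page"
//         data-slug-id="<slug-id>" data-creator-id="<creator-id>">Sub Page</span>
// plus one backlink row { sourcePageId, targetPageId, workspaceId }.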

View File

@@ -0,0 +1,66 @@
import { promises as fs } from 'fs';
import * as path from 'path';
export async function buildAttachmentCandidates(
extractDir: string,
): Promise<Map<string, string>> {
const map = new Map<string, string>();
async function walk(dir: string) {
for (const ent of await fs.readdir(dir, { withFileTypes: true })) {
const abs = path.join(dir, ent.name);
if (ent.isDirectory()) {
await walk(abs);
} else {
if (['.md', '.html'].includes(path.extname(ent.name).toLowerCase())) {
continue;
}
const rel = path.relative(extractDir, abs).split(path.sep).join('/');
map.set(rel, abs);
}
}
}
await walk(extractDir);
return map;
}
export function resolveRelativeAttachmentPath(
raw: string,
pageDir: string,
attachmentCandidates: Map<string, string>,
): string | null {
const mainRel = decodeURIComponent(raw.replace(/^\.?\/+/, ''));
const fallback = path.normalize(path.join(pageDir, mainRel));
if (attachmentCandidates.has(mainRel)) {
return mainRel;
}
if (attachmentCandidates.has(fallback)) {
return fallback;
}
return null;
}
export async function collectMarkdownAndHtmlFiles(
dir: string,
): Promise<string[]> {
const results: string[] = [];
async function walk(current: string) {
const entries = await fs.readdir(current, { withFileTypes: true });
for (const ent of entries) {
const fullPath = path.join(current, ent.name);
if (ent.isDirectory()) {
await walk(fullPath);
} else if (
['.md', '.html'].includes(path.extname(ent.name).toLowerCase())
) {
results.push(fullPath);
}
}
}
await walk(dir);
return results;
}
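Resolution order, as a sketch: a reference is first tried relative to the archive root, then relative to the page's own directory.

// candidates as built by buildAttachmentCandidates (hypothetical contents)
const candidates = new Map([
  ['docs/assets/img.png', '/tmp/extract/docs/assets/img.png'],
]);
resolveRelativeAttachmentPath('./assets/img.png', 'docs', candidates);
// tries 'assets/img.png' first (miss), then 'docs/assets/img.png' -> hit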

View File

@@ -3,6 +3,7 @@ export enum QueueName {
ATTACHMENT_QUEUE = '{attachment-queue}',
GENERAL_QUEUE = '{general-queue}',
BILLING_QUEUE = '{billing-queue}',
FILE_TASK_QUEUE = '{file-task-queue}',
}
export enum QueueJob {
@@ -19,4 +20,7 @@ export enum QueueJob {
TRIAL_ENDED = 'trial-ended',
WELCOME_EMAIL = 'welcome-email',
FIRST_PAYMENT_EMAIL = 'first-payment-email',
IMPORT_TASK = 'import-task',
EXPORT_TASK = 'export-task',
}

View File

@@ -49,6 +49,14 @@ import { BacklinksProcessor } from './processors/backlinks.processor';
BullModule.registerQueue({
name: QueueName.BILLING_QUEUE,
}),
BullModule.registerQueue({
name: QueueName.FILE_TASK_QUEUE,
defaultJobOptions: {
removeOnComplete: true,
removeOnFail: true,
attempts: 1,
},
}),
],
exports: [BullModule],
providers: [BacklinksProcessor],

View File

@@ -3,8 +3,11 @@ import {
LocalStorageConfig,
StorageOption,
} from '../interfaces';
import { join } from 'path';
import { join, dirname } from 'path';
import * as fs from 'fs-extra';
import { Readable } from 'stream';
import { createReadStream, createWriteStream } from 'node:fs';
import { pipeline } from 'node:stream/promises';
export class LocalDriver implements StorageDriver {
private readonly config: LocalStorageConfig;
@@ -25,6 +28,16 @@ export class LocalDriver implements StorageDriver {
}
}
async uploadStream(filePath: string, file: Readable): Promise<void> {
try {
const fullPath = this._fullPath(filePath);
await fs.mkdir(dirname(fullPath), { recursive: true });
await pipeline(file, createWriteStream(fullPath));
} catch (err) {
throw new Error(`Failed to upload file: ${(err as Error).message}`);
}
}
async copy(fromFilePath: string, toFilePath: string): Promise<void> {
try {
if (await this.exists(fromFilePath)) {
@@ -43,6 +56,14 @@
}
}
async readStream(filePath: string): Promise<Readable> {
try {
return createReadStream(this._fullPath(filePath));
} catch (err) {
throw new Error(`Failed to read file: ${(err as Error).message}`);
}
}
async exists(filePath: string): Promise<boolean> {
try {
return await fs.pathExists(this._fullPath(filePath));

View File

@@ -12,6 +12,7 @@ import { streamToBuffer } from '../storage.utils';
import { Readable } from 'stream';
import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
import { getMimeType } from '../../../common/helpers';
import { Upload } from '@aws-sdk/lib-storage';
export class S3Driver implements StorageDriver {
private readonly s3Client: S3Client;
@@ -40,6 +41,26 @@
}
}
async uploadStream(filePath: string, file: Readable): Promise<void> {
try {
const contentType = getMimeType(filePath);
const upload = new Upload({
client: this.s3Client,
params: {
Bucket: this.config.bucket,
Key: filePath,
Body: file,
ContentType: contentType,
},
});
await upload.done();
} catch (err) {
throw new Error(`Failed to upload file: ${(err as Error).message}`);
}
}
async copy(fromFilePath: string, toFilePath: string): Promise<void> {
try {
if (await this.exists(fromFilePath)) {
@@ -71,6 +92,21 @@
}
}
async readStream(filePath: string): Promise<Readable> {
try {
const command = new GetObjectCommand({
Bucket: this.config.bucket,
Key: filePath,
});
const response = await this.s3Client.send(command);
return response.Body as Readable;
} catch (err) {
throw new Error(`Failed to read file from S3: ${(err as Error).message}`);
}
}
async exists(filePath: string): Promise<boolean> {
try {
const command = new HeadObjectCommand({

View File

@@ -1,10 +1,16 @@
import { Readable } from 'stream';
export interface StorageDriver {
upload(filePath: string, file: Buffer): Promise<void>;
uploadStream(filePath: string, file: Readable): Promise<void>;
copy(fromFilePath: string, toFilePath: string): Promise<void>;
read(filePath: string): Promise<Buffer>;
readStream(filePath: string): Promise<Readable>;
exists(filePath: string): Promise<boolean>;
getUrl(filePath: string): string;

View File

@@ -1,6 +1,7 @@
import { Inject, Injectable, Logger } from '@nestjs/common';
import { STORAGE_DRIVER_TOKEN } from './constants/storage.constants';
import { StorageDriver } from './interfaces';
import { Readable } from 'stream';
@Injectable()
export class StorageService {
@@ -14,6 +15,11 @@ export class StorageService {
this.logger.debug(`File uploaded successfully. Path: ${filePath}`);
}
async uploadStream(filePath: string, fileContent: Readable) {
await this.storageDriver.uploadStream(filePath, fileContent);
this.logger.debug(`File uploaded successfully. Path: ${filePath}`);
}
async copy(fromFilePath: string, toFilePath: string) {
await this.storageDriver.copy(fromFilePath, toFilePath);
this.logger.debug(`File copied successfully. Path: ${toFilePath}`);
@@ -23,6 +29,10 @@
return this.storageDriver.read(filePath);
}
async readStream(filePath: string): Promise<Readable> {
return this.storageDriver.readStream(filePath);
}
async exists(filePath: string): Promise<boolean> {
return this.storageDriver.exists(filePath);
}