mirror of
https://github.com/docmost/docmost.git
synced 2025-11-10 03:52:04 +10:00
Compare commits
2 Commits
36a573fce9
...
8143452a21
| Author | SHA1 | Date | |
|---|---|---|---|
| 8143452a21 | |||
| 003b8f5515 |
@ -32,8 +32,27 @@ export function getFileTaskFolderPath(
|
||||
}
|
||||
}
|
||||
|
||||
export function extractZip(source: string, target: string) {
|
||||
//https://github.com/Surfer-Org
|
||||
/**
 * Extracts a ZIP archive into a directory.
 *
 * Delegates to `extractZipInternal` with nested-ZIP handling switched on, so
 * an archive whose only entry is itself a `.zip` gets unwrapped one level.
 *
 * @param source Path to the ZIP file
 * @param target Directory to extract into
 * @returns A promise that settles once the delegated extraction settles
 */
export async function extractZip(
  source: string,
  target: string,
): Promise<void> {
  const unwrapSingleNestedZip = true;
  await extractZipInternal(source, target, unwrapSingleNestedZip);
}
|
||||
|
||||
/**
|
||||
* Internal helper to extract a ZIP, with optional single-nested-ZIP handling.
|
||||
* @param source Path to the ZIP file
|
||||
* @param target Directory to extract into
|
||||
* @param allowNested Whether to check and unwrap one level of nested ZIP
|
||||
*/
|
||||
function extractZipInternal(
|
||||
source: string,
|
||||
target: string,
|
||||
allowNested: boolean,
|
||||
): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
yauzl.open(
|
||||
source,
|
||||
@ -41,54 +60,128 @@ export function extractZip(source: string, target: string) {
|
||||
(err, zipfile) => {
|
||||
if (err) return reject(err);
|
||||
|
||||
// Handle one level of nested ZIP if allowed
|
||||
if (allowNested && zipfile.entryCount === 1) {
|
||||
zipfile.readEntry();
|
||||
zipfile.once('entry', (entry) => {
|
||||
const name = entry.fileName.toString('utf8').replace(/^\/+/, '');
|
||||
const isZip =
|
||||
!/\/$/.test(entry.fileName) &&
|
||||
name.toLowerCase().endsWith('.zip');
|
||||
if (isZip) {
|
||||
const nestedPath = source.endsWith('.zip')
|
||||
? source.slice(0, -4) + '.inner.zip'
|
||||
: source + '.inner.zip';
|
||||
|
||||
zipfile.openReadStream(entry, (openErr, rs) => {
|
||||
if (openErr) return reject(openErr);
|
||||
const ws = fs.createWriteStream(nestedPath);
|
||||
rs.on('error', reject);
|
||||
ws.on('error', reject);
|
||||
ws.on('finish', () => {
|
||||
zipfile.close();
|
||||
extractZipInternal(nestedPath, target, false)
|
||||
.then(() => {
|
||||
fs.unlinkSync(nestedPath);
|
||||
resolve();
|
||||
})
|
||||
.catch(reject);
|
||||
});
|
||||
rs.pipe(ws);
|
||||
});
|
||||
} else {
|
||||
zipfile.close();
|
||||
extractZipInternal(source, target, false).then(resolve, reject);
|
||||
}
|
||||
});
|
||||
zipfile.once('error', reject);
|
||||
return;
|
||||
}
|
||||
|
||||
// Normal extraction
|
||||
zipfile.readEntry();
|
||||
zipfile.on('entry', (entry) => {
|
||||
const name = entry.fileName.toString('utf8'); // or 'cp437' if you need the original DOS charset
|
||||
const safeName = name.replace(/^\/+/, ''); // strip any leading slashes
|
||||
|
||||
const fullPath = path.join(target, safeName);
|
||||
const directory = path.dirname(fullPath);
|
||||
|
||||
// <-- skip all macOS metadata
|
||||
if (safeName.startsWith('__MACOSX/')) {
|
||||
return zipfile.readEntry();
|
||||
const name = entry.fileName.toString('utf8');
|
||||
const safe = name.replace(/^\/+/, '');
|
||||
if (safe.startsWith('__MACOSX/')) {
|
||||
zipfile.readEntry();
|
||||
return;
|
||||
}
|
||||
|
||||
if (/\/$/.test(entry.fileName)) {
|
||||
// Directory entry
|
||||
const fullPath = path.join(target, safe);
|
||||
|
||||
// Handle directories
|
||||
if (/\/$/.test(name)) {
|
||||
try {
|
||||
fs.mkdirSync(fullPath, { recursive: true });
|
||||
zipfile.readEntry();
|
||||
} catch (err) {
|
||||
reject(err);
|
||||
}
|
||||
} else {
|
||||
// File entry
|
||||
try {
|
||||
fs.mkdirSync(directory, { recursive: true });
|
||||
zipfile.openReadStream(entry, (err, readStream) => {
|
||||
if (err) return reject(err);
|
||||
const writeStream = fs.createWriteStream(fullPath);
|
||||
readStream.on('end', () => {
|
||||
writeStream.end();
|
||||
zipfile.readEntry();
|
||||
});
|
||||
readStream.pipe(writeStream);
|
||||
});
|
||||
} catch (err) {
|
||||
reject(err);
|
||||
} catch (mkdirErr: any) {
|
||||
if (mkdirErr.code === 'ENAMETOOLONG') {
|
||||
console.warn(`Skipping directory (path too long): ${fullPath}`);
|
||||
zipfile.readEntry();
|
||||
return;
|
||||
}
|
||||
return reject(mkdirErr);
|
||||
}
|
||||
zipfile.readEntry();
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle files
|
||||
try {
|
||||
fs.mkdirSync(path.dirname(fullPath), { recursive: true });
|
||||
} catch (mkdirErr: any) {
|
||||
if (mkdirErr.code === 'ENAMETOOLONG') {
|
||||
console.warn(
|
||||
`Skipping file directory creation (path too long): ${fullPath}`,
|
||||
);
|
||||
zipfile.readEntry();
|
||||
return;
|
||||
}
|
||||
return reject(mkdirErr);
|
||||
}
|
||||
|
||||
zipfile.openReadStream(entry, (openErr, rs) => {
|
||||
if (openErr) return reject(openErr);
|
||||
|
||||
let ws: fs.WriteStream;
|
||||
try {
|
||||
ws = fs.createWriteStream(fullPath);
|
||||
} catch (openWsErr: any) {
|
||||
if (openWsErr.code === 'ENAMETOOLONG') {
|
||||
console.warn(
|
||||
`Skipping file write (path too long): ${fullPath}`,
|
||||
);
|
||||
zipfile.readEntry();
|
||||
return;
|
||||
}
|
||||
return reject(openWsErr);
|
||||
}
|
||||
|
||||
rs.on('error', (err) => reject(err));
|
||||
ws.on('error', (err) => {
|
||||
if ((err as any).code === 'ENAMETOOLONG') {
|
||||
console.warn(
|
||||
`Skipping file write on stream (path too long): ${fullPath}`,
|
||||
);
|
||||
zipfile.readEntry();
|
||||
} else {
|
||||
reject(err);
|
||||
}
|
||||
});
|
||||
ws.on('finish', () => zipfile.readEntry());
|
||||
rs.pipe(ws);
|
||||
});
|
||||
});
|
||||
|
||||
zipfile.on('end', resolve);
|
||||
zipfile.on('error', reject);
|
||||
zipfile.on('end', () => resolve());
|
||||
zipfile.on('error', (err) => reject(err));
|
||||
},
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Strips the query string from a URL-like string.
 *
 * Everything from the first `?` onward is dropped; the text before it is
 * returned unchanged.
 *
 * @param url The URL (or path) to clean
 * @returns The text before the first `?`, or `null` when `url` is empty,
 *   `null` or `undefined`
 */
export function cleanUrlString(url: string): string {
  // The guard has to run first: `.split` on null/undefined would throw.
  if (!url) return null;
  const [mainUrl] = url.split('?', 1);
  return mainUrl;
}
|
||||
|
||||
@ -1,6 +1,13 @@
|
||||
import { Window } from 'happy-dom';
|
||||
import { cleanUrlString } from './file.utils';
|
||||
import {
|
||||
Window,
|
||||
HTMLAnchorElement,
|
||||
HTMLIFrameElement,
|
||||
Element as HDElement,
|
||||
} from 'happy-dom';
|
||||
import { getEmbedUrlAndProvider } from '@docmost/editor-ext';
|
||||
import * as path from 'path';
|
||||
import { v7 } from 'uuid';
|
||||
import { InsertableBacklink } from '@docmost/db/types/entity.types';
|
||||
|
||||
export function formatImportHtml(html: string) {
|
||||
const pmHtml = notionFormatter(html);
|
||||
@ -14,23 +21,41 @@ export function defaultHtmlFormatter(html: string): string {
|
||||
|
||||
// embed providers
|
||||
const anchors = Array.from(doc.getElementsByTagName('a'));
|
||||
for (const a of anchors) {
|
||||
const href = cleanUrlString(a.getAttribute('href')) ?? '';
|
||||
if (!href) continue;
|
||||
for (const node of anchors) {
|
||||
const url = (node as HTMLAnchorElement).href;
|
||||
if (!url) continue;
|
||||
|
||||
const embedProvider = getEmbedUrlAndProvider(href);
|
||||
const embedProvider = getEmbedUrlAndProvider(url);
|
||||
// we only want to embed valid matches
|
||||
if (embedProvider.provider === 'iframe') continue;
|
||||
|
||||
if (embedProvider) {
|
||||
const embed = doc.createElement('div');
|
||||
embed.setAttribute('data-type', 'embed');
|
||||
embed.setAttribute('data-src', href);
|
||||
embed.setAttribute('data-provider', embedProvider.provider);
|
||||
embed.setAttribute('data-align', 'center');
|
||||
embed.setAttribute('data-width', '640');
|
||||
embed.setAttribute('data-height', '480');
|
||||
const embed = doc.createElement('div');
|
||||
embed.setAttribute('data-type', 'embed');
|
||||
embed.setAttribute('data-src', url);
|
||||
embed.setAttribute('data-provider', embedProvider.provider);
|
||||
embed.setAttribute('data-align', 'center');
|
||||
embed.setAttribute('data-width', '640');
|
||||
embed.setAttribute('data-height', '480');
|
||||
|
||||
a.replaceWith(embed);
|
||||
}
|
||||
node.replaceWith(embed);
|
||||
}
|
||||
|
||||
// embed providers
|
||||
const iframes = Array.from(doc.getElementsByTagName('iframe'));
|
||||
for (const iframe of iframes) {
|
||||
const url = (iframe as HTMLIFrameElement).src;
|
||||
if (!url) continue;
|
||||
|
||||
const embedProvider = getEmbedUrlAndProvider(url);
|
||||
const embed = doc.createElement('div');
|
||||
embed.setAttribute('data-type', 'embed');
|
||||
embed.setAttribute('data-src', url);
|
||||
embed.setAttribute('data-provider', embedProvider.provider);
|
||||
embed.setAttribute('data-align', 'center');
|
||||
embed.setAttribute('data-width', '640');
|
||||
embed.setAttribute('data-height', '480');
|
||||
|
||||
iframe.replaceWith(embed);
|
||||
}
|
||||
|
||||
return doc.body.innerHTML;
|
||||
@ -168,3 +193,78 @@ export function notionFormatter(html: string): string {
|
||||
}
|
||||
return doc.body.innerHTML;
|
||||
}
|
||||
|
||||
/**
 * Lifts `node` out of every enclosing `<p>` or `<a>` element.
 *
 * For each enclosing wrapper, innermost first:
 * - if the wrapper has exactly one child node, the wrapper is replaced by
 *   `node` (e.g. `<p><node/></p>` becomes `<node/>`);
 * - otherwise `node` is moved to sit immediately before the wrapper and the
 *   wrapper keeps its remaining children.
 *
 * @param node The element to unwrap; it is moved in place within its document
 */
export function unwrapFromParagraph(node: HDElement) {
  const WRAPPER_SELECTOR = 'p, a';

  // `closest()` also tests the element it is called on, so the search starts
  // at the parent. Otherwise a node that is itself a <p>/<a> would be found
  // as its own wrapper and the loop would never terminate.
  const findWrapper = (): HDElement | null =>
    (node.parentElement?.closest(WRAPPER_SELECTOR) as
      | HDElement
      | null
      | undefined) ?? null;

  let wrapper = findWrapper();

  while (wrapper) {
    const host = wrapper.parentNode;
    // A wrapper without a parent cannot be replaced or used as an insertion
    // reference, so there is nothing further to unwrap.
    if (!host) break;

    if (wrapper.childNodes.length === 1) {
      // e.g. <p><node/></p> or <a><node/></a> → <node/>
      wrapper.replaceWith(node);
    } else {
      host.insertBefore(node, wrapper);
    }
    wrapper = findWrapper();
  }
}
|
||||
|
||||
/**
 * Rewrites links between imported files into page-mention elements.
 *
 * Every `<a href>` in `html` is resolved relative to the directory of
 * `currentFilePath`. When the resolved path is a key of
 * `filePathToPageMetaMap`, the anchor is replaced by a
 * `<span data-type="mention">` describing the target page and a backlink
 * record is collected. Anchors that do not resolve to a known file are left
 * untouched.
 *
 * @param html HTML fragment to process
 * @param currentFilePath Path of the file the HTML came from; links are
 *   resolved against its directory
 * @param filePathToPageMetaMap Lookup from forward-slash file path to the
 *   metadata of the page created for that file
 * @param creatorId Written to the mention's `data-creator-id` attribute
 * @param sourcePageId Page that contains the links (backlink source)
 * @param workspaceId Workspace recorded on each backlink
 * @returns The rewritten HTML and one backlink entry per replaced link
 */
export async function rewriteInternalLinksToMentionHtml(
  html: string,
  currentFilePath: string,
  filePathToPageMetaMap: Map<
    string,
    { id: string; title: string; slugId: string }
  >,
  creatorId: string,
  sourcePageId: string,
  workspaceId: string,
): Promise<{ html: string; backlinks: InsertableBacklink[] }> {
  const window = new Window();
  const doc = window.document;
  doc.body.innerHTML = html;

  // Map keys use forward slashes; path.join may produce backslashes.
  const normalize = (p: string) => p.replace(/\\/g, '/');

  // Same for every link, so compute it once.
  const parentDir = path.dirname(currentFilePath);

  const backlinks: InsertableBacklink[] = [];

  for (const a of Array.from(doc.getElementsByTagName('a'))) {
    const rawHref = a.getAttribute('href');
    if (!rawHref) continue;

    // Skip absolute/external URLs. The scheme is matched including its colon
    // so that a relative file whose name merely starts with "http" is still
    // treated as an internal link.
    if (/^https?:/i.test(rawHref) || rawHref.startsWith('/api/')) {
      continue;
    }

    // A stray "%" makes decodeURIComponent throw URIError; fall back to the
    // raw href so one malformed link does not abort the whole rewrite.
    let decodedRef: string;
    try {
      decodedRef = decodeURIComponent(rawHref);
    } catch {
      decodedRef = rawHref;
    }

    const resolved = normalize(path.join(parentDir, decodedRef));

    const pageMeta = filePathToPageMetaMap.get(resolved);
    if (!pageMeta) {
      continue;
    }

    const mentionEl = doc.createElement('span') as HDElement;
    mentionEl.setAttribute('data-type', 'mention');
    mentionEl.setAttribute('data-id', v7());
    mentionEl.setAttribute('data-entity-type', 'page');
    mentionEl.setAttribute('data-entity-id', pageMeta.id);
    mentionEl.setAttribute('data-label', pageMeta.title);
    mentionEl.setAttribute('data-slug-id', pageMeta.slugId);
    mentionEl.setAttribute('data-creator-id', creatorId);
    mentionEl.textContent = pageMeta.title;

    a.replaceWith(mentionEl);

    backlinks.push({
      sourcePageId,
      targetPageId: pageMeta.id,
      workspaceId: workspaceId,
    });
  }

  return { html: doc.body.innerHTML, backlinks };
}
|
||||
|
||||
66
apps/server/src/integrations/import/import.utils.ts
Normal file
66
apps/server/src/integrations/import/import.utils.ts
Normal file
@ -0,0 +1,66 @@
|
||||
import { promises as fs } from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
/**
 * Indexes every non-page file found under `extractDir`.
 *
 * The directory tree is walked recursively. Files whose extension is `.md`
 * or `.html` (compared case-insensitively) are skipped; every other file is
 * recorded.
 *
 * @param extractDir Root directory to scan
 * @returns Map from the file's path relative to `extractDir` (joined with
 *   `/`) to the path obtained by joining `extractDir` with the entry names
 */
export async function buildAttachmentCandidates(
  extractDir: string,
): Promise<Map<string, string>> {
  const candidates = new Map<string, string>();

  const visit = async (folder: string): Promise<void> => {
    const entries = await fs.readdir(folder, { withFileTypes: true });

    for (const entry of entries) {
      const entryPath = path.join(folder, entry.name);

      if (entry.isDirectory()) {
        await visit(entryPath);
        continue;
      }

      // Page sources are not attachments.
      const extension = path.extname(entry.name).toLowerCase();
      if (['.md', '.html'].includes(extension)) {
        continue;
      }

      const relativeKey = path
        .relative(extractDir, entryPath)
        .split(path.sep)
        .join('/');
      candidates.set(relativeKey, entryPath);
    }
  };

  await visit(extractDir);
  return candidates;
}
|
||||
|
||||
/**
 * Finds the attachment-candidate key that a reference in a page points to.
 *
 * A leading `./` or `/` is stripped from `raw` and the rest is URL-decoded.
 * The result is tried first as a key as-is, then joined onto `pageDir`.
 *
 * @param raw Reference as written in the page (may be percent-encoded)
 * @param pageDir Directory of the referencing page, in the same relative
 *   form as the candidate keys
 * @param attachmentCandidates Known attachments keyed by `/`-separated path
 * @returns The matching key, or `null` when neither form is a known key
 */
export function resolveRelativeAttachmentPath(
  raw: string,
  pageDir: string,
  attachmentCandidates: Map<string, string>,
): string | null {
  const stripped = raw.replace(/^\.?\/+/, '');

  // decodeURIComponent throws URIError on a stray "%" (e.g. "100%.png");
  // fall back to the undecoded text instead of failing the lookup.
  let mainRel: string;
  try {
    mainRel = decodeURIComponent(stripped);
  } catch {
    mainRel = stripped;
  }

  if (attachmentCandidates.has(mainRel)) {
    return mainRel;
  }

  // path.join uses the OS separator; convert to "/" so the lookup agrees
  // with the "/"-joined keys on every platform.
  const fallback = path.join(pageDir, mainRel).split(path.sep).join('/');
  if (attachmentCandidates.has(fallback)) {
    return fallback;
  }

  return null;
}
|
||||
|
||||
/**
 * Recursively lists the Markdown and HTML files below a directory.
 *
 * A file qualifies when its extension, compared case-insensitively, is
 * `.md` or `.html`. Subdirectories are searched at the point they are
 * encountered in the directory listing.
 *
 * @param dir Directory to search
 * @returns Paths of the matching files, each built by joining `dir` with the
 *   entry names leading to the file
 */
export async function collectMarkdownAndHtmlFiles(
  dir: string,
): Promise<string[]> {
  const pageFiles: string[] = [];

  const isPageFile = (fileName: string): boolean =>
    ['.md', '.html'].includes(path.extname(fileName).toLowerCase());

  const descend = async (folder: string): Promise<void> => {
    for (const entry of await fs.readdir(folder, { withFileTypes: true })) {
      const entryPath = path.join(folder, entry.name);

      if (entry.isDirectory()) {
        await descend(entryPath);
        continue;
      }
      if (isPageFile(entry.name)) {
        pageFiles.push(entryPath);
      }
    }
  };

  await descend(dir);
  return pageFiles;
}
|
||||
@ -88,7 +88,7 @@ async function bootstrap() {
|
||||
const logger = new Logger('NestApplication');
|
||||
|
||||
process.on('unhandledRejection', (reason, promise) => {
|
||||
logger.error(`UnhandledRejection: ${promise}, reason: ${reason}`);
|
||||
logger.error(`UnhandledRejection, reason: ${reason}`, promise);
|
||||
});
|
||||
|
||||
process.on('uncaughtException', (error) => {
|
||||
|
||||
Reference in New Issue
Block a user