Compare commits

...

2 Commits

Author SHA1 Message Date
36a573fce9 import embeds 2025-05-26 15:37:35 -07:00
af54e1827d move embed parser to editor-ext package 2025-05-26 14:59:17 -07:00
5 changed files with 100 additions and 58 deletions

View File

@ -15,13 +15,10 @@ import {
import { IconEdit } from "@tabler/icons-react"; import { IconEdit } from "@tabler/icons-react";
import { z } from "zod"; import { z } from "zod";
import { useForm, zodResolver } from "@mantine/form"; import { useForm, zodResolver } from "@mantine/form";
import {
getEmbedProviderById,
getEmbedUrlAndProvider,
} from "@/features/editor/components/embed/providers.ts";
import { notifications } from "@mantine/notifications"; import { notifications } from "@mantine/notifications";
import { useTranslation } from "react-i18next"; import { useTranslation } from "react-i18next";
import i18n from "i18next"; import i18n from "i18next";
import { getEmbedProviderById, getEmbedUrlAndProvider } from '@docmost/editor-ext';
const schema = z.object({ const schema = z.object({
url: z url: z

View File

@ -28,8 +28,7 @@ import { markdownToHtml } from '@docmost/editor-ext';
import { getAttachmentFolderPath } from '../../core/attachment/attachment.utils'; import { getAttachmentFolderPath } from '../../core/attachment/attachment.utils';
import { AttachmentType } from '../../core/attachment/attachment.constants'; import { AttachmentType } from '../../core/attachment/attachment.constants';
import { getProsemirrorContent } from '../../common/helpers/prosemirror/utils'; import { getProsemirrorContent } from '../../common/helpers/prosemirror/utils';
import { not } from 'rxjs/internal/util/not'; import { formatImportHtml, notionFormatter } from './import-formatter';
import { notionFormatter } from './import-formatter';
@Injectable() @Injectable()
export class FileTaskService { export class FileTaskService {
@ -68,9 +67,8 @@ export class FileTaskService {
await pipeline(fileStream, createWriteStream(tmpZipPath)); await pipeline(fileStream, createWriteStream(tmpZipPath));
await extractZip(tmpZipPath, tmpExtractDir); await extractZip(tmpZipPath, tmpExtractDir);
console.log('extract here');
// TODO: internal link mentions, backlinks, attachments // TODO: backlinks
try { try {
await this.updateTaskStatus(fileTaskId, FileTaskStatus.Processing); await this.updateTaskStatus(fileTaskId, FileTaskStatus.Processing);
// if type == generic // if type == generic
@ -127,8 +125,6 @@ export class FileTaskService {
content = await markdownToHtml(content); content = await markdownToHtml(content);
} }
//content = this.stripAllStyles(content)
content = await this.rewriteLocalFilesInHtml({ content = await this.rewriteLocalFilesInHtml({
html: content, html: content,
pageRelativePath: relPath, pageRelativePath: relPath,
@ -209,7 +205,7 @@ export class FileTaskService {
); );
const pmState = getProsemirrorContent( const pmState = getProsemirrorContent(
await this.importService.processHTML(notionFormatter(htmlContent)), await this.importService.processHTML(formatImportHtml(htmlContent)),
); );
const { title, prosemirrorJson } = const { title, prosemirrorJson } =

View File

@ -1,10 +1,53 @@
import { Window } from 'happy-dom'; import { Window } from 'happy-dom';
import { cleanUrlString } from './file.utils';
import { getEmbedUrlAndProvider } from '@docmost/editor-ext';
/**
 * Prepares imported HTML for conversion into editor content.
 *
 * The Notion-specific formatter runs first; its output is then passed through
 * the default formatter.
 *
 * @param html - Raw HTML of the imported page.
 * @returns The HTML string produced by `defaultHtmlFormatter`.
 */
export function formatImportHtml(html: string): string {
  return defaultHtmlFormatter(notionFormatter(html));
}
/**
 * Turns links that point at a recognised embed provider into embed nodes.
 *
 * The HTML is parsed into a happy-dom document. Each `<a>` whose cleaned
 * `href` matches a known provider is replaced by a `<div data-type="embed">`
 * carrying the URL, the provider name, and fixed alignment/size attributes.
 * Links that match no provider are left untouched.
 *
 * @param html - HTML fragment to transform.
 * @returns The serialised body HTML after the replacements.
 */
export function defaultHtmlFormatter(html: string): string {
  const window = new Window();
  const doc = window.document;
  doc.body.innerHTML = html;

  // Copy the collection into an array first, since anchors are replaced
  // while iterating.
  const anchors = Array.from(doc.getElementsByTagName('a'));
  for (const a of anchors) {
    const href = cleanUrlString(a.getAttribute('href')) ?? '';
    if (!href) continue;

    const result = getEmbedUrlAndProvider(href);
    // getEmbedUrlAndProvider does not return a falsy value for unknown URLs:
    // it falls back to the generic "iframe" provider. Treating that fallback
    // as a match would convert every ordinary link into an embed, so only
    // URLs matched by a named provider are replaced.
    if (!result || result.provider === 'iframe') continue;

    const embed = doc.createElement('div');
    embed.setAttribute('data-type', 'embed');
    embed.setAttribute('data-src', href);
    embed.setAttribute('data-provider', result.provider);
    embed.setAttribute('data-align', 'center');
    embed.setAttribute('data-width', '640');
    embed.setAttribute('data-height', '480');
    a.replaceWith(embed);
  }

  return doc.body.innerHTML;
}
export function notionFormatter(html: string): string { export function notionFormatter(html: string): string {
const window = new Window(); const window = new Window();
const doc = window.document; const doc = window.document;
doc.body.innerHTML = html; doc.body.innerHTML = html;
// remove empty description paragraph
doc.querySelectorAll('p.page-description').forEach((p) => {
if (p.textContent?.trim() === '') {
p.remove();
}
});
// Block math // Block math
for (const fig of Array.from(doc.querySelectorAll('figure.equation'))) { for (const fig of Array.from(doc.querySelectorAll('figure.equation'))) {
// get TeX source from the MathML <annotation> // get TeX source from the MathML <annotation>

View File

@ -16,4 +16,5 @@ export * from "./lib/drawio";
export * from "./lib/excalidraw"; export * from "./lib/excalidraw";
export * from "./lib/embed"; export * from "./lib/embed";
export * from "./lib/mention"; export * from "./lib/mention";
export * from "./lib/markdown"; export * from "./lib/markdown";
export * from "./lib/embed-provider";

View File

@ -7,102 +7,109 @@ export interface IEmbedProvider {
export const embedProviders: IEmbedProvider[] = [ export const embedProviders: IEmbedProvider[] = [
{ {
id: 'loom', id: "loom",
name: 'Loom', name: "Loom",
regex: /^https?:\/\/(?:www\.)?loom\.com\/(?:share|embed)\/([\da-zA-Z]+)\/?/, regex: /^https?:\/\/(?:www\.)?loom\.com\/(?:share|embed)\/([\da-zA-Z]+)\/?/,
getEmbedUrl: (match, url) => { getEmbedUrl: (match, url) => {
if(url.includes("/embed/")){ if (url.includes("/embed/")) {
return url; return url;
} }
return `https://loom.com/embed/${match[1]}`; return `https://loom.com/embed/${match[1]}`;
} },
}, },
{ {
id: 'airtable', id: "airtable",
name: 'Airtable', name: "Airtable",
regex: /^https:\/\/(www.)?airtable.com\/([a-zA-Z0-9]{2,})\/.*/, regex: /^https:\/\/(www.)?airtable.com\/([a-zA-Z0-9]{2,})\/.*/,
getEmbedUrl: (match, url: string) => { getEmbedUrl: (match, url: string) => {
const path = url.split('airtable.com/'); const path = url.split("airtable.com/");
if(url.includes("/embed/")){ if (url.includes("/embed/")) {
return url; return url;
} }
return `https://airtable.com/embed/${path[1]}`; return `https://airtable.com/embed/${path[1]}`;
} },
}, },
{ {
id: 'figma', id: "figma",
name: 'Figma', name: "Figma",
regex: /^https:\/\/[\w\.-]+\.?figma.com\/(file|proto|board|design|slides|deck)\/([0-9a-zA-Z]{22,128})/, regex:
/^https:\/\/[\w\.-]+\.?figma.com\/(file|proto|board|design|slides|deck)\/([0-9a-zA-Z]{22,128})/,
getEmbedUrl: (match, url: string) => { getEmbedUrl: (match, url: string) => {
return `https://www.figma.com/embed?url=${url}&embed_host=docmost`; return `https://www.figma.com/embed?url=${url}&embed_host=docmost`;
} },
}, },
{ {
'id': 'typeform', id: "typeform",
name: 'Typeform', name: "Typeform",
regex: /^(https?:)?(\/\/)?[\w\.]+\.typeform\.com\/to\/.+/, regex: /^(https?:)?(\/\/)?[\w\.]+\.typeform\.com\/to\/.+/,
getEmbedUrl: (match, url: string) => { getEmbedUrl: (match, url: string) => {
return url; return url;
} },
}, },
{ {
id: 'miro', id: "miro",
name: 'Miro', name: "Miro",
regex: /^https:\/\/(www\.)?miro\.com\/app\/board\/([\w-]+=)/, regex: /^https:\/\/(www\.)?miro\.com\/app\/board\/([\w-]+=)/,
getEmbedUrl: (match, url) => { getEmbedUrl: (match, url) => {
if(url.includes("/live-embed/")){ if (url.includes("/live-embed/")) {
return url; return url;
} }
return `https://miro.com/app/live-embed/${match[2]}?embedMode=view_only_without_ui&autoplay=true&embedSource=docmost`; return `https://miro.com/app/live-embed/${match[2]}?embedMode=view_only_without_ui&autoplay=true&embedSource=docmost`;
} },
}, },
{ {
id: 'youtube', id: "youtube",
name: 'YouTube', name: "YouTube",
regex: /^((?:https?:)?\/\/)?((?:www|m|music)\.)?((?:youtube\.com|youtu.be))(\/(?:[\w\-]+\?v=|embed\/|v\/)?)([\w\-]+)(\S+)?$/, regex:
/^((?:https?:)?\/\/)?((?:www|m|music)\.)?((?:youtube\.com|youtu.be))(\/(?:[\w\-]+\?v=|embed\/|v\/)?)([\w\-]+)(\S+)?$/,
getEmbedUrl: (match, url) => { getEmbedUrl: (match, url) => {
if (url.includes("/embed/")){ if (url.includes("/embed/")) {
return url; return url;
} }
return `https://www.youtube-nocookie.com/embed/${match[5]}`; return `https://www.youtube-nocookie.com/embed/${match[5]}`;
} },
}, },
{ {
id: 'vimeo', id: "vimeo",
name: 'Vimeo', name: "Vimeo",
regex: /^(https:)?\/\/(?:www\.|player\.)?vimeo.com\/(?:channels\/(?:\w+\/)?|groups\/([^/]*)\/videos\/|album\/(\d+)\/video\/|video\/|)(\d+)/, regex:
/^(https:)?\/\/(?:www\.|player\.)?vimeo.com\/(?:channels\/(?:\w+\/)?|groups\/([^/]*)\/videos\/|album\/(\d+)\/video\/|video\/|)(\d+)/,
getEmbedUrl: (match) => { getEmbedUrl: (match) => {
return `https://player.vimeo.com/video/${match[4]}`; return `https://player.vimeo.com/video/${match[4]}`;
} },
}, },
{ {
id: 'framer', id: "framer",
name: 'Framer', name: "Framer",
regex: /^https:\/\/(www\.)?framer\.com\/embed\/([\w-]+)/, regex: /^https:\/\/(www\.)?framer\.com\/embed\/([\w-]+)/,
getEmbedUrl: (match, url: string) => { getEmbedUrl: (match, url: string) => {
return url; return url;
} },
}, },
{ {
id: 'gdrive', id: "gdrive",
name: 'Google Drive', name: "Google Drive",
regex: /^((?:https?:)?\/\/)?((?:www|m)\.)?(drive\.google\.com)\/file\/d\/([a-zA-Z0-9_-]+)\/.*$/, regex:
/^((?:https?:)?\/\/)?((?:www|m)\.)?(drive\.google\.com)\/file\/d\/([a-zA-Z0-9_-]+)\/.*$/,
getEmbedUrl: (match) => { getEmbedUrl: (match) => {
return `https://drive.google.com/file/d/${match[4]}/preview`; return `https://drive.google.com/file/d/${match[4]}/preview`;
} },
}, },
{ {
id: 'gsheets', id: "gsheets",
name: 'Google Sheets', name: "Google Sheets",
regex: /^((?:https?:)?\/\/)?((?:www|m)\.)?(docs\.google\.com)\/spreadsheets\/d\/e\/([a-zA-Z0-9_-]+)\/.*$/, regex:
/^((?:https?:)?\/\/)?((?:www|m)\.)?(docs\.google\.com)\/spreadsheets\/d\/e\/([a-zA-Z0-9_-]+)\/.*$/,
getEmbedUrl: (match, url: string) => { getEmbedUrl: (match, url: string) => {
return url return url;
} },
}, },
]; ];
export function getEmbedProviderById(id: string) { export function getEmbedProviderById(id: string) {
return embedProviders.find(provider => provider.id.toLowerCase() === id.toLowerCase()); return embedProviders.find(
(provider) => provider.id.toLowerCase() === id.toLowerCase(),
);
} }
export interface IEmbedResult { export interface IEmbedResult {
@ -116,14 +123,12 @@ export function getEmbedUrlAndProvider(url: string): IEmbedResult {
if (match) { if (match) {
return { return {
embedUrl: provider.getEmbedUrl(match, url), embedUrl: provider.getEmbedUrl(match, url),
provider: provider.name.toLowerCase() provider: provider.name.toLowerCase(),
}; };
} }
} }
return { return {
embedUrl: url, embedUrl: url,
provider: 'iframe', provider: "iframe",
}; };
} }