Compare commits

...

2 Commits

Author SHA1 Message Date
36a573fce9 import embeds 2025-05-26 15:37:35 -07:00
af54e1827d move embed parser to editor-ext package 2025-05-26 14:59:17 -07:00
5 changed files with 100 additions and 58 deletions

View File

@ -15,13 +15,10 @@ import {
import { IconEdit } from "@tabler/icons-react";
import { z } from "zod";
import { useForm, zodResolver } from "@mantine/form";
import {
getEmbedProviderById,
getEmbedUrlAndProvider,
} from "@/features/editor/components/embed/providers.ts";
import { notifications } from "@mantine/notifications";
import { useTranslation } from "react-i18next";
import i18n from "i18next";
import { getEmbedProviderById, getEmbedUrlAndProvider } from '@docmost/editor-ext';
const schema = z.object({
url: z

View File

@ -28,8 +28,7 @@ import { markdownToHtml } from '@docmost/editor-ext';
import { getAttachmentFolderPath } from '../../core/attachment/attachment.utils';
import { AttachmentType } from '../../core/attachment/attachment.constants';
import { getProsemirrorContent } from '../../common/helpers/prosemirror/utils';
import { not } from 'rxjs/internal/util/not';
import { notionFormatter } from './import-formatter';
import { formatImportHtml, notionFormatter } from './import-formatter';
@Injectable()
export class FileTaskService {
@ -68,9 +67,8 @@ export class FileTaskService {
await pipeline(fileStream, createWriteStream(tmpZipPath));
await extractZip(tmpZipPath, tmpExtractDir);
console.log('extract here');
// TODO: internal link mentions, backlinks, attachments
// TODO: backlinks
try {
await this.updateTaskStatus(fileTaskId, FileTaskStatus.Processing);
// if type == generic
@ -127,8 +125,6 @@ export class FileTaskService {
content = await markdownToHtml(content);
}
//content = this.stripAllStyles(content)
content = await this.rewriteLocalFilesInHtml({
html: content,
pageRelativePath: relPath,
@ -209,7 +205,7 @@ export class FileTaskService {
);
const pmState = getProsemirrorContent(
await this.importService.processHTML(notionFormatter(htmlContent)),
await this.importService.processHTML(formatImportHtml(htmlContent)),
);
const { title, prosemirrorJson } =

View File

@ -1,10 +1,53 @@
import { Window } from 'happy-dom';
import { cleanUrlString } from './file.utils';
import { getEmbedUrlAndProvider } from '@docmost/editor-ext';
/**
 * Runs imported HTML through both import formatters.
 *
 * The Notion-specific pass (`notionFormatter`) is applied first, and its
 * output is then handed to the generic pass (`defaultHtmlFormatter`).
 *
 * @param html - Raw HTML of the imported page.
 * @returns The HTML string produced by the second formatting pass.
 */
export function formatImportHtml(html: string) {
  return defaultHtmlFormatter(notionFormatter(html));
}
/**
 * Generic import formatter: turns links that point at a known embed provider
 * into embed placeholder nodes and leaves every other link untouched.
 *
 * Each matching `<a>` is replaced by a
 * `<div data-type="embed" data-src=… data-provider=…>` element with default
 * alignment and dimensions.
 *
 * @param html - HTML fragment to transform.
 * @returns The serialized body HTML after the replacements.
 */
export function defaultHtmlFormatter(html: string): string {
  const window = new Window();
  const doc = window.document;
  doc.body.innerHTML = html;

  // Copy the collection into an array first, because anchors are replaced
  // while we iterate over them.
  const anchors = Array.from(doc.getElementsByTagName('a'));
  for (const a of anchors) {
    const href = cleanUrlString(a.getAttribute('href')) ?? '';
    if (!href) continue;

    const embedResult = getEmbedUrlAndProvider(href);
    // getEmbedUrlAndProvider always returns a result: URLs that match no
    // known provider come back with the generic 'iframe' provider. Treating
    // that fallback as a match would convert every ordinary link into an
    // embed, so skip it and keep the anchor as a normal link.
    if (!embedResult || embedResult.provider === 'iframe') continue;

    const embed = doc.createElement('div');
    embed.setAttribute('data-type', 'embed');
    embed.setAttribute('data-src', href);
    embed.setAttribute('data-provider', embedResult.provider);
    embed.setAttribute('data-align', 'center');
    embed.setAttribute('data-width', '640');
    embed.setAttribute('data-height', '480');
    a.replaceWith(embed);
  }

  return doc.body.innerHTML;
}
export function notionFormatter(html: string): string {
const window = new Window();
const doc = window.document;
doc.body.innerHTML = html;
// remove empty description paragraph
doc.querySelectorAll('p.page-description').forEach((p) => {
if (p.textContent?.trim() === '') {
p.remove();
}
});
// Block math
for (const fig of Array.from(doc.querySelectorAll('figure.equation'))) {
// get TeX source from the MathML <annotation>

View File

@ -16,4 +16,5 @@ export * from "./lib/drawio";
export * from "./lib/excalidraw";
export * from "./lib/embed";
export * from "./lib/mention";
export * from "./lib/markdown";
export * from "./lib/markdown";
export * from "./lib/embed-provider";

View File

@ -7,102 +7,109 @@ export interface IEmbedProvider {
export const embedProviders: IEmbedProvider[] = [
{
id: 'loom',
name: 'Loom',
id: "loom",
name: "Loom",
regex: /^https?:\/\/(?:www\.)?loom\.com\/(?:share|embed)\/([\da-zA-Z]+)\/?/,
getEmbedUrl: (match, url) => {
if(url.includes("/embed/")){
if (url.includes("/embed/")) {
return url;
}
return `https://loom.com/embed/${match[1]}`;
}
},
},
{
id: 'airtable',
name: 'Airtable',
id: "airtable",
name: "Airtable",
regex: /^https:\/\/(www.)?airtable.com\/([a-zA-Z0-9]{2,})\/.*/,
getEmbedUrl: (match, url: string) => {
const path = url.split('airtable.com/');
if(url.includes("/embed/")){
const path = url.split("airtable.com/");
if (url.includes("/embed/")) {
return url;
}
return `https://airtable.com/embed/${path[1]}`;
}
},
},
{
id: 'figma',
name: 'Figma',
regex: /^https:\/\/[\w\.-]+\.?figma.com\/(file|proto|board|design|slides|deck)\/([0-9a-zA-Z]{22,128})/,
id: "figma",
name: "Figma",
regex:
/^https:\/\/[\w\.-]+\.?figma.com\/(file|proto|board|design|slides|deck)\/([0-9a-zA-Z]{22,128})/,
getEmbedUrl: (match, url: string) => {
return `https://www.figma.com/embed?url=${url}&embed_host=docmost`;
}
},
},
{
'id': 'typeform',
name: 'Typeform',
id: "typeform",
name: "Typeform",
regex: /^(https?:)?(\/\/)?[\w\.]+\.typeform\.com\/to\/.+/,
getEmbedUrl: (match, url: string) => {
return url;
}
},
},
{
id: 'miro',
name: 'Miro',
id: "miro",
name: "Miro",
regex: /^https:\/\/(www\.)?miro\.com\/app\/board\/([\w-]+=)/,
getEmbedUrl: (match, url) => {
if(url.includes("/live-embed/")){
if (url.includes("/live-embed/")) {
return url;
}
return `https://miro.com/app/live-embed/${match[2]}?embedMode=view_only_without_ui&autoplay=true&embedSource=docmost`;
}
},
},
{
id: 'youtube',
name: 'YouTube',
regex: /^((?:https?:)?\/\/)?((?:www|m|music)\.)?((?:youtube\.com|youtu.be))(\/(?:[\w\-]+\?v=|embed\/|v\/)?)([\w\-]+)(\S+)?$/,
id: "youtube",
name: "YouTube",
regex:
/^((?:https?:)?\/\/)?((?:www|m|music)\.)?((?:youtube\.com|youtu.be))(\/(?:[\w\-]+\?v=|embed\/|v\/)?)([\w\-]+)(\S+)?$/,
getEmbedUrl: (match, url) => {
if (url.includes("/embed/")){
if (url.includes("/embed/")) {
return url;
}
return `https://www.youtube-nocookie.com/embed/${match[5]}`;
}
},
},
{
id: 'vimeo',
name: 'Vimeo',
regex: /^(https:)?\/\/(?:www\.|player\.)?vimeo.com\/(?:channels\/(?:\w+\/)?|groups\/([^/]*)\/videos\/|album\/(\d+)\/video\/|video\/|)(\d+)/,
id: "vimeo",
name: "Vimeo",
regex:
/^(https:)?\/\/(?:www\.|player\.)?vimeo.com\/(?:channels\/(?:\w+\/)?|groups\/([^/]*)\/videos\/|album\/(\d+)\/video\/|video\/|)(\d+)/,
getEmbedUrl: (match) => {
return `https://player.vimeo.com/video/${match[4]}`;
}
},
},
{
id: 'framer',
name: 'Framer',
id: "framer",
name: "Framer",
regex: /^https:\/\/(www\.)?framer\.com\/embed\/([\w-]+)/,
getEmbedUrl: (match, url: string) => {
return url;
}
},
},
{
id: 'gdrive',
name: 'Google Drive',
regex: /^((?:https?:)?\/\/)?((?:www|m)\.)?(drive\.google\.com)\/file\/d\/([a-zA-Z0-9_-]+)\/.*$/,
id: "gdrive",
name: "Google Drive",
regex:
/^((?:https?:)?\/\/)?((?:www|m)\.)?(drive\.google\.com)\/file\/d\/([a-zA-Z0-9_-]+)\/.*$/,
getEmbedUrl: (match) => {
return `https://drive.google.com/file/d/${match[4]}/preview`;
}
},
},
{
id: 'gsheets',
name: 'Google Sheets',
regex: /^((?:https?:)?\/\/)?((?:www|m)\.)?(docs\.google\.com)\/spreadsheets\/d\/e\/([a-zA-Z0-9_-]+)\/.*$/,
id: "gsheets",
name: "Google Sheets",
regex:
/^((?:https?:)?\/\/)?((?:www|m)\.)?(docs\.google\.com)\/spreadsheets\/d\/e\/([a-zA-Z0-9_-]+)\/.*$/,
getEmbedUrl: (match, url: string) => {
return url
}
return url;
},
},
];
export function getEmbedProviderById(id: string) {
return embedProviders.find(provider => provider.id.toLowerCase() === id.toLowerCase());
return embedProviders.find(
(provider) => provider.id.toLowerCase() === id.toLowerCase(),
);
}
export interface IEmbedResult {
@ -116,14 +123,12 @@ export function getEmbedUrlAndProvider(url: string): IEmbedResult {
if (match) {
return {
embedUrl: provider.getEmbedUrl(match, url),
provider: provider.name.toLowerCase()
provider: provider.name.toLowerCase(),
};
}
}
return {
embedUrl: url,
provider: 'iframe',
provider: "iframe",
};
}