diff --git a/packages/editor-ext/src/lib/prosemirror-docx/README.md b/packages/editor-ext/src/lib/prosemirror-docx/README.md new file mode 100644 index 000000000..37f10e399 --- /dev/null +++ b/packages/editor-ext/src/lib/prosemirror-docx/README.md @@ -0,0 +1,167 @@ +# `prosemirror-docx` + +[![prosemirror-docx on npm](https://img.shields.io/npm/v/prosemirror-docx.svg)](https://www.npmjs.com/package/prosemirror-docx) +[![prosemirror-docx on GitHub](https://img.shields.io/github/stars/curvenote/prosemirror-docx.svg?style=social)](https://github.com/curvenote/prosemirror-docx) +[ +[![MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/curvenote/prosemirror-docx/blob/master/LICENSE) +![CI](https://github.com/curvenote/prosemirror-docx/workflows/CI/badge.svg) + +Export a [prosemirror](https://prosemirror.net/) document to a Microsoft Word file, using [docx](https://docx.js.org/). + +![image](https://user-images.githubusercontent.com/913249/134953610-886047eb-2a21-4929-9a53-9a29d8f6184f.png) + +## Overview + +`prosemirror-docx` has a similar structure to [prosemirror-markdown](https://github.com/prosemirror/prosemirror-markdown), with a `DocxSerializerState` object that you write to as you walk the document. It is a light wrapper around [docx](https://docx.js.org/), which actually does the export. Currently `prosemirror-docx` is write-only (i.e. it can export to, but can’t read from, `*.docx`), and has most of the basic nodes covered (see below). + +[Curvenote](https://curvenote.com) uses this to export from [@curvenote/editor](https://github.com/curvenote/editor) to Word docs, but this library currently depends only on `docx`, `prosemirror-model` and `image-dimensions` - and, similar to `prosemirror-markdown`, the serialization schema can be edited externally (see `Extended usage` below).
+ +## Basic usage + +```ts +import { defaultDocxSerializer, writeDocx } from 'prosemirror-docx'; +import { EditorState } from 'prosemirror-state'; +import { writeFileSync } from 'fs'; // Or some other way to write a file + +// Set up your prosemirror state/document as you normally do +const state = EditorState.create({ schema: mySchema }); + +// If there are images, we will need to preload the buffers +const opts = { + getImageBuffer(src: string) { + return anImageBuffer; + }, +}; + +// Create a doc in memory, and then write it to disk +const wordDocument = defaultDocxSerializer.serialize(state.doc, opts); + +await writeDocx(wordDocument).then((buffer) => { + writeFileSync('HelloWorld.docx', buffer); +}); +``` + +### Advanced usage + +If you need to access the underlying state and modify the final docx `Document` you can use the last argument of `serialize` to pass in a callback function that receives the `DocxSerializerState`. + +This function needs to return an `IPropertiesOptions` type, ie. the config that should be passed to a `Document`. Your options will be spread with the default options, so you can override any of the defaults. + +```ts +const wordDocument = defaultDocxSerializer.serialize(state.doc, opts, (state) => { + return { + numbering: { + config: state.numbering, + }, + fonts: [], // embed fonts, + styles: { + paragraphStyles, + default: { + heading1: paragraphStyles[1], + }, + }, + }; +}); +``` + +See the [docx documentation](https://docx.js.org/#/usage/document) for more details on the options you can pass in. + +## Extended usage + +Instead of using the `defaultDocxSerializer` you can override or provide custom serializers. 
+ +```ts +import { DocxSerializer, defaultNodes, defaultMarks } from 'prosemirror-docx'; + +const nodeSerializer = { + ...defaultNodes, + my_paragraph(state, node) { + state.renderInline(node); + state.closeBlock(node); + }, +}; + +export const myDocxSerializer = new DocxSerializer(nodeSerializer, defaultMarks); +``` + +The `state` is the `DocxSerializerState` and has helper methods to interact with `docx`. + +If the exported content includes image links that require fetching the image data, you can use asynchronous APIs. Here's a demo example: + +```ts +import { DocxSerializerAsync, defaultAsyncNodes, defaultMarks } from 'prosemirror-docx'; +import { EditorState } from 'prosemirror-state'; +import { writeFileSync } from 'fs'; + +const state = EditorState.create({ schema: mySchema }); + +export const docxSerializer = new DocxSerializerAsync( + { + ...defaultAsyncNodes, + async image(state, node) { + const { src } = node.attrs; + await state.image(src, 70, 'center', undefined, 'png'); + state.closeBlock(node); + }, + }, + defaultMarks, +); + +// If there are images, we will need to preload the buffers +const opts = { + async getImageBuffer(src: string) { + const arrayBuffer = await fetch(src).then((res) => res.arrayBuffer()); + return new Uint8Array(arrayBuffer); + }, +}; + +// Create a doc in memory, and then write it to disk +const wordDocument = docxSerializer.serializeAsync(state.doc, opts); + +await writeDocx(wordDocument).then((buffer) => { + writeFileSync('HelloWorld.docx', buffer); +}); +``` + +## Supported Nodes + +- text +- paragraph +- heading (levels) + - TODO: Support numbering of headings +- blockquote +- code_block + - TODO: No styles supported +- horizontal_rule +- hard_break +- ordered_list +- unordered_list +- list_item +- image +- math +- equations (numbered & unnumbered) +- tables + +Planned: + +- Internal References (e.g. 
see Table 1) + +## Supported Marks + +- em +- strong +- link + - Note: this is actually treated as a node in docx, so ignored as a prosemirror mark, but supported. +- code +- subscript +- superscript +- strikethrough +- underline +- smallcaps +- allcaps + +## Resources + +- [Prosemirror Docs](https://prosemirror.net/docs/) +- [docx](https://docx.js.org/) +- [prosemirror-markdown](https://github.com/ProseMirror/prosemirror-markdown) - similar implementation for markdown! diff --git a/packages/editor-ext/src/lib/prosemirror-docx/index.ts b/packages/editor-ext/src/lib/prosemirror-docx/index.ts new file mode 100644 index 000000000..44269c9c6 --- /dev/null +++ b/packages/editor-ext/src/lib/prosemirror-docx/index.ts @@ -0,0 +1,25 @@ +// MIT - https://github.com/curvenote/prosemirror-docx/ +export type { SectionConfig, SerializationState } from './types'; +export type { + MarkSerializer, + NodeSerializer, + NodeSerializerAsync, + Options, + OptionsAsync, +} from './serializer'; + +export { + DocxSerializerStateAsync, + DocxSerializerAsync, + DocxSerializerState, + DocxSerializer, + MAX_IMAGE_WIDTH, +} from './serializer'; +export { + defaultDocxSerializer, + defaultDocxSerializerAsync, + defaultAsyncNodes, + defaultNodes, + defaultMarks, +} from './schema'; +export { writeDocx, createDocFromState, buildDoc } from './utils'; diff --git a/packages/editor-ext/src/lib/prosemirror-docx/numbering.ts b/packages/editor-ext/src/lib/prosemirror-docx/numbering.ts new file mode 100644 index 000000000..ce6e592e5 --- /dev/null +++ b/packages/editor-ext/src/lib/prosemirror-docx/numbering.ts @@ -0,0 +1,47 @@ +import { AlignmentType, convertInchesToTwip, ILevelsOptions, LevelFormat } from 'docx'; +import { INumbering } from './types'; + +function basicIndentStyle(indent: number): Pick<ILevelsOptions, 'alignment' | 'style'> { // `indent` is in inches; returns the alignment/indent fields spread into each list level + return { + alignment: AlignmentType.START, + style: { + paragraph: { + indent: { left: convertInchesToTwip(indent), hanging: convertInchesToTwip(0.18) }, + }, + }, + }; +} + +const numbered = Array(3) +
.fill([LevelFormat.DECIMAL, LevelFormat.LOWER_LETTER, LevelFormat.LOWER_ROMAN]) + .flat() + .map((format, level) => ({ + level, + format, + text: `%${level + 1}.`, + ...basicIndentStyle((level + 1) / 2), + })); + +const bullets = Array(3) + .fill(['●', '○', '■']) + .flat() + .map((text, level) => ({ + level, + format: LevelFormat.BULLET, + text, + ...basicIndentStyle((level + 1) / 2), + })); + +const styles = { + numbered, + bullets, +}; + +export type NumberingStyles = keyof typeof styles; + +export function createNumbering(reference: string, style: NumberingStyles): INumbering { + return { + reference, + levels: styles[style], + }; +} diff --git a/packages/editor-ext/src/lib/prosemirror-docx/schema.ts b/packages/editor-ext/src/lib/prosemirror-docx/schema.ts new file mode 100644 index 000000000..dedabf886 --- /dev/null +++ b/packages/editor-ext/src/lib/prosemirror-docx/schema.ts @@ -0,0 +1,200 @@ +import { HeadingLevel, ShadingType } from 'docx'; +import { + DocxSerializer, + MarkSerializer, + NodeSerializer, + DocxSerializerAsync, + NodeSerializerAsync, +} from './serializer'; +import { getLatexFromNode } from './utils'; + +export const defaultNodes: NodeSerializer = { + text(state, node) { + state.text(node.text ?? 
''); + }, + paragraph(state, node) { + state.renderInline(node); + state.closeBlock(node); + }, + heading(state, node) { + state.renderInline(node); + const heading = [ + HeadingLevel.HEADING_1, + HeadingLevel.HEADING_2, + HeadingLevel.HEADING_3, + HeadingLevel.HEADING_4, + HeadingLevel.HEADING_5, + HeadingLevel.HEADING_6, + ][node.attrs.level - 1]; + state.closeBlock(node, { heading }); + }, + blockquote(state, node) { + state.renderContent(node, { style: 'IntenseQuote' }); + }, + code_block(state, node) { + // TODO: something for code + state.renderContent(node); + state.closeBlock(node); + }, + horizontal_rule(state, node) { + // Kinda hacky, but this works to insert two paragraphs, the first with a break + state.closeBlock(node, { thematicBreak: true }); + state.closeBlock(node); + }, + hard_break(state) { + state.addRunOptions({ break: 1 }); + }, + ordered_list(state, node) { + state.renderList(node, 'numbered'); + }, + bullet_list(state, node) { + state.renderList(node, 'bullets'); + }, + list_item(state, node) { + state.renderListItem(node); + }, + // Presentational + image(state, node) { + const { src } = node.attrs; + state.image(src); + state.closeBlock(node); + }, + // Technical + math(state, node) { + state.math(getLatexFromNode(node), { inline: true }); + }, + equation(state, node) { + const { id, numbered } = node.attrs; + state.math(getLatexFromNode(node), { inline: false, numbered, id }); + state.closeBlock(node); + }, + table(state, node) { + state.table(node); + }, +}; + +export const defaultAsyncNodes: NodeSerializerAsync = { + text(state, node) { + state.text(node.text ?? 
''); + }, + async paragraph(state, node) { + await state.renderInline(node); + state.closeBlock(node); + }, + async heading(state, node) { + await state.renderInline(node); + const heading = [ + HeadingLevel.HEADING_1, + HeadingLevel.HEADING_2, + HeadingLevel.HEADING_3, + HeadingLevel.HEADING_4, + HeadingLevel.HEADING_5, + HeadingLevel.HEADING_6, + ][node.attrs.level - 1]; + state.closeBlock(node, { heading }); + }, + async blockquote(state, node) { + await state.renderContent(node, { style: 'IntenseQuote' }); // awaited: renderContent is async on the async state, so later siblings must not start before it finishes + }, + async code_block(state, node) { + // TODO: something for code (content is awaited so the block is closed only after it has been rendered) + await state.renderContent(node); + state.closeBlock(node); + }, + horizontal_rule(state, node) { + // Kinda hacky, but this works to insert two paragraphs, the first with a break + state.closeBlock(node, { thematicBreak: true }); + state.closeBlock(node); + }, + hard_break(state) { + state.addRunOptions({ break: 1 }); + }, + async ordered_list(state, node) { + await state.renderList(node, 'numbered'); + }, + async bullet_list(state, node) { + await state.renderList(node, 'bullets'); + }, + async list_item(state, node) { + await state.renderListItem(node); + }, + // Presentational + async image(state, node) { + const { src } = node.attrs; + await state.image(src); + state.closeBlock(node); + }, + // Technical + math(state, node) { + state.math(getLatexFromNode(node), { inline: true }); + }, + equation(state, node) { + const { id, numbered } = node.attrs; + state.math(getLatexFromNode(node), { inline: false, numbered, id }); + state.closeBlock(node); + }, + async table(state, node) { + await state.table(node); + }, +}; + +export const defaultMarks: MarkSerializer = { + em() { + return { italics: true }; + }, + strong() { + return { bold: true }; + }, + italic() { + return { italics: true }; + }, + bold() { + return { bold: true }; + }, + link() { + // Note, this is handled specifically in the serializer + // Word treats links more like a Node rather than a mark + return {}; + }, + code() { + return { + font: { +
name: 'Monospace', + }, + color: '000000', + shading: { + type: ShadingType.SOLID, + color: 'D2D3D2', + fill: 'D2D3D2', + }, + }; + }, + abbr() { + // TODO: abbreviation + return {}; + }, + subscript() { + return { subScript: true }; + }, + superscript() { + return { superScript: true }; + }, + strikethrough() { + // doubleStrike! + return { strike: true }; + }, + underline() { + return { + underline: {}, + }; + }, + smallcaps() { + return { smallCaps: true }; + }, + allcaps() { + return { allCaps: true }; + }, +}; + +export const defaultDocxSerializer = new DocxSerializer(defaultNodes, defaultMarks); +export const defaultDocxSerializerAsync = new DocxSerializerAsync(defaultAsyncNodes, defaultMarks); diff --git a/packages/editor-ext/src/lib/prosemirror-docx/serializer.ts b/packages/editor-ext/src/lib/prosemirror-docx/serializer.ts new file mode 100644 index 000000000..a6a4d508d --- /dev/null +++ b/packages/editor-ext/src/lib/prosemirror-docx/serializer.ts @@ -0,0 +1,925 @@ +import { Node, Mark } from 'prosemirror-model'; +import { + IParagraphOptions, + IRunOptions, + Paragraph, + TextRun, + ExternalHyperlink, + ParagraphChild, + MathRun, + Math, + TabStopType, + TabStopPosition, + SequentialIdentifier, + Bookmark, + ImageRun, + AlignmentType, + Table, + TableRow, + TableCell, + ITableCellOptions, + InternalHyperlink, + SimpleField, + FootnoteReferenceRun, + IImageOptions, + Document, + ITableOptions, + ITableRowOptions, + IPropertiesOptions, +} from 'docx'; +import { imageDimensionsFromData } from 'image-dimensions'; +import { createNumbering, NumberingStyles } from './numbering'; +import { buildDoc, createShortId } from './utils'; +import { IFootnotes, INumbering, Mutable, SectionConfig, SerializationState } from './types'; + +// This is duplicated from @curvenote/schema +export type AlignOptions = 'left' | 'center' | 'right'; + +export type NodeSerializer = Record< + string, + (state: DocxSerializerState, node: Node, parent: Node, index: number) => void +>; + 
+export type NodeSerializerAsync = Record< + string, + (state: DocxSerializerStateAsync, node: Node, parent: Node, index: number) => void | Promise<void> +>; + +export type MarkSerializer = Record< + string, + (state: DocxSerializerState | DocxSerializerStateAsync, node: Node, mark: Mark) => IRunOptions +>; + +export type Options = { + getImageBuffer: (src: string) => Uint8Array; + sections?: SectionConfig[]; +}; + +export type OptionsAsync = { + getImageBuffer: (src: string) => Uint8Array | Promise<Uint8Array>; // may return the bytes directly or a promise of them + sections?: SectionConfig[]; +}; + +export type IMathOpts = { + inline?: boolean; + id?: string | null; + numbered?: boolean; +}; +export type ImageType = 'jpg' | 'png' | 'gif' | 'bmp'; + +export const MAX_IMAGE_WIDTH = 600; + +function createReferenceBookmark( + id: string, + kind: 'Equation' | 'Figure' | 'Table', + before?: string, + after?: string, +) { + const textBefore = before ? [new TextRun(before)] : []; + const textAfter = after ? [new TextRun(after)] : []; + return new Bookmark({ + id, + children: [...textBefore, new SequentialIdentifier(kind), ...textAfter], + }); +} + +export class DocxSerializerState { + nodes: NodeSerializer; + + options: Options; + + marks: MarkSerializer; + + children: (Paragraph | Table)[]; + + sections: Array<{ + config: SectionConfig; + children: (Paragraph | Table)[]; + }>; + + currentSectionIndex = 0; + + numbering: INumbering[]; + + footnotes: IFootnotes = {}; + + nextRunOpts?: IRunOptions; + + current: ParagraphChild[] = []; + + currentLink?: { link: string; children: IRunOptions[] }; + + // Optionally add options + nextParentParagraphOpts?: IParagraphOptions; + + currentNumbering?: { reference: string; level: number }; + + constructor(nodes: NodeSerializer, marks: MarkSerializer, options: Options) { + this.nodes = nodes; + this.marks = marks; + this.options = options ??
{}; + this.children = []; + this.numbering = []; + + // Initialize sections (read from this.options so a nullish `options` argument cannot throw here) + if (this.options.sections && this.options.sections.length > 0) { + this.sections = this.options.sections.map((config) => ({ + config, + children: [], + })); + this.children = this.sections[0].children; + } else { + this.sections = []; + } + } + + renderContent(parent: Node, opts?: IParagraphOptions) { + parent.forEach((node, _, i) => { + if (opts) this.addParagraphOptions(opts); + this.render(node, parent, i); + }); + } + + render(node: Node, parent: Node, index: number) { + if (typeof parent === 'number') throw new Error('!'); + if (!this.nodes[node.type.name]) + throw new Error(`Token type \`${node.type.name}\` not supported by Word renderer`); + this.nodes[node.type.name](this, node, parent, index); + } + + renderMarks(node: Node, marks: Mark[]): IRunOptions { + return marks + .map((mark) => { + return this.marks[mark.type.name]?.(this, node, mark); + }) + .reduce((a, b) => ({ ...a, ...b }), {}); + } + + renderInline(parent: Node) { + // Pop the stack over to this object when we encounter a link, and closeLink restores it + let currentLink: { link: string; stack: ParagraphChild[] } | undefined; + const closeLink = () => { + if (!currentLink) return; + const hyperlink = new ExternalHyperlink({ + link: currentLink.link, + // child: this.current[0], + children: this.current, + }); + this.current = [...currentLink.stack, hyperlink]; + currentLink = undefined; + }; + const openLink = (href: string) => { + const sameLink = href === currentLink?.link; + this.addRunOptions({ style: 'Hyperlink' }); + // TODO: https://github.com/dolanmiu/docx/issues/1119 + // Remove the if statement here and oneLink!
+ const oneLink = true; + if (!oneLink) { + closeLink(); + } else { + if (currentLink && sameLink) return; + if (currentLink && !sameLink) { + // Close previous, and open a new one + closeLink(); + } + } + currentLink = { + link: href, + stack: this.current, + }; + this.current = []; + }; + const progress = (node: Node, offset: number, index: number) => { + const links = node.marks.filter((m) => m.type.name === 'link'); + const hasLink = links.length > 0; + if (hasLink) { + openLink(links[0].attrs.href); + } else if (!hasLink && currentLink) { + closeLink(); + } + if (node.isText) { + this.text(node.text, this.renderMarks(node, [...node.marks])); + } else { + this.render(node, parent, index); + } + }; + parent.forEach(progress); + // Must call close at the end of everything, just in case + closeLink(); + } + + renderList(node: Node, style: NumberingStyles) { + if (!this.currentNumbering) { + const nextId = createShortId(); + this.numbering.push(createNumbering(nextId, style)); + this.currentNumbering = { reference: nextId, level: 0 }; + } else { + const { reference, level } = this.currentNumbering; + this.currentNumbering = { reference, level: level + 1 }; + } + this.renderContent(node); + if (this.currentNumbering.level === 0) { + delete this.currentNumbering; + } else { + const { reference, level } = this.currentNumbering; + this.currentNumbering = { reference, level: level - 1 }; + } + } + + // This is a pass through to the paragraphs, etc. 
underneath they will close the block + renderListItem(node: Node) { + if (!this.currentNumbering) throw new Error('Trying to create a list item without a list?'); + this.addParagraphOptions({ numbering: this.currentNumbering }); + this.renderContent(node); + } + + addParagraphOptions(opts: IParagraphOptions) { + this.nextParentParagraphOpts = { ...this.nextParentParagraphOpts, ...opts }; + } + + addRunOptions(opts: IRunOptions) { + this.nextRunOpts = { ...this.nextRunOpts, ...opts }; + } + + text(text: string | null | undefined, opts?: IRunOptions) { + if (!text) return; + this.current.push(new TextRun({ text, ...this.nextRunOpts, ...opts })); + delete this.nextRunOpts; + } + + math(latex: string, opts: IMathOpts = { inline: true }) { + if (opts.inline || !opts.numbered) { + this.current.push(new Math({ children: [new MathRun(latex)] })); + return; + } + const id = opts.id ?? createShortId(); + this.current = [ + new TextRun('\t'), + new Math({ + children: [new MathRun(latex)], + }), + new TextRun('\t('), + createReferenceBookmark(id, 'Equation'), + new TextRun(')'), + ]; + this.addParagraphOptions({ + tabStops: [ + { + type: TabStopType.CENTER, + position: TabStopPosition.MAX / 2, + }, + { + type: TabStopType.RIGHT, + position: TabStopPosition.MAX, + }, + ], + }); + } + + // not sure what this actually is, seems to be close for 8.5x11 + maxImageWidth = MAX_IMAGE_WIDTH; + + image( + src: string, + widthPercent = 70, + align: AlignOptions = 'center', + imageRunOpts?: IImageOptions, + imageType?: ImageType, + ) { + const buffer = this.options.getImageBuffer(src); + const dimensions = imageDimensionsFromData(buffer); + /* If the image is not a valid image, don't add it */ + if (!dimensions) return; + const aspect = dimensions.height / dimensions.width; + const width = this.maxImageWidth * (widthPercent / 100); + let it; + try { + it = imageType || (src.replace(/.*\./, '').toLowerCase() as any); + } catch (e) { + it = 'png'; + } + this.current.push( + new ImageRun({ + 
data: buffer, + ...imageRunOpts, + type: it, + transformation: { + ...(imageRunOpts?.transformation || {}), + width, + height: width * aspect, + }, + }), + ); + let alignment: string; + switch (align) { + case 'right': + alignment = AlignmentType.RIGHT; + break; + case 'left': + alignment = AlignmentType.LEFT; + break; + default: + alignment = AlignmentType.CENTER; + } + this.addParagraphOptions({ + alignment: alignment as any, + }); + } + + table( + node: Node, + opts: { + getCellOptions?: (cell: Node) => ITableCellOptions; + getRowOptions?: (row: Node) => Omit<ITableRowOptions, 'children'>; // row children are built from the rendered cells + tableOptions?: Omit<ITableOptions, 'rows'>; // rows are built from the table node + } = {}, + ) { + const { getCellOptions, getRowOptions, tableOptions } = opts; + const actualChildren = this.children; + const rows: TableRow[] = []; + node.content.forEach((row) => { + const cells: TableCell[] = []; + // Check if all cells are headers in this row + let tableHeader = true; + row.content.forEach((cell) => { + if (cell.type.name !== 'table_header') { + tableHeader = false; + } + }); + // This scales images inside of tables + this.maxImageWidth = MAX_IMAGE_WIDTH / row.content.childCount; + row.content.forEach((cell) => { + this.children = []; + this.renderContent(cell); + const tableCellOpts: Mutable<ITableCellOptions> = { children: this.children }; + const colspan = cell.attrs.colspan ?? 1; + const rowspan = cell.attrs.rowspan ??
1; + if (colspan > 1) tableCellOpts.columnSpan = colspan; + if (rowspan > 1) tableCellOpts.rowSpan = rowspan; + cells.push( + new TableCell({ + ...tableCellOpts, + ...(getCellOptions?.(cell) || {}), + }), + ); + }); + rows.push(new TableRow({ ...(getRowOptions?.(row) || {}), children: cells, tableHeader })); + }); + this.maxImageWidth = MAX_IMAGE_WIDTH; + const table = new Table({ ...tableOptions, rows }); + actualChildren.push(table); + // If there are multiple tables, this seperates them + actualChildren.push(new Paragraph('')); + this.children = actualChildren; + } + + captionLabel(id: string, kind: 'Figure' | 'Table', { suffix } = { suffix: ': ' }) { + this.current.push(...[createReferenceBookmark(id, kind, `${kind} `), new TextRun(suffix)]); + } + + $footnoteCounter = 0; + + footnote(node: Node) { + const { current, nextRunOpts } = this; + // Delete everything and work with the footnote inline on the current + this.current = []; + delete this.nextRunOpts; + + this.$footnoteCounter += 1; + this.renderInline(node); + this.footnotes[this.$footnoteCounter] = { + children: [new Paragraph({ children: this.current })], + }; + this.current = current; + this.nextRunOpts = nextRunOpts; + this.current.push(new FootnoteReferenceRun(this.$footnoteCounter)); + } + + closeBlock(node: Node, props?: IParagraphOptions) { + const paragraph = new Paragraph({ + children: this.current, + ...this.nextParentParagraphOpts, + ...props, + }); + this.current = []; + delete this.nextParentParagraphOpts; + this.children.push(paragraph); + } + + /** + * Move to the next section. If no more sections are available, + * this will be ignored (content continues in current section). 
+ */ + nextSection() { + if (this.currentSectionIndex < this.sections.length - 1) { + this.currentSectionIndex += 1; + this.children = this.sections[this.currentSectionIndex].children; + } + } + + /** + * Update the current section's configuration (shallow-merged over the existing config) + */ + setSectionConfig(config: Partial<SectionConfig>) { + this.sections[this.currentSectionIndex].config = { + ...this.sections[this.currentSectionIndex].config, + ...config, + }; + } + + /** + * Add a new section with the given configuration and switch to it + */ + addSection(config: SectionConfig = {}) { + this.sections.push({ + config, + children: [], + }); + this.currentSectionIndex = this.sections.length - 1; + this.children = this.sections[this.currentSectionIndex].children; + } + + /** + * Get the current section index + */ + getCurrentSectionIndex(): number { + return this.currentSectionIndex; + } + + /** + * Get the current section configuration + */ + getCurrentSectionConfig(): SectionConfig { + return this.sections[this.currentSectionIndex].config; + } + + /** + * Get the current serialization state for document creation + */ + getSerializationState(): SerializationState { + return { + numbering: this.numbering, + sections: this.sections, + footnotes: this.footnotes, + }; + } + + createReference(id: string, before?: string, after?: string) { + const children: ParagraphChild[] = []; + if (before) children.push(new TextRun(before)); + children.push(new SimpleField(`REF ${id} \\h`)); + if (after) children.push(new TextRun(after)); + const ref = new InternalHyperlink({ anchor: id, children }); + this.current.push(ref); + } +} + +export class DocxSerializer { + nodes: NodeSerializer; + + marks: MarkSerializer; + + constructor(nodes: NodeSerializer, marks: MarkSerializer) { + this.nodes = nodes; + this.marks = marks; + } + + serialize( + content: Node, + options: Options, + getDocumentOptions?: (state: SerializationState) => IPropertiesOptions, + ): Document { + const state = new DocxSerializerState(this.nodes, this.marks,
options); + state.renderContent(content); + return buildDoc(state, getDocumentOptions?.(state)); + } +} + +export class DocxSerializerStateAsync { + nodes: NodeSerializerAsync; + + options: OptionsAsync; + + marks: MarkSerializer; + + children: (Paragraph | Table)[]; + + sections: Array<{ + config: SectionConfig; + children: (Paragraph | Table)[]; + }>; + + currentSectionIndex = 0; + + numbering: INumbering[]; + + footnotes: IFootnotes = {}; + + nextRunOpts?: IRunOptions; + + current: ParagraphChild[] = []; + + currentLink?: { link: string; children: IRunOptions[] }; + + // Optionally add options + nextParentParagraphOpts?: IParagraphOptions; + + currentNumbering?: { reference: string; level: number }; + + constructor(nodes: NodeSerializerAsync, marks: MarkSerializer, options: OptionsAsync) { + this.nodes = nodes; + this.marks = marks; + this.options = options ?? {}; + this.children = []; + this.numbering = []; + + // Initialize sections (read from this.options so a nullish `options` argument cannot throw here) + if (this.options.sections && this.options.sections.length > 0) { + this.sections = this.options.sections.map((config) => ({ + config, + children: [], + })); + this.children = this.sections[0].children; + } else { + this.sections = []; + } + } + + async renderContent(parent: Node, opts?: IParagraphOptions) { + for (let i = 0; i < parent.childCount; i += 1) { + const node = parent.child(i); + if (opts) this.addParagraphOptions(opts); + // eslint-disable-next-line no-await-in-loop + await this.render(node, parent, i); + } + } + + async render(node: Node, parent: Node, index: number) { + if (typeof parent === 'number') throw new Error('!'); + if (!this.nodes[node.type.name]) + throw new Error(`Token type \`${node.type.name}\` not supported by Word renderer`); + await Promise.resolve(this.nodes[node.type.name](this, node, parent, index)); + } + + renderMarks(node: Node, marks: Mark[]): IRunOptions { + return marks + .map((mark) => { + return this.marks[mark.type.name]?.(this, node, mark); + }) + .reduce((a, b) => ({ ...a, ...b }), {}); + } + + async
renderInline(parent: Node) { + // Pop the stack over to this object when we encounter a link, and closeLink restores it + let currentLink: { link: string; stack: ParagraphChild[] } | undefined; + const closeLink = () => { + if (!currentLink) return; + const hyperlink = new ExternalHyperlink({ + link: currentLink.link, + // child: this.current[0], + children: this.current, + }); + this.current = [...currentLink.stack, hyperlink]; + currentLink = undefined; + }; + const openLink = (href: string) => { + const sameLink = href === currentLink?.link; + this.addRunOptions({ style: 'Hyperlink' }); + // TODO: https://github.com/dolanmiu/docx/issues/1119 + // Remove the if statement here and oneLink! + const oneLink = true; + if (!oneLink) { + closeLink(); + } else { + if (currentLink && sameLink) return; + if (currentLink && !sameLink) { + // Close previous, and open a new one + closeLink(); + } + } + currentLink = { + link: href, + stack: this.current, + }; + this.current = []; + }; + const progress = async (node: Node, offset: number, index: number) => { + const links = node.marks.filter((m) => m.type.name === 'link'); + const hasLink = links.length > 0; + if (hasLink) { + openLink(links[0].attrs.href); + } else if (!hasLink && currentLink) { + closeLink(); + } + if (node.isText) { + this.text(node.text, this.renderMarks(node, [...node.marks])); + } else { + await this.render(node, parent, index); + } + }; + // Process nodes sequentially to maintain order + for (let i = 0; i < parent.childCount; i += 1) { + // eslint-disable-next-line no-await-in-loop + await progress(parent.child(i), 0, i); + } + // Must call close at the end of everything, just in case + closeLink(); + } + + async renderList(node: Node, style: NumberingStyles) { + if (!this.currentNumbering) { + const nextId = createShortId(); + this.numbering.push(createNumbering(nextId, style)); + this.currentNumbering = { reference: nextId, level: 0 }; + } else { + const { reference, level } = this.currentNumbering; 
+ this.currentNumbering = { reference, level: level + 1 }; + } + await this.renderContent(node); + if (this.currentNumbering.level === 0) { + delete this.currentNumbering; + } else { + const { reference, level } = this.currentNumbering; + this.currentNumbering = { reference, level: level - 1 }; + } + } + + // This is a pass through to the paragraphs, etc. underneath they will close the block + async renderListItem(node: Node) { + if (!this.currentNumbering) throw new Error('Trying to create a list item without a list?'); + this.addParagraphOptions({ numbering: this.currentNumbering }); + await this.renderContent(node); + } + + addParagraphOptions(opts: IParagraphOptions) { + this.nextParentParagraphOpts = { ...this.nextParentParagraphOpts, ...opts }; + } + + addRunOptions(opts: IRunOptions) { + this.nextRunOpts = { ...this.nextRunOpts, ...opts }; + } + + text(text: string | null | undefined, opts?: IRunOptions) { + if (!text) return; + this.current.push(new TextRun({ text, ...this.nextRunOpts, ...opts })); + delete this.nextRunOpts; + } + + math(latex: string, opts: IMathOpts = { inline: true }) { + if (opts.inline || !opts.numbered) { + this.current.push(new Math({ children: [new MathRun(latex)] })); + return; + } + const id = opts.id ?? 
createShortId(); + this.current = [ + new TextRun('\t'), + new Math({ + children: [new MathRun(latex)], + }), + new TextRun('\t('), + createReferenceBookmark(id, 'Equation'), + new TextRun(')'), + ]; + this.addParagraphOptions({ + tabStops: [ + { + type: TabStopType.CENTER, + position: TabStopPosition.MAX / 2, + }, + { + type: TabStopType.RIGHT, + position: TabStopPosition.MAX, + }, + ], + }); + } + + // not sure what this actually is, seems to be close for 8.5x11 + maxImageWidth = MAX_IMAGE_WIDTH; + + async image( + src: string, + widthPercent = 70, + align: AlignOptions = 'center', + imageRunOpts?: IImageOptions, + imageType?: ImageType, + ) { + const buffer = await Promise.resolve(this.options.getImageBuffer(src)); + const dimensions = imageDimensionsFromData(buffer); + /* If the image is not a valid image, don't add it */ + if (!dimensions) return; + const aspect = dimensions.height / dimensions.width; + const width = this.maxImageWidth * (widthPercent / 100); + let it; + try { + it = imageType || (src.replace(/.*\./, '').toLowerCase() as any); + } catch (e) { + it = 'png'; + } + this.current.push( + new ImageRun({ + data: buffer, + ...imageRunOpts, + type: it, + transformation: { + ...(imageRunOpts?.transformation || {}), + width, + height: width * aspect, + }, + }), + ); + let alignment: string; + switch (align) { + case 'right': + alignment = AlignmentType.RIGHT; + break; + case 'left': + alignment = AlignmentType.LEFT; + break; + default: + alignment = AlignmentType.CENTER; + } + this.addParagraphOptions({ + alignment: alignment as any, + }); + } + + async table( + node: Node, + opts: { + getCellOptions?: (cell: Node) => ITableCellOptions; + getRowOptions?: (row: Node) => Omit; + tableOptions?: Omit; + } = {}, + ) { + const { getCellOptions, getRowOptions, tableOptions } = opts; + const actualChildren = this.children; + const rows: TableRow[] = []; + + for (let rowIndex = 0; rowIndex < node.content.childCount; rowIndex += 1) { + const row = 
/**
 * Append a caption label ("Figure " / "Table " reference bookmark) followed by
 * a suffix to the current run list.
 *
 * @param id     Bookmark id of the captioned element.
 * @param kind   Label kind, also used as the visible prefix.
 * @param suffix Text placed after the label; defaults to ': ' even when an
 *               options object without `suffix` is passed.
 */
captionLabel(
  id: string,
  kind: 'Figure' | 'Table',
  { suffix = ': ' }: { suffix?: string } = {},
) {
  this.current.push(createReferenceBookmark(id, kind, `${kind} `), new TextRun(suffix));
}

// Id of the most recently created footnote; ids start at 1.
$footnoteCounter = 0;

/**
 * Render `node` inline into a new footnote and append a reference to it at the
 * current position.
 *
 * The current run list and pending run options are set aside while the
 * footnote body is rendered and are restored afterwards, even if rendering
 * throws.
 */
async footnote(node: Node) {
  const { current, nextRunOpts } = this;
  // Delete everything and work with the footnote inline on the current
  this.current = [];
  delete this.nextRunOpts;

  this.$footnoteCounter += 1;
  // Capture the id before awaiting: rendering may create further footnotes
  // and advance the counter.
  const footnoteId = this.$footnoteCounter;
  try {
    await this.renderInline(node);
    this.footnotes[footnoteId] = {
      children: [new Paragraph({ children: this.current })],
    };
  } finally {
    this.current = current;
    this.nextRunOpts = nextRunOpts;
  }
  this.current.push(new FootnoteReferenceRun(footnoteId));
}

/**
 * Turn the current run list into a paragraph and append it to the active
 * children. Pending paragraph options are applied (overridden by `props`) and
 * then cleared; the run list is reset.
 *
 * @param node  Not read by this method.
 * @param props Paragraph options that take precedence over the pending ones.
 */
closeBlock(node: Node, props?: IParagraphOptions) {
  const paragraph = new Paragraph({
    children: this.current,
    ...this.nextParentParagraphOpts,
    ...props,
  });
  this.current = [];
  delete this.nextParentParagraphOpts;
  this.children.push(paragraph);
}

/**
 * Move to the next section. If no more sections are available,
 * this will be ignored (content continues in current section).
 */
nextSection() {
  if (this.currentSectionIndex < this.sections.length - 1) {
    this.currentSectionIndex += 1;
    this.children = this.sections[this.currentSectionIndex].children;
  }
}

/**
 * Update the current section's configuration; keys in `config` override the
 * existing ones, other keys are kept.
 */
setSectionConfig(config: Partial<SectionConfig>) {
  const section = this.sections[this.currentSectionIndex];
  section.config = { ...section.config, ...config };
}

/**
 * Add a new section with the given configuration and switch to it
 */
addSection(config: SectionConfig = {}) {
  this.sections.push({
    config,
    children: [],
  });
  this.currentSectionIndex = this.sections.length - 1;
  this.children = this.sections[this.currentSectionIndex].children;
}

/**
 * Get the current section index
 */
getCurrentSectionIndex(): number {
  return this.currentSectionIndex;
}

/**
 * Get the current section configuration
 */
getCurrentSectionConfig(): SectionConfig {
  return this.sections[this.currentSectionIndex].config;
}

/**
 * Get the current serialization state for document creation
 * (numbering, sections and footnotes; the objects are shared, not copied).
 */
getSerializationState(): SerializationState {
  return {
    numbering: this.numbering,
    sections: this.sections,
    footnotes: this.footnotes,
  };
}
/**
 * Serializes a ProseMirror document to a docx `Document` using node
 * serializers that may be asynchronous.
 */
export class DocxSerializerAsync {
  /**
   * @param nodes Node serializers handed to the serializer state.
   * @param marks Mark serializers handed to the serializer state.
   */
  constructor(
    public nodes: NodeSerializerAsync,
    public marks: MarkSerializer,
  ) {}

  /**
   * Render `content` into a fresh serializer state and build the document.
   *
   * @param content            Root node to render.
   * @param options            Options passed to the serializer state.
   * @param getDocumentOptions Optional callback that receives the populated
   *                           state and returns document options for `buildDoc`.
   */
  async serializeAsync(
    content: Node,
    options: OptionsAsync,
    getDocumentOptions?: (state: SerializationState) => IPropertiesOptions,
  ) {
    const serializerState = new DocxSerializerStateAsync(this.nodes, this.marks, options);
    await serializerState.renderContent(content);
    const documentOptions =
      getDocumentOptions == null ? undefined : getDocumentOptions(serializerState);
    return buildDoc(serializerState, documentOptions);
  }
}
/**
 * Build a test document: forwards `args` to `tnodes.doc`, with an empty string
 * as the first argument.
 */
export const tdoc = (...args: Parameters<typeof tnodes.doc>) => tnodes.doc('', ...args);
ESExQVFxgZGhscHR4fICEiIyQlJicoKSorLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZISUpLTE1OUFFSU1RVVllaW1xdXmBhYmNkZWZnaGprbG1ub3Byc3R1dnd4eXp8fn+AgYKDhIWGiImKi4yNj5CRkpOUlZaXmJmam5ydnp+goaKjpKaoqqusra6vsLGys7S1tri5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7/P3+fkZpVQAABcBJREFUGBntwftjlQMcBvDnnLNL22qzJjWlKLHFVogyty3SiFq6EZliqZGyhnSxsLlMRahYoZKRFcul5dKFCatYqWZaNKvWtrPz/A2+7/b27qRzec/lPfvl/XxgMplMJpPJZDKZAtA9HJ3ppnIez0KnSdtC0RCNznHdJrbrh85wdSlVVRaEXuoGamYi5K5430HNiTiEWHKJg05eRWgNfKeV7RxbqUhGKPV/207VupQ8is0IoX5vtFC18SqEHaK4GyHTZ2kzVR8PBTCO4oANIZL4ShNVZcOhKKeYg9DoWdhI1ec3os2VFI0JCIUez5+i6st0qJZRrEAIJCw+QdW223BG/EmKwTBc/IJ/qfp2FDrkUnwFo8U9dZyqnaPhxLqfYjyM1S3vb6p+GGOBszsojoTDSDFz6qj66R4LzvYJxVMwUNRjf1H1ywQr/megg2RzLximy8waqvbda8M5iijegVEiHjlM1W/3h+FcXesphsMY4dMOUnUgOxyuPEzxPQwRNvV3qg5Nj4BreyimwADWe/dRVTMjEm6MoGLzGwtystL6RyOY3qSqdlYU3FpLZw1VW0sK5943MvUCKwJ1noNtjs6Ohge76Zq9ZkfpigU5WWkDYuCfbs1U5HWFR8/Qq4a9W0uK5k4ZmdrTCl8spGIePLPlbqqsc1Afe83O0hULc8alDYiBd7ZyitYMeBfR55rR2fOKP6ioPk2dGvZ+UVI0d8rtqT2tcCexlqK2F3wRn5Q+YVbBqrLKOupkr9lZujAOrmS0UpTb4JeIPkNHZ+cXr6uoPk2vyuBSPhWLEKj45PQJuQWryyqP0Z14uGLdROHIRNBEXDR09EP5r62rOHCazhrD4VKPwxTH+sIA3ZPTJ+YuWV22n+IruHFDC8X2CBjnPoolcGc2FYUwzmsUWXDHsoGKLBhmN0VvuBVfTVE/AAbpaid5CB4MbaLY1QXGuIViLTyZQcVyGGMuxWPwaA0Vk2GI9RRp8Ci2iuLkIBjhT5LNUfAspZFiTwyC72KK7+DNg1SsRvCNp3gZXq2k4iEEXSHFJHgVXUlxejCCbTvFAHiXdIJiXxyCK7KJ5FHoMZGK9xBcwyg2QpdlVMxEUM2iyIMuXXZQNF+HswxMsSAAJRQjoE//eoqDCXBSTO6f1xd+O0iyNRY6jaWi1ALNYCocZROj4JdEikroVkjFk9DcStXxpdfCD2MoXodu4RUU9ptxxmXssOfxnvDVcxRTod9FxyhqLoAqis5aPhwTDp9spRgEH2Q6KLbYoKqlaKTm6Isp0C/sJMnjFvhiERXPQvUNRe9p29lhR04CdBpC8Sl8YiuncIxEuzUUg4Dkgj+paVozygY9plPMh28SaymO9kabAopREGF3vt9MzeFFl8G7lRSZ8FFGK8XX4VA8QjEd7XrM3M0OXz8YCy+qKBLgq3wqnofiTorF0Ax56Rg1J1elW+BBAsVe+My6iYq7IK6keBdOIseV2qn5Pb8f3MqkWAXf9ThM8c8lAOIotuFsF875lRrH5klRcG0+xcPwQ1oLxfeRAP4heQTnGL78X2rqlw2DK59SXAV/zKaiGMAuko5InCt68mcOan5+ohf+z1pP8lQY/GHZQMV4YD3FpXDp4qerqbF/lBWBswyi+AL+ia+maLgcRRQj4IYlY/UpauqKBsPJAxQF8NM1TRQ/RudSPAD34rK3scOuR8/HGc
/**
 * Serializer under test: the default async nodes, except that images are
 * rendered with an explicit 'png' type (base64 encoded sources carry no usable
 * file extension).
 */
export const docxSerializer = new DocxSerializerAsync(
  {
    ...defaultAsyncNodes,
    async image(state, node) {
      const { src } = node.attrs;
      await state.image(src, 70, 'center', undefined, 'png');
      state.closeBlock(node);
    },
  },
  defaultMarks,
);

/** Blocks shared by both tests; built fresh on every call. */
function sampleBlocks() {
  return [
    h1('Welcome to ', code('prosemirror-docx'), strong('!!')),
    p('This is ', code('code'), br(), 'hello!'),
    ul(li(p('bullet 1')), li(p('bullet 2')), ul(li(p('bullet 3.1')), li(p('bullet 3.2')))),
    ul(li(p('bullet 1')), li(p('bullet 2')), ul(li(p('bullet 3.1')), li(p('bullet 3.2')))),
    ol(li(p('bullet 1')), li(p('bullet 2')), ul(li(p('bullet 3.1')), li(p('bullet 3.2')))),
    p(a('This is '), a(em('emphasized'))),
    hr(),
    p('Some math in a paragraph: ', math('Ax=b'), ' and then a standalone numbered equation:'),
    equation('Ax=b'),
    p('And an unnumbered equation:'),
    equationUnnumbered('\\sum^{9}_{i=0}i+2 = ??'),
  ];
}

/** A .docx file is a ZIP package, so a packed document must start with "PK". */
function expectDocxBuffer(buffer: Buffer) {
  expect(buffer.byteLength).toBeGreaterThan(0);
  expect(buffer.subarray(0, 2).toString('latin1')).toBe('PK');
}

describe('DOCX Serialization', () => {
  it('serializes document structure with async image handling', async () => {
    const w = await docxSerializer.serializeAsync(
      tdoc(
        ...sampleBlocks(),
        img({ src: 'https://avatars.githubusercontent.com/u/78044536' }),
        img({ src: `data:text/plain;base64,${imageBase64Data}` }),
      ),
      {
        // NOTE(review): the first image is fetched over the network, so this
        // test needs connectivity.
        async getImageBuffer(src: string) {
          const arrayBuffer = await fetch(src).then((res) => res.arrayBuffer());
          return new Uint8Array(arrayBuffer);
        },
      },
    );
    const buffer = await writeDocx(w);
    // Kept so the result can be opened in Word for manual inspection.
    fs.writeFileSync(`hello-async.docx`, buffer);
    expectDocxBuffer(buffer);
  });

  it('serializes document structure with sync image handling', async () => {
    const w = defaultDocxSerializer.serialize(tdoc(...sampleBlocks(), img()), {
      getImageBuffer() {
        return Buffer.from(imageBase64Data, 'base64');
      },
    });
    const buffer = await writeDocx(w);
    // Kept so the result can be opened in Word for manual inspection.
    fs.writeFileSync('hello.docx', buffer);
    expectDocxBuffer(buffer);
  });
});
/**
 * Create a random identifier of exactly 9 characters from `[0-9a-z]`.
 *
 * A single `Math.random().toString(36)` can yield fewer than 9 digits after
 * the "0." prefix (0.5 is "0.i", 0 is "0"), so digits are accumulated until
 * there are enough.
 */
export function createShortId() {
  let id = '';
  while (id.length < 9) {
    id += Math.random().toString(36).slice(2);
  }
  return id.slice(0, 9);
}

/**
 * Build a docx `Document` from a serialization state.
 *
 * Every entry of `state.sections` becomes a document section; a section
 * without `properties` is continuous. When `state.sections` is absent, a
 * single continuous section holding `state.children` (or nothing) is used.
 *
 * @param state Numbering, sections/children and footnotes to put in the document.
 * @param opts  Extra document options, spread last so they override the
 *              derived `footnotes`, `numbering` and `sections`.
 */
export function buildDoc(state: SerializationState, opts?: IPropertiesOptions): Document {
  const sections = state.sections?.map((section) => ({
    properties: section.config.properties || {
      type: SectionType.CONTINUOUS,
    },
    headers: section.config.headers,
    footers: section.config.footers,
    children: section.children,
  })) ?? [
    {
      headers: undefined,
      footers: undefined,
      properties: {
        type: SectionType.CONTINUOUS,
      },
      children: state.children || [],
    },
  ];

  return new Document({
    footnotes: state.footnotes,
    numbering: {
      config: state.numbering,
    },
    sections,
    ...opts,
  });
}
/**
 * Creates a docx document from the given state, placing `state.children` in a
 * single section with an empty configuration.
 *
 * @deprecated use `buildDoc` instead
 */
export function createDocFromState(state: {
  numbering: INumberingOptions['config'];
  children: ISectionOptions['children'];
  footnotes?: IFootnotes;
}) {
  return buildDoc({
    numbering: state.numbering,
    sections: [
      {
        config: {},
        children: state.children,
      },
    ],
    footnotes: state.footnotes,
  });
}

/**
 * Pack a docx `Document` into a buffer.
 *
 * @param doc   Document to pack.
 * @param write Optional callback invoked (and awaited) with the buffer before
 *              it is returned.
 * @returns The packed document.
 */
export async function writeDocx(
  doc: Document,
  /**
   * @deprecated use `.then()` or `await` instead
   */
  write?: ((buffer: Buffer) => void) | ((buffer: Buffer) => Promise<void>),
) {
  const buffer = await Packer.toBuffer(doc);
  await write?.(buffer);
  return buffer;
}

/**
 * Concatenate the text of the direct text children of `node`; non-text
 * children are skipped.
 */
export function getLatexFromNode(node: ProsemirrorNode): string {
  let math = '';
  node.forEach((child) => {
    if (child.isText) math += child.text ?? '';
    // TODO: improve this as we may have other things in the future
  });
  return math;
}