mirror of
https://github.com/documenso/documenso.git
synced 2026-06-22 04:12:06 +10:00
feat: support DOCX uploads via Gotenberg (#2801)
Uploaded .docx files are converted to PDF on the server using a Gotenberg sidecar before entering the normal envelope pipeline. The feature is opt-in via NEXT_PRIVATE_DOCUMENT_CONVERSION_URL; when unset, only PDF uploads are accepted. A per-process circuit breaker opens for 30s after a conversion failure to shed load. Ships a dev Dockerfile that layers Microsoft Core Fonts and additional language fonts onto the upstream Gotenberg image for better fidelity. Co-authored-by: Ephraim Duncan <55143799+ephraimduncan@users.noreply.github.com>
This commit is contained in:
@@ -0,0 +1,90 @@
|
||||
import { env } from '@documenso/lib/utils/env';
|
||||
|
||||
export const DOCUMENT_CONVERSION_MIME_TYPE_DOCX =
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
|
||||
|
||||
const DEFAULT_DOCUMENT_CONVERSION_TIMEOUT_MS = 30_000;
|
||||
|
||||
/**
|
||||
* Returns whether the document conversion feature is enabled.
|
||||
*
|
||||
* Platform-aware:
|
||||
* - On the server, checks the private URL is configured.
|
||||
* - On the client, reads the derived public flag injected via `window.__ENV__`.
|
||||
*/
|
||||
export const IS_DOCUMENT_CONVERSION_ENABLED = (): boolean => {
|
||||
if (typeof window === 'undefined') {
|
||||
return !!env('NEXT_PRIVATE_DOCUMENT_CONVERSION_URL');
|
||||
}
|
||||
|
||||
return env('NEXT_PUBLIC_DOCUMENT_CONVERSION_ENABLED') === 'true';
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the configured conversion service base URL as supplied via env, or
|
||||
* `undefined` if not configured.
|
||||
*
|
||||
* Server-side only.
|
||||
*/
|
||||
export const DOCUMENT_CONVERSION_URL = (): string | undefined => {
|
||||
return env('NEXT_PRIVATE_DOCUMENT_CONVERSION_URL');
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns HTTP Basic auth credentials for the conversion service, or
|
||||
* `undefined` if either env var is missing. When Gotenberg is started with
|
||||
* `--api-enable-basic-auth`, every request must carry these credentials.
|
||||
*
|
||||
* Server-side only.
|
||||
*/
|
||||
export const DOCUMENT_CONVERSION_AUTH = (): { username: string; password: string } | undefined => {
|
||||
const username = env('NEXT_PRIVATE_DOCUMENT_CONVERSION_USERNAME');
|
||||
const password = env('NEXT_PRIVATE_DOCUMENT_CONVERSION_PASSWORD');
|
||||
|
||||
if (!username || !password) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
return { username, password };
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the per-request timeout for conversion calls in milliseconds.
|
||||
*
|
||||
* Falls back to a 30 second default when the env value is missing or
|
||||
* unparseable.
|
||||
*/
|
||||
export const DOCUMENT_CONVERSION_TIMEOUT_MS = (): number => {
|
||||
const raw = env('NEXT_PRIVATE_DOCUMENT_CONVERSION_TIMEOUT_MS');
|
||||
|
||||
if (!raw) {
|
||||
return DEFAULT_DOCUMENT_CONVERSION_TIMEOUT_MS;
|
||||
}
|
||||
|
||||
const parsed = parseInt(raw, 10);
|
||||
|
||||
if (Number.isNaN(parsed) || parsed <= 0) {
|
||||
return DEFAULT_DOCUMENT_CONVERSION_TIMEOUT_MS;
|
||||
}
|
||||
|
||||
return parsed;
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the mime type -> extensions map that should be passed to the
|
||||
* dropzone `accept` config and used for server-side validation.
|
||||
*
|
||||
* Always includes PDF; only includes DOCX when the conversion feature is
|
||||
* enabled.
|
||||
*/
|
||||
export const getAllowedUploadMimeTypes = (): Record<string, string[]> => {
|
||||
const base: Record<string, string[]> = {
|
||||
'application/pdf': ['.pdf'],
|
||||
};
|
||||
|
||||
if (IS_DOCUMENT_CONVERSION_ENABLED()) {
|
||||
base[DOCUMENT_CONVERSION_MIME_TYPE_DOCX] = ['.docx'];
|
||||
}
|
||||
|
||||
return base;
|
||||
};
|
||||
@@ -0,0 +1,37 @@
|
||||
/**
|
||||
* In-process circuit breaker for the document conversion service.
|
||||
*
|
||||
* Behaviour: any failure opens the circuit for `COOLDOWN_MS`. While open,
|
||||
* callers should fail fast without hitting the network. The first request
|
||||
* after the cooldown is allowed through and either closes the circuit (on
|
||||
* success) or re-opens it for another cooldown window (on failure).
|
||||
*
|
||||
* State is stored on `globalThis` so it survives Vite/Remix HMR in dev and
|
||||
* is unambiguously process-wide. This module is intentionally pure and
|
||||
* synchronous: no I/O, no logger import — callers handle observability.
|
||||
*/
|
||||
|
||||
const COOLDOWN_MS = 30_000;
|
||||
|
||||
declare global {
|
||||
// eslint-disable-next-line no-var
|
||||
var __documensoConversionCircuitOpenedAt: number | null | undefined;
|
||||
}
|
||||
|
||||
export const isCircuitOpen = (): boolean => {
|
||||
const openedAt = globalThis.__documensoConversionCircuitOpenedAt;
|
||||
|
||||
if (!openedAt) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return Date.now() - openedAt < COOLDOWN_MS;
|
||||
};
|
||||
|
||||
export const recordSuccess = (): void => {
|
||||
globalThis.__documensoConversionCircuitOpenedAt = null;
|
||||
};
|
||||
|
||||
export const recordFailure = (): void => {
|
||||
globalThis.__documensoConversionCircuitOpenedAt = Date.now();
|
||||
};
|
||||
@@ -0,0 +1,92 @@
|
||||
import { AppError } from '@documenso/lib/errors/app-error';
|
||||
import type { Logger } from 'pino';
|
||||
|
||||
import {
|
||||
DOCUMENT_CONVERSION_MIME_TYPE_DOCX,
|
||||
IS_DOCUMENT_CONVERSION_ENABLED,
|
||||
} from '../../constants/document-conversion';
|
||||
import { isCircuitOpen, recordFailure, recordSuccess } from './circuit-breaker';
|
||||
import { convertDocxToPdfViaGotenberg } from './gotenberg';
|
||||
|
||||
type ConvertDocxToPdfOptions = {
|
||||
buffer: Buffer;
|
||||
filename: string;
|
||||
};
|
||||
|
||||
const NOT_CONFIGURED_USER_MESSAGE = "Document conversion isn't enabled on this instance. Please upload a PDF.";
|
||||
|
||||
const UNAVAILABLE_USER_MESSAGE =
|
||||
'Document conversion is temporarily unavailable. Please try again shortly or upload a PDF.';
|
||||
|
||||
/**
|
||||
* Converts a DOCX buffer to a PDF buffer via the configured Gotenberg
|
||||
* conversion service. Guards on feature-enabled and circuit-open state,
|
||||
* and emits a structured log line for each attempt.
|
||||
*/
|
||||
export const convertDocxToPdf = async (
|
||||
{ buffer, filename }: ConvertDocxToPdfOptions,
|
||||
logger?: Logger,
|
||||
): Promise<Buffer> => {
|
||||
if (!IS_DOCUMENT_CONVERSION_ENABLED()) {
|
||||
throw new AppError('CONVERSION_SERVICE_UNAVAILABLE', {
|
||||
message: 'Conversion service not configured',
|
||||
userMessage: NOT_CONFIGURED_USER_MESSAGE,
|
||||
statusCode: 503,
|
||||
});
|
||||
}
|
||||
|
||||
if (isCircuitOpen()) {
|
||||
throw new AppError('CONVERSION_SERVICE_UNAVAILABLE', {
|
||||
message: 'Conversion circuit is open; failing fast',
|
||||
userMessage: UNAVAILABLE_USER_MESSAGE,
|
||||
statusCode: 503,
|
||||
});
|
||||
}
|
||||
|
||||
const startedAt = Date.now();
|
||||
|
||||
try {
|
||||
const outputBuffer = await convertDocxToPdfViaGotenberg({ buffer, filename });
|
||||
|
||||
recordSuccess();
|
||||
|
||||
logger?.info({
|
||||
event: 'document_conversion_attempt',
|
||||
filename,
|
||||
sourceMimeType: DOCUMENT_CONVERSION_MIME_TYPE_DOCX,
|
||||
durationMs: Date.now() - startedAt,
|
||||
inputBytes: buffer.byteLength,
|
||||
outputBytes: outputBuffer.byteLength,
|
||||
});
|
||||
|
||||
return outputBuffer;
|
||||
} catch (err) {
|
||||
recordFailure();
|
||||
|
||||
const errMessage = err instanceof Error ? err.message : String(err);
|
||||
const errCode = err instanceof AppError ? err.code : 'UNKNOWN';
|
||||
|
||||
const logData = {
|
||||
event: 'document_conversion_attempt',
|
||||
filename,
|
||||
sourceMimeType: DOCUMENT_CONVERSION_MIME_TYPE_DOCX,
|
||||
durationMs: Date.now() - startedAt,
|
||||
inputBytes: buffer.byteLength,
|
||||
failed: true,
|
||||
errorCode: errCode,
|
||||
error: errMessage,
|
||||
};
|
||||
|
||||
// A non-2xx from the conversion service surfaces as CONVERSION_FAILED.
|
||||
// We log those at `error` level (status + truncated body live in the
|
||||
// AppError message). All other failures stay at `info` to avoid noisy
|
||||
// logs from transient network blips that the breaker already handles.
|
||||
if (errCode === 'CONVERSION_FAILED') {
|
||||
logger?.error(logData);
|
||||
} else {
|
||||
logger?.info(logData);
|
||||
}
|
||||
|
||||
throw err;
|
||||
}
|
||||
};
|
||||
@@ -0,0 +1,135 @@
|
||||
import { AppError } from '@documenso/lib/errors/app-error';
|
||||
|
||||
import {
|
||||
DOCUMENT_CONVERSION_AUTH,
|
||||
DOCUMENT_CONVERSION_MIME_TYPE_DOCX,
|
||||
DOCUMENT_CONVERSION_TIMEOUT_MS,
|
||||
DOCUMENT_CONVERSION_URL,
|
||||
} from '../../constants/document-conversion';
|
||||
|
||||
type ConvertDocxToPdfViaGotenbergOptions = {
|
||||
buffer: Buffer;
|
||||
filename: string;
|
||||
};
|
||||
|
||||
const UNAVAILABLE_USER_MESSAGE =
|
||||
'Document conversion is temporarily unavailable. Please try again shortly or upload a PDF.';
|
||||
|
||||
const NOT_CONFIGURED_USER_MESSAGE = "Document conversion isn't enabled on this instance. Please upload a PDF.";
|
||||
|
||||
const CONVERSION_FAILED_USER_MESSAGE =
|
||||
"We couldn't convert this file. Please check it's a valid Word document or upload a PDF instead.";
|
||||
|
||||
const MAX_ERROR_BODY_CHARS = 500;
|
||||
|
||||
/**
|
||||
* Posts a DOCX file to the configured Gotenberg-compatible conversion
|
||||
* service and returns the resulting PDF bytes.
|
||||
*
|
||||
* Throws an `AppError` for all failure modes:
|
||||
* - `CONVERSION_SERVICE_UNAVAILABLE` for missing config, timeout, or
|
||||
* network errors.
|
||||
* - `CONVERSION_FAILED` for non-2xx responses from the service.
|
||||
*/
|
||||
export const convertDocxToPdfViaGotenberg = async ({
|
||||
buffer,
|
||||
filename,
|
||||
}: ConvertDocxToPdfViaGotenbergOptions): Promise<Buffer> => {
|
||||
const url = DOCUMENT_CONVERSION_URL();
|
||||
|
||||
if (!url) {
|
||||
throw new AppError('CONVERSION_SERVICE_UNAVAILABLE', {
|
||||
message: 'Conversion service URL is not configured',
|
||||
userMessage: NOT_CONFIGURED_USER_MESSAGE,
|
||||
statusCode: 503,
|
||||
});
|
||||
}
|
||||
|
||||
const formData = new FormData();
|
||||
const blob = new Blob([buffer], { type: DOCUMENT_CONVERSION_MIME_TYPE_DOCX });
|
||||
|
||||
formData.append('files', blob, filename);
|
||||
|
||||
// Tell LibreOffice NOT to export Word content controls (`<w:sdt>`) as PDF
|
||||
// AcroForm fields. By default Gotenberg renders the field values into form
|
||||
// appearance streams that reference unembedded base fonts (Times-Roman,
|
||||
// Times-Bold). Our downstream `normalizePdf` flattens the form, but the
|
||||
// pdf-lib flattening drops those appearance streams, so every SDT-bound
|
||||
// string (i.e. virtually all of the body text in Office resume / cover-
|
||||
// letter templates) ends up invisible in the final PDF. Disabling form
|
||||
// export makes LibreOffice render those strings as regular text in the
|
||||
// page content stream, with all glyphs embedded.
|
||||
formData.append('exportFormFields', 'false');
|
||||
|
||||
// When the service is launched with `--api-enable-basic-auth`, every
|
||||
// route (including `/health` and `/forms/libreoffice/convert`) requires
|
||||
// HTTP Basic credentials. When auth env vars are not configured we send
|
||||
// no header and rely on the service running without auth enabled.
|
||||
const auth = DOCUMENT_CONVERSION_AUTH();
|
||||
const headers: Record<string, string> = {};
|
||||
|
||||
if (auth) {
|
||||
const encoded = Buffer.from(`${auth.username}:${auth.password}`).toString('base64');
|
||||
headers.Authorization = `Basic ${encoded}`;
|
||||
}
|
||||
|
||||
const controller = new AbortController();
|
||||
const timeoutHandle = setTimeout(() => controller.abort(), DOCUMENT_CONVERSION_TIMEOUT_MS());
|
||||
|
||||
const convertEndpoint = new URL('/forms/libreoffice/convert', url).toString();
|
||||
|
||||
try {
|
||||
const response = await fetch(convertEndpoint, {
|
||||
method: 'POST',
|
||||
body: formData,
|
||||
headers,
|
||||
signal: controller.signal,
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
let body = '';
|
||||
|
||||
try {
|
||||
body = await response.text();
|
||||
} catch {
|
||||
body = '';
|
||||
}
|
||||
|
||||
const truncatedBody = body.length > MAX_ERROR_BODY_CHARS ? `${body.slice(0, MAX_ERROR_BODY_CHARS)}...` : body;
|
||||
|
||||
throw new AppError('CONVERSION_FAILED', {
|
||||
message: `Conversion service returned ${response.status}: ${truncatedBody}`,
|
||||
userMessage: CONVERSION_FAILED_USER_MESSAGE,
|
||||
statusCode: 400,
|
||||
});
|
||||
}
|
||||
|
||||
const arrayBuffer = await response.arrayBuffer();
|
||||
|
||||
return Buffer.from(arrayBuffer);
|
||||
} catch (err) {
|
||||
if (err instanceof AppError) {
|
||||
throw err;
|
||||
}
|
||||
|
||||
const isAbortError = err instanceof Error && err.name === 'AbortError';
|
||||
|
||||
if (isAbortError) {
|
||||
throw new AppError('CONVERSION_SERVICE_UNAVAILABLE', {
|
||||
message: 'Conversion service timed out',
|
||||
userMessage: UNAVAILABLE_USER_MESSAGE,
|
||||
statusCode: 503,
|
||||
});
|
||||
}
|
||||
|
||||
const errMessage = err instanceof Error ? err.message : String(err);
|
||||
|
||||
throw new AppError('CONVERSION_SERVICE_UNAVAILABLE', {
|
||||
message: `Conversion service request failed: ${errMessage}`,
|
||||
userMessage: UNAVAILABLE_USER_MESSAGE,
|
||||
statusCode: 503,
|
||||
});
|
||||
} finally {
|
||||
clearTimeout(timeoutHandle);
|
||||
}
|
||||
};
|
||||
@@ -0,0 +1,43 @@
|
||||
import { AppError } from '@documenso/lib/errors/app-error';
|
||||
import type { Logger } from 'pino';
|
||||
|
||||
import { DOCUMENT_CONVERSION_MIME_TYPE_DOCX } from '../../constants/document-conversion';
|
||||
import { convertDocxToPdf } from './docx-to-pdf';
|
||||
|
||||
// We should work on unifying these later on.
|
||||
type FileInput = {
|
||||
name: string;
|
||||
type: string;
|
||||
arrayBuffer: () => Promise<ArrayBuffer>;
|
||||
};
|
||||
|
||||
const UNSUPPORTED_USER_MESSAGE = "This file type isn't supported. Please upload a PDF or Word document.";
|
||||
|
||||
/**
|
||||
* Entry point for upload routes. Returns a PDF buffer for any supported
|
||||
* input file:
|
||||
*
|
||||
* - PDF in → PDF out (no conversion, no network call).
|
||||
* - DOCX in → converted PDF out via the configured conversion service.
|
||||
* - Any other mime type → throws `UNSUPPORTED_FILE_TYPE`.
|
||||
*
|
||||
* To support new source formats (PowerPoint, HTML, ...), add a new
|
||||
* `<format>-to-pdf.ts` sibling and dispatch to it from here.
|
||||
*/
|
||||
export const convertToPdf = async (file: FileInput, logger?: Logger): Promise<Buffer> => {
|
||||
if (file.type === 'application/pdf') {
|
||||
return Buffer.from(await file.arrayBuffer());
|
||||
}
|
||||
|
||||
if (file.type === DOCUMENT_CONVERSION_MIME_TYPE_DOCX) {
|
||||
const buffer = Buffer.from(await file.arrayBuffer());
|
||||
|
||||
return convertDocxToPdf({ buffer, filename: file.name }, logger);
|
||||
}
|
||||
|
||||
throw new AppError('UNSUPPORTED_FILE_TYPE', {
|
||||
message: `Unsupported file type: ${file.type}`,
|
||||
userMessage: UNSUPPORTED_USER_MESSAGE,
|
||||
statusCode: 400,
|
||||
});
|
||||
};
|
||||
@@ -20,5 +20,11 @@ export const env = <K extends EnvKey>(variable: K): EnvValue<K> => {
|
||||
return (typeof process !== 'undefined' ? process?.env?.[variable] : undefined) as EnvValue<K>;
|
||||
};
|
||||
|
||||
export const createPublicEnv = () =>
|
||||
Object.fromEntries(Object.entries(process.env).filter(([key]) => key.startsWith('NEXT_PUBLIC_')));
|
||||
export const createPublicEnv = () => ({
|
||||
...Object.fromEntries(Object.entries(process.env).filter(([key]) => key.startsWith('NEXT_PUBLIC_'))),
|
||||
// Derived from the private URL so the public flag cannot drift from the
|
||||
// real server-side configuration. Placed last so it wins over any literal
|
||||
// env var with the same name.
|
||||
// The `? 'true' : 'false'` might seem dumb but it's because we're expecting env var strings.
|
||||
NEXT_PUBLIC_DOCUMENT_CONVERSION_ENABLED: process.env.NEXT_PRIVATE_DOCUMENT_CONVERSION_URL ? 'true' : 'false',
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user