feat: support DOCX uploads via Gotenberg (#2801)

Uploaded .docx files are converted to PDF on the server using a Gotenberg
sidecar before entering the normal envelope pipeline. The feature is
opt-in via NEXT_PRIVATE_DOCUMENT_CONVERSION_URL; when unset, only PDF
uploads are accepted.

A per-process circuit breaker opens for 30s after a conversion failure
to shed load.

Ships a dev Dockerfile that layers Microsoft Core Fonts and additional
language fonts onto the upstream Gotenberg image for better fidelity.

Co-authored-by: Ephraim Duncan
<55143799+ephraimduncan@users.noreply.github.com>

Co-authored-by: Ephraim Duncan <55143799+ephraimduncan@users.noreply.github.com>
This commit is contained in:
Lucas Smith
2026-05-13 15:06:21 +10:00
committed by GitHub
parent 8dfd548c08
commit bc184d445f
23 changed files with 1062 additions and 41 deletions
@@ -0,0 +1,90 @@
import { env } from '@documenso/lib/utils/env';
/** MIME type used to identify DOCX (Word) files. */
export const DOCUMENT_CONVERSION_MIME_TYPE_DOCX =
'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
/** Conversion request timeout in milliseconds used when no valid override is configured. */
const DEFAULT_DOCUMENT_CONVERSION_TIMEOUT_MS = 30_000;
/**
 * Reports whether document conversion is switched on for this deployment.
 *
 * The answer is resolved differently depending on the runtime:
 * - Without a `window` global (server), conversion is on when
 *   `NEXT_PRIVATE_DOCUMENT_CONVERSION_URL` holds a non-empty value.
 * - With a `window` global (client), conversion is on only when
 *   `NEXT_PUBLIC_DOCUMENT_CONVERSION_ENABLED` is exactly the string `'true'`.
 *
 * @returns `true` when conversion is enabled, otherwise `false`.
 */
export const IS_DOCUMENT_CONVERSION_ENABLED = (): boolean => {
  const isServer = typeof window === 'undefined';

  return isServer
    ? Boolean(env('NEXT_PRIVATE_DOCUMENT_CONVERSION_URL'))
    : env('NEXT_PUBLIC_DOCUMENT_CONVERSION_ENABLED') === 'true';
};
/**
 * Reads the conversion service base URL from
 * `NEXT_PRIVATE_DOCUMENT_CONVERSION_URL`.
 *
 * Intended for server-side callers only.
 *
 * @returns The env value exactly as supplied, or `undefined` when it is not
 * configured.
 */
export const DOCUMENT_CONVERSION_URL = (): string | undefined => env('NEXT_PRIVATE_DOCUMENT_CONVERSION_URL');
/**
 * Reads the HTTP Basic auth credentials for the conversion service from
 * `NEXT_PRIVATE_DOCUMENT_CONVERSION_USERNAME` and
 * `NEXT_PRIVATE_DOCUMENT_CONVERSION_PASSWORD`.
 *
 * When Gotenberg is started with `--api-enable-basic-auth`, every request must
 * carry these credentials.
 *
 * Intended for server-side callers only.
 *
 * @returns Both credentials, or `undefined` when either one is missing or
 * empty.
 */
export const DOCUMENT_CONVERSION_AUTH = (): { username: string; password: string } | undefined => {
  const [username, password] = [
    env('NEXT_PRIVATE_DOCUMENT_CONVERSION_USERNAME'),
    env('NEXT_PRIVATE_DOCUMENT_CONVERSION_PASSWORD'),
  ];

  // A half-configured pair is treated the same as no credentials at all.
  return username && password ? { username, password } : undefined;
};
/**
 * Returns the per-request timeout for conversion calls in milliseconds.
 *
 * Reads `NEXT_PRIVATE_DOCUMENT_CONVERSION_TIMEOUT_MS` and falls back to
 * `DEFAULT_DOCUMENT_CONVERSION_TIMEOUT_MS` when the value is missing, is not a
 * plain decimal number, or is not positive after truncation.
 *
 * Two guards go beyond a bare `parseInt`:
 * - The whole string must be numeric. `parseInt('30s', 10)` yields `30`, which
 *   would silently turn an intended 30 second timeout into 30 milliseconds;
 *   such values now fall back to the default instead.
 * - The result is capped at the largest delay `setTimeout` supports. Node
 *   coerces larger delays to 1ms, which would abort every request immediately.
 *
 * @returns A positive integer number of milliseconds.
 */
export const DOCUMENT_CONVERSION_TIMEOUT_MS = (): number => {
  // Largest delay `setTimeout` accepts: 2^31 - 1 milliseconds.
  const maxTimerDelayMs = 2_147_483_647;

  const raw = env('NEXT_PRIVATE_DOCUMENT_CONVERSION_TIMEOUT_MS');

  if (!raw) {
    return DEFAULT_DOCUMENT_CONVERSION_TIMEOUT_MS;
  }

  const trimmed = raw.trim();

  // Accept digits with an optional leading `+` and optional fraction; reject
  // unit suffixes, exponents, hex, and anything else `parseInt` would
  // partially consume.
  if (!/^\+?\d+(?:\.\d*)?$/.test(trimmed)) {
    return DEFAULT_DOCUMENT_CONVERSION_TIMEOUT_MS;
  }

  // Truncate the fraction, matching what `parseInt` did for values like `1500.7`.
  const parsed = Math.floor(Number(trimmed));

  if (!Number.isFinite(parsed) || parsed <= 0) {
    return DEFAULT_DOCUMENT_CONVERSION_TIMEOUT_MS;
  }

  return Math.min(parsed, maxTimerDelayMs);
};
/**
 * Builds the mime type -> file extensions map of accepted uploads, in the
 * shape expected by the dropzone `accept` option.
 *
 * PDF is always present. DOCX is added only while
 * `IS_DOCUMENT_CONVERSION_ENABLED()` returns `true`.
 *
 * @returns A freshly created map on every call.
 */
export const getAllowedUploadMimeTypes = (): Record<string, string[]> => {
  const pdfOnly: Record<string, string[]> = { 'application/pdf': ['.pdf'] };

  if (!IS_DOCUMENT_CONVERSION_ENABLED()) {
    return pdfOnly;
  }

  return { ...pdfOnly, [DOCUMENT_CONVERSION_MIME_TYPE_DOCX]: ['.docx'] };
};
@@ -0,0 +1,37 @@
/**
* In-process circuit breaker for the document conversion service.
*
* Behaviour: any failure opens the circuit for `COOLDOWN_MS`. While open,
* callers should fail fast without hitting the network. Once the cooldown
* has elapsed, requests are allowed through again (there is no single-probe
* half-open state, so concurrent requests all pass); a success closes the
* circuit and a failure re-opens it for another cooldown window.
*
* State is stored on `globalThis` so it survives Vite/Remix HMR in dev and
* is unambiguously process-wide. This module is intentionally pure and
* synchronous: no I/O, no logger import — callers handle observability.
*/
const COOLDOWN_MS = 30_000;
declare global {
// eslint-disable-next-line no-var
var __documensoConversionCircuitOpenedAt: number | null | undefined;
}
/**
 * Reports whether conversion calls should currently be short-circuited.
 *
 * @returns `true` while fewer than `COOLDOWN_MS` milliseconds have passed
 * since the stored opened-at timestamp; `false` when no timestamp is stored
 * or the cooldown has elapsed.
 */
export const isCircuitOpen = (): boolean => {
  const lastOpenedAt = globalThis.__documensoConversionCircuitOpenedAt;

  // `undefined`, `null`, and `0` all mean "nothing recorded".
  return lastOpenedAt ? Date.now() - lastOpenedAt < COOLDOWN_MS : false;
};
/**
 * Closes the circuit by clearing the stored opened-at timestamp.
 */
export const recordSuccess = (): void => {
  // `null` is the explicit "closed" marker written to the process-wide slot.
  const closedMarker = null;

  globalThis.__documensoConversionCircuitOpenedAt = closedMarker;
};
/**
 * Opens the circuit by stamping the current time as the opened-at timestamp.
 * Calling it again while already open restarts the window from "now".
 */
export const recordFailure = (): void => {
  const failedAt = Date.now();

  globalThis.__documensoConversionCircuitOpenedAt = failedAt;
};
@@ -0,0 +1,92 @@
import { AppError } from '@documenso/lib/errors/app-error';
import type { Logger } from 'pino';
import {
DOCUMENT_CONVERSION_MIME_TYPE_DOCX,
IS_DOCUMENT_CONVERSION_ENABLED,
} from '../../constants/document-conversion';
import { isCircuitOpen, recordFailure, recordSuccess } from './circuit-breaker';
import { convertDocxToPdfViaGotenberg } from './gotenberg';
type ConvertDocxToPdfOptions = {
  buffer: Buffer;
  filename: string;
};

const NOT_CONFIGURED_USER_MESSAGE = "Document conversion isn't enabled on this instance. Please upload a PDF.";
const UNAVAILABLE_USER_MESSAGE =
  'Document conversion is temporarily unavailable. Please try again shortly or upload a PDF.';

/**
 * Runs a DOCX -> PDF conversion through the Gotenberg client, wrapped in the
 * feature-enabled check, the circuit breaker, and structured logging.
 *
 * Flow:
 * 1. Reject with a 503 `CONVERSION_SERVICE_UNAVAILABLE` when the feature is
 *    disabled or the circuit is open. Neither case touches the breaker state
 *    or the logger.
 * 2. Otherwise call the conversion client. Success closes the circuit and logs
 *    one `document_conversion_attempt` line at `info`. Any thrown error opens
 *    the circuit, is logged, and is rethrown unchanged.
 *
 * NOTE(review): every failure trips the breaker, including `CONVERSION_FAILED`.
 * Confirm that a single rejected document is meant to pause conversion for the
 * whole process.
 *
 * @param options - The DOCX bytes (`buffer`) and the upload's `filename`.
 * @param logger - Optional pino logger; when omitted nothing is logged.
 * @returns The converted PDF bytes.
 * @throws AppError `CONVERSION_SERVICE_UNAVAILABLE` when disabled or the
 * circuit is open; otherwise whatever the conversion client throws.
 */
export const convertDocxToPdf = async (
  { buffer, filename }: ConvertDocxToPdfOptions,
  logger?: Logger,
): Promise<Buffer> => {
  const unavailable = (message: string, userMessage: string) =>
    new AppError('CONVERSION_SERVICE_UNAVAILABLE', { message, userMessage, statusCode: 503 });

  if (!IS_DOCUMENT_CONVERSION_ENABLED()) {
    throw unavailable('Conversion service not configured', NOT_CONFIGURED_USER_MESSAGE);
  }

  if (isCircuitOpen()) {
    throw unavailable('Conversion circuit is open; failing fast', UNAVAILABLE_USER_MESSAGE);
  }

  const startedAt = Date.now();

  // Fields shared by the success and failure log lines. The duration is
  // sampled at the moment the line is built.
  const attemptFields = () => ({
    event: 'document_conversion_attempt',
    filename,
    sourceMimeType: DOCUMENT_CONVERSION_MIME_TYPE_DOCX,
    durationMs: Date.now() - startedAt,
    inputBytes: buffer.byteLength,
  });

  try {
    const pdf = await convertDocxToPdfViaGotenberg({ buffer, filename });

    recordSuccess();
    logger?.info({ ...attemptFields(), outputBytes: pdf.byteLength });

    return pdf;
  } catch (err) {
    recordFailure();

    const error = err instanceof Error ? err.message : String(err);
    const errorCode = err instanceof AppError ? err.code : 'UNKNOWN';
    const failureFields = { ...attemptFields(), failed: true, errorCode, error };

    // A non-2xx from the conversion service surfaces as CONVERSION_FAILED and
    // is logged at `error` (status + truncated body live in the AppError
    // message). Everything else stays at `info` so transient network blips,
    // which the breaker already handles, do not flood the error log.
    if (errorCode === 'CONVERSION_FAILED') {
      logger?.error(failureFields);
    } else {
      logger?.info(failureFields);
    }

    throw err;
  }
};
@@ -0,0 +1,135 @@
import { AppError } from '@documenso/lib/errors/app-error';
import {
DOCUMENT_CONVERSION_AUTH,
DOCUMENT_CONVERSION_MIME_TYPE_DOCX,
DOCUMENT_CONVERSION_TIMEOUT_MS,
DOCUMENT_CONVERSION_URL,
} from '../../constants/document-conversion';
type ConvertDocxToPdfViaGotenbergOptions = {
  buffer: Buffer;
  filename: string;
};

const UNAVAILABLE_USER_MESSAGE =
  'Document conversion is temporarily unavailable. Please try again shortly or upload a PDF.';
const NOT_CONFIGURED_USER_MESSAGE = "Document conversion isn't enabled on this instance. Please upload a PDF.";
const CONVERSION_FAILED_USER_MESSAGE =
  "We couldn't convert this file. Please check it's a valid Word document or upload a PDF instead.";
const MAX_ERROR_BODY_CHARS = 500;
const CONVERT_ROUTE = '/forms/libreoffice/convert';

/**
 * Builds the LibreOffice convert endpoint from the configured base URL.
 *
 * Any path already present on the base URL is kept as a prefix (for example a
 * reverse-proxy mount point or a non-default API root path), so
 * `https://host/gotenberg/` becomes
 * `https://host/gotenberg/forms/libreoffice/convert`. Resolving the absolute
 * route with `new URL(route, base)` would silently drop that prefix. Query and
 * fragment on the base URL are discarded, as they were before.
 *
 * @throws TypeError when `baseUrl` is not a valid absolute URL.
 */
const buildConvertEndpoint = (baseUrl: string): string => {
  const endpoint = new URL(baseUrl);
  const basePath = endpoint.pathname.replace(/\/+$/, '');

  endpoint.pathname = `${basePath}${CONVERT_ROUTE}`;
  endpoint.search = '';
  endpoint.hash = '';

  return endpoint.toString();
};

/**
 * Posts a DOCX file to the configured Gotenberg-compatible conversion
 * service and returns the resulting PDF bytes.
 *
 * The request is aborted after `DOCUMENT_CONVERSION_TIMEOUT_MS()`; the timer
 * also covers reading the response body.
 *
 * Throws an `AppError` for all failure modes:
 * - `CONVERSION_SERVICE_UNAVAILABLE` (503) for a missing or malformed service
 *   URL, timeout, or network errors.
 * - `CONVERSION_FAILED` (400) for non-2xx responses from the service; the
 *   message carries the status and up to `MAX_ERROR_BODY_CHARS` characters of
 *   the response body.
 *
 * @param options - The DOCX bytes (`buffer`) and the multipart `filename`.
 * @returns The PDF bytes returned by the service.
 */
export const convertDocxToPdfViaGotenberg = async ({
  buffer,
  filename,
}: ConvertDocxToPdfViaGotenbergOptions): Promise<Buffer> => {
  const url = DOCUMENT_CONVERSION_URL();

  if (!url) {
    throw new AppError('CONVERSION_SERVICE_UNAVAILABLE', {
      message: 'Conversion service URL is not configured',
      userMessage: NOT_CONFIGURED_USER_MESSAGE,
      statusCode: 503,
    });
  }

  // Resolve the endpoint before any timer is started, so a malformed URL is
  // reported as an AppError and cannot leave a pending abort timer behind.
  let convertEndpoint: string;

  try {
    convertEndpoint = buildConvertEndpoint(url);
  } catch (err) {
    const errMessage = err instanceof Error ? err.message : String(err);

    throw new AppError('CONVERSION_SERVICE_UNAVAILABLE', {
      message: `Conversion service URL is invalid: ${errMessage}`,
      userMessage: UNAVAILABLE_USER_MESSAGE,
      statusCode: 503,
    });
  }

  const formData = new FormData();
  const blob = new Blob([buffer], { type: DOCUMENT_CONVERSION_MIME_TYPE_DOCX });

  formData.append('files', blob, filename);

  // Tell LibreOffice NOT to export Word content controls (`<w:sdt>`) as PDF
  // AcroForm fields. By default Gotenberg renders the field values into form
  // appearance streams that reference unembedded base fonts (Times-Roman,
  // Times-Bold). Our downstream `normalizePdf` flattens the form, but the
  // pdf-lib flattening drops those appearance streams, so every SDT-bound
  // string (i.e. virtually all of the body text in Office resume / cover-
  // letter templates) ends up invisible in the final PDF. Disabling form
  // export makes LibreOffice render those strings as regular text in the
  // page content stream, with all glyphs embedded.
  formData.append('exportFormFields', 'false');

  // When the service is launched with `--api-enable-basic-auth`, every
  // route (including `/health` and `/forms/libreoffice/convert`) requires
  // HTTP Basic credentials. When auth env vars are not configured we send
  // no header and rely on the service running without auth enabled.
  const auth = DOCUMENT_CONVERSION_AUTH();
  const headers: Record<string, string> = {};

  if (auth) {
    const encoded = Buffer.from(`${auth.username}:${auth.password}`).toString('base64');

    headers.Authorization = `Basic ${encoded}`;
  }

  const controller = new AbortController();
  const timeoutHandle = setTimeout(() => controller.abort(), DOCUMENT_CONVERSION_TIMEOUT_MS());

  try {
    const response = await fetch(convertEndpoint, {
      method: 'POST',
      body: formData,
      headers,
      signal: controller.signal,
    });

    if (!response.ok) {
      let body = '';

      // The body is diagnostic only; failing to read it must not mask the
      // non-2xx status itself.
      try {
        body = await response.text();
      } catch {
        body = '';
      }

      const truncatedBody = body.length > MAX_ERROR_BODY_CHARS ? `${body.slice(0, MAX_ERROR_BODY_CHARS)}...` : body;

      throw new AppError('CONVERSION_FAILED', {
        message: `Conversion service returned ${response.status}: ${truncatedBody}`,
        userMessage: CONVERSION_FAILED_USER_MESSAGE,
        statusCode: 400,
      });
    }

    const arrayBuffer = await response.arrayBuffer();

    return Buffer.from(arrayBuffer);
  } catch (err) {
    // Already classified above (non-2xx); pass through untouched.
    if (err instanceof AppError) {
      throw err;
    }

    const isAbortError = err instanceof Error && err.name === 'AbortError';

    if (isAbortError) {
      throw new AppError('CONVERSION_SERVICE_UNAVAILABLE', {
        message: 'Conversion service timed out',
        userMessage: UNAVAILABLE_USER_MESSAGE,
        statusCode: 503,
      });
    }

    const errMessage = err instanceof Error ? err.message : String(err);

    throw new AppError('CONVERSION_SERVICE_UNAVAILABLE', {
      message: `Conversion service request failed: ${errMessage}`,
      userMessage: UNAVAILABLE_USER_MESSAGE,
      statusCode: 503,
    });
  } finally {
    clearTimeout(timeoutHandle);
  }
};
@@ -0,0 +1,43 @@
import { AppError } from '@documenso/lib/errors/app-error';
import type { Logger } from 'pino';
import { DOCUMENT_CONVERSION_MIME_TYPE_DOCX } from '../../constants/document-conversion';
import { convertDocxToPdf } from './docx-to-pdf';
// We should work on unifying these later on.
type FileInput = {
  name: string;
  type: string;
  arrayBuffer: () => Promise<ArrayBuffer>;
};

const UNSUPPORTED_USER_MESSAGE = "This file type isn't supported. Please upload a PDF or Word document.";

/**
 * Single entry point upload routes use to obtain PDF bytes from an uploaded
 * file, dispatching on the file's reported mime type:
 *
 * - `application/pdf`: the bytes are returned as-is, with no conversion call.
 * - DOCX: the bytes are handed to `convertDocxToPdf` together with the
 *   file name and the optional logger.
 * - Anything else: rejected with a 400 `UNSUPPORTED_FILE_TYPE`, without
 *   reading the file contents.
 *
 * To support new source formats (PowerPoint, HTML, ...), add a new
 * `<format>-to-pdf.ts` sibling and add a case for it here.
 *
 * @param file - Uploaded file exposing `name`, `type`, and `arrayBuffer()`.
 * @param logger - Optional pino logger forwarded to the DOCX converter.
 * @returns The PDF bytes.
 */
export const convertToPdf = async (file: FileInput, logger?: Logger): Promise<Buffer> => {
  switch (file.type) {
    case 'application/pdf':
      return Buffer.from(await file.arrayBuffer());

    case DOCUMENT_CONVERSION_MIME_TYPE_DOCX: {
      const docxBytes = Buffer.from(await file.arrayBuffer());

      return convertDocxToPdf({ buffer: docxBytes, filename: file.name }, logger);
    }

    default:
      throw new AppError('UNSUPPORTED_FILE_TYPE', {
        message: `Unsupported file type: ${file.type}`,
        userMessage: UNSUPPORTED_USER_MESSAGE,
        statusCode: 400,
      });
  }
};
+8 -2
View File
@@ -20,5 +20,11 @@ export const env = <K extends EnvKey>(variable: K): EnvValue<K> => {
return (typeof process !== 'undefined' ? process?.env?.[variable] : undefined) as EnvValue<K>;
};
export const createPublicEnv = () =>
Object.fromEntries(Object.entries(process.env).filter(([key]) => key.startsWith('NEXT_PUBLIC_')));
/**
 * Builds the public env object: every `process.env` entry whose key starts
 * with `NEXT_PUBLIC_`, plus a derived `NEXT_PUBLIC_DOCUMENT_CONVERSION_ENABLED`
 * flag that is `'true'` when `NEXT_PRIVATE_DOCUMENT_CONVERSION_URL` is set to a
 * non-empty value and `'false'` otherwise.
 *
 * NOTE(review): presumably this is the object exposed to the client for
 * `env()` lookups — confirm against the caller.
 */
export const createPublicEnv = () => ({
...Object.fromEntries(Object.entries(process.env).filter(([key]) => key.startsWith('NEXT_PUBLIC_'))),
// Derived from the private URL so the public flag cannot drift from the
// real server-side configuration. Placed last so it wins over any literal
// env var with the same name.
// The `? 'true' : 'false'` might seem dumb but it's because we're expecting env var strings.
NEXT_PUBLIC_DOCUMENT_CONVERSION_ENABLED: process.env.NEXT_PRIVATE_DOCUMENT_CONVERSION_URL ? 'true' : 'false',
});
@@ -1,5 +1,6 @@
import { getServerLimits } from '@documenso/ee/server-only/limits/server';
import { AppError, AppErrorCode } from '@documenso/lib/errors/app-error';
import { convertToPdf } from '@documenso/lib/server-only/document-conversion';
import { createEnvelope } from '@documenso/lib/server-only/envelope/create-envelope';
import { insertFormValuesInPdf } from '@documenso/lib/server-only/pdf/insert-form-values-in-pdf';
import { putNormalizedPdfFileServerSide } from '@documenso/lib/universal/upload/put-file.server';
@@ -35,7 +36,7 @@ export const createDocumentRoute = authenticatedProcedure
attachments,
} = payload;
let pdf = Buffer.from(await file.arrayBuffer());
let pdf = await convertToPdf(file, ctx.logger);
if (formValues) {
// eslint-disable-next-line require-atomic-updates
@@ -37,5 +37,6 @@ export const createEmbeddingEnvelopeRoute = procedure
bypassDefaultRecipients: true,
},
apiRequestMetadata: ctx.metadata,
logger: ctx.logger,
});
});
@@ -1,11 +1,13 @@
import { getServerLimits } from '@documenso/ee/server-only/limits/server';
import { AppError, AppErrorCode } from '@documenso/lib/errors/app-error';
import { convertToPdf } from '@documenso/lib/server-only/document-conversion';
import { createEnvelope } from '@documenso/lib/server-only/envelope/create-envelope';
import { extractPdfPlaceholders } from '@documenso/lib/server-only/pdf/auto-place-fields';
import { normalizePdf } from '@documenso/lib/server-only/pdf/normalize-pdf';
import type { ApiRequestMetadata } from '@documenso/lib/universal/extract-request-metadata';
import { putPdfFileServerSide } from '@documenso/lib/universal/upload/put-file.server';
import { EnvelopeType } from '@prisma/client';
import type { Logger } from 'pino';
import { insertFormValuesInPdf } from '../../../lib/server-only/pdf/insert-form-values-in-pdf';
import { authenticatedProcedure } from '../trpc';
@@ -32,6 +34,7 @@ export const createEnvelopeRoute = authenticatedProcedure
teamId: ctx.teamId,
input,
apiRequestMetadata: ctx.metadata,
logger: ctx.logger,
});
});
@@ -48,6 +51,12 @@ type CreateEnvelopeRouteOptions = {
input: TCreateEnvelopeRequest;
apiRequestMetadata: ApiRequestMetadata;
/**
* Optional pino logger threaded from the calling tRPC context. Passed to
* downstream helpers (e.g. `convertToPdf`) for structured logging.
*/
logger?: Logger;
options?: {
bypassDefaultRecipients?: boolean;
};
@@ -58,6 +67,7 @@ export const createEnvelopeRouteCaller = async ({
teamId,
input,
apiRequestMetadata,
logger,
options = {},
}: CreateEnvelopeRouteOptions) => {
const { payload, files } = input;
@@ -96,17 +106,10 @@ export const createEnvelopeRouteCaller = async ({
});
}
if (files.some((file) => !file.type.startsWith('application/pdf'))) {
throw new AppError('INVALID_DOCUMENT_FILE', {
message: 'You cannot upload non-PDF files',
statusCode: 400,
});
}
// For each file: normalize, extract & clean placeholders, then upload.
// For each file: convert to PDF if needed, normalize, extract & clean placeholders, then upload.
const envelopeItems = await Promise.all(
files.map(async (file) => {
let pdf = Buffer.from(await file.arrayBuffer());
let pdf = await convertToPdf(file, logger);
if (formValues) {
// eslint-disable-next-line require-atomic-updates
+3 -4
View File
@@ -1,5 +1,6 @@
import { useCurrentOrganisation } from '@documenso/lib/client-only/providers/organisation';
import { APP_DOCUMENT_UPLOAD_SIZE_LIMIT, IS_BILLING_ENABLED } from '@documenso/lib/constants/app';
import { getAllowedUploadMimeTypes } from '@documenso/lib/constants/document-conversion';
import { megabytesToBytes } from '@documenso/lib/universal/unit-convertions';
import type { MessageDescriptor } from '@lingui/core';
import { msg } from '@lingui/core/macro';
@@ -54,9 +55,7 @@ export const DocumentDropzone = ({
const organisation = useCurrentOrganisation();
const { getRootProps, getInputProps } = useDropzone({
accept: {
'application/pdf': ['.pdf'],
},
accept: getAllowedUploadMimeTypes(),
multiple: allowMultiple,
disabled,
onDrop: (acceptedFiles) => {
@@ -151,7 +150,7 @@ export const DocumentDropzone = ({
<p className="mt-6 font-medium text-foreground">{_(heading[type])}</p>
<p className="mt-1 text-center text-muted-foreground/80 text-sm">
{_(disabled ? disabledMessage : msg`Drag & drop your PDF here.`)}
{_(disabled ? disabledMessage : msg`Drag & drop your document here.`)}
</p>
{disabled && IS_BILLING_ENABLED() && (
@@ -1,6 +1,7 @@
import { useCurrentOrganisation } from '@documenso/lib/client-only/providers/organisation';
import { useSession } from '@documenso/lib/client-only/providers/session';
import { APP_DOCUMENT_UPLOAD_SIZE_LIMIT, IS_BILLING_ENABLED } from '@documenso/lib/constants/app';
import { getAllowedUploadMimeTypes } from '@documenso/lib/constants/document-conversion';
import { megabytesToBytes } from '@documenso/lib/universal/unit-convertions';
import { isPersonalLayout } from '@documenso/lib/utils/organisations';
import type { MessageDescriptor } from '@lingui/core';
@@ -51,9 +52,7 @@ export const DocumentUploadButton = ({
const isPersonalLayoutMode = isPersonalLayout(organisations);
const { getRootProps, getInputProps } = useDropzone({
accept: {
'application/pdf': ['.pdf'],
},
accept: getAllowedUploadMimeTypes(),
multiple: internalVersion === '2',
disabled,
maxFiles,