From 5da92a538ad1f24bc8e06b80dfd6f2d410dc69cf Mon Sep 17 00:00:00 2001 From: Philip Okugbe <16838612+Philipinho@users.noreply.github.com> Date: Tue, 29 Jul 2025 22:47:13 +0100 Subject: [PATCH] feat: add unaccent support for accent-insensitive search (#1402) - Add PostgreSQL unaccent and pg_trgm extensions - Create immutable f_unaccent wrapper function for performance - Update all search queries to use f_unaccent for accent-insensitive matching - Add 1MB limit to tsvector content to prevent errors on large documents - Update full-text search trigger to use f_unaccent - Fix MultiSelect client-side filtering to show server results properly --- .../space/components/multi-member-select.tsx | 56 ++++++------------- apps/server/src/common/helpers/utils.ts | 4 ++ apps/server/src/core/search/search.service.ts | 46 ++++++++++++--- .../services/workspace-invitation.service.ts | 7 ++- ...234-add-unaccent-pg_trm-update-tsvector.ts | 50 +++++++++++++++++ .../database/repos/group/group-user.repo.ts | 3 +- .../src/database/repos/group/group.repo.ts | 6 +- .../database/repos/space/space-member.repo.ts | 29 ++++++++-- .../src/database/repos/space/space.repo.ts | 6 +- .../src/database/repos/user/user.repo.ts | 11 +++- 10 files changed, 154 insertions(+), 64 deletions(-) create mode 100644 apps/server/src/database/migrations/20250723T125234-add-unaccent-pg_trm-update-tsvector.ts diff --git a/apps/client/src/features/space/components/multi-member-select.tsx b/apps/client/src/features/space/components/multi-member-select.tsx index 602a6232..4a0a7fe8 100644 --- a/apps/client/src/features/space/components/multi-member-select.tsx +++ b/apps/client/src/features/space/components/multi-member-select.tsx @@ -61,47 +61,26 @@ export function MultiMemberSelect({ onChange }: MultiMemberSelectProps) { type: "group", })); - // Function to merge items into groups without duplicates - const mergeItemsIntoGroups = (existingGroups, newItems, groupName) => { - const existingValues = new Set( - 
existingGroups.flatMap((group) => - group.items.map((item) => item.value), - ), - ); - const newItemsFiltered = newItems.filter( - (item) => !existingValues.has(item.value), - ); - - const updatedGroups = existingGroups.map((group) => { - if (group.group === groupName) { - return { ...group, items: [...group.items, ...newItemsFiltered] }; - } - return group; + // Create fresh data structure based on current search results + const newData = []; + + if (userItems && userItems.length > 0) { + newData.push({ + group: t("Select a user"), + items: userItems, }); + } + + if (groupItems && groupItems.length > 0) { + newData.push({ + group: t("Select a group"), + items: groupItems, + }); + } - // Use spread syntax to avoid mutation - return updatedGroups.some((group) => group.group === groupName) - ? updatedGroups - : [...updatedGroups, { group: groupName, items: newItemsFiltered }]; - }; - - // Merge user items into groups - const updatedUserGroups = mergeItemsIntoGroups( - data, - userItems, - t("Select a user"), - ); - - // Merge group items into groups - const finalData = mergeItemsIntoGroups( - updatedUserGroups, - groupItems, - t("Select a group"), - ); - - setData(finalData); + setData(newData); } - }, [suggestion, data]); + }, [suggestion, t]); return ( options} clearable variant="filled" onChange={onChange} diff --git a/apps/server/src/common/helpers/utils.ts b/apps/server/src/common/helpers/utils.ts index edd9a903..06a23704 100644 --- a/apps/server/src/common/helpers/utils.ts +++ b/apps/server/src/common/helpers/utils.ts @@ -76,6 +76,10 @@ export function sanitizeFileName(fileName: string): string { return sanitizedFilename.slice(0, 255); } +export function removeAccent(str: string): string { + if (!str) return str; + return str.normalize('NFD').replace(/[\u0300-\u036f]/g, ''); +} export function extractBearerTokenFromHeader( request: FastifyRequest, ): string | undefined { diff --git a/apps/server/src/core/search/search.service.ts 
b/apps/server/src/core/search/search.service.ts index 3ea1e535..db135c22 100644 --- a/apps/server/src/core/search/search.service.ts +++ b/apps/server/src/core/search/search.service.ts @@ -44,12 +44,18 @@ export class SearchService { 'creatorId', 'createdAt', 'updatedAt', - sql`ts_rank(tsv, to_tsquery(${searchQuery}))`.as('rank'), - sql`ts_headline('english', text_content, to_tsquery(${searchQuery}),'MinWords=9, MaxWords=10, MaxFragments=3')`.as( + sql`ts_rank(tsv, to_tsquery('english', f_unaccent(${searchQuery})))`.as( + 'rank', + ), + sql`ts_headline('english', text_content, to_tsquery('english', f_unaccent(${searchQuery})),'MinWords=9, MaxWords=10, MaxFragments=3')`.as( 'highlight', ), ]) - .where('tsv', '@@', sql`to_tsquery(${searchQuery})`) + .where( + 'tsv', + '@@', + sql`to_tsquery('english', f_unaccent(${searchQuery}))`, + ) .$if(Boolean(searchParams.creatorId), (qb) => qb.where('creatorId', '=', searchParams.creatorId), ) @@ -138,21 +144,37 @@ export class SearchService { const query = suggestion.query.toLowerCase().trim(); if (suggestion.includeUsers) { - users = await this.db + const userQuery = this.db .selectFrom('users') .select(['id', 'name', 'email', 'avatarUrl']) - .where((eb) => eb(sql`LOWER(users.name)`, 'like', `%${query}%`)) .where('workspaceId', '=', workspaceId) .where('deletedAt', 'is', null) - .limit(limit) - .execute(); + .where((eb) => + eb.or([ + eb( + sql`LOWER(f_unaccent(users.name))`, + 'like', + sql`LOWER(f_unaccent(${`%${query}%`}))`, + ), + eb(sql`users.email`, 'ilike', sql`f_unaccent(${`%${query}%`})`), + ]), + ) + .limit(limit); + + users = await userQuery.execute(); } if (suggestion.includeGroups) { groups = await this.db .selectFrom('groups') .select(['id', 'name', 'description']) - .where((eb) => eb(sql`LOWER(groups.name)`, 'like', `%${query}%`)) + .where((eb) => + eb( + sql`LOWER(f_unaccent(groups.name))`, + 'like', + sql`LOWER(f_unaccent(${`%${query}%`}))`, + ), + ) .where('workspaceId', '=', workspaceId) .limit(limit) 
.execute(); @@ -162,7 +184,13 @@ export class SearchService { let pageSearch = this.db .selectFrom('pages') .select(['id', 'slugId', 'title', 'icon', 'spaceId']) - .where((eb) => eb(sql`LOWER(pages.title)`, 'like', `%${query}%`)) + .where((eb) => + eb( + sql`LOWER(f_unaccent(pages.title))`, + 'like', + sql`LOWER(f_unaccent(${`%${query}%`}))`, + ), + ) .where('workspaceId', '=', workspaceId) .limit(limit); diff --git a/apps/server/src/core/workspace/services/workspace-invitation.service.ts b/apps/server/src/core/workspace/services/workspace-invitation.service.ts index 90485f0a..2defcbba 100644 --- a/apps/server/src/core/workspace/services/workspace-invitation.service.ts +++ b/apps/server/src/core/workspace/services/workspace-invitation.service.ts @@ -8,6 +8,7 @@ import { AcceptInviteDto, InviteUserDto } from '../dto/invitation.dto'; import { UserRepo } from '@docmost/db/repos/user/user.repo'; import { InjectKysely } from 'nestjs-kysely'; import { KyselyDB } from '@docmost/db/types/kysely.types'; +import { sql } from 'kysely'; import { executeTx } from '@docmost/db/utils'; import { Group, @@ -55,7 +56,11 @@ export class WorkspaceInvitationService { if (pagination.query) { query = query.where((eb) => - eb('email', 'ilike', `%${pagination.query}%`), + eb( + sql`email`, + 'ilike', + sql`f_unaccent(${'%' + pagination.query + '%'})`, + ), ); } diff --git a/apps/server/src/database/migrations/20250723T125234-add-unaccent-pg_trm-update-tsvector.ts b/apps/server/src/database/migrations/20250723T125234-add-unaccent-pg_trm-update-tsvector.ts new file mode 100644 index 00000000..389a03c1 --- /dev/null +++ b/apps/server/src/database/migrations/20250723T125234-add-unaccent-pg_trm-update-tsvector.ts @@ -0,0 +1,50 @@ +import { type Kysely, sql } from 'kysely'; + +export async function up(db: Kysely<any>): Promise<void> { + // Create unaccent extension + await sql`CREATE EXTENSION IF NOT EXISTS unaccent`.execute(db); + + // Create pg_trgm extension + await sql`CREATE EXTENSION IF NOT EXISTS 
pg_trgm`.execute(db); + + // Create IMMUTABLE wrapper function for unaccent + // This allows us to create indexes on unaccented columns for better performance + // https://stackoverflow.com/a/11007216/8299075 + await sql` + CREATE OR REPLACE FUNCTION f_unaccent(text) RETURNS text + AS $$ + SELECT unaccent('unaccent', $1); + $$ LANGUAGE sql IMMUTABLE PARALLEL SAFE STRICT; + `.execute(db); + + // Update the pages tsvector trigger to use the immutable function + await sql` + CREATE OR REPLACE FUNCTION pages_tsvector_trigger() RETURNS trigger AS $$ + begin + new.tsv := + setweight(to_tsvector('english', f_unaccent(coalesce(new.title, ''))), 'A') || + setweight(to_tsvector('english', f_unaccent(substring(coalesce(new.text_content, ''), 1, 1000000))), 'B'); + return new; + end; + $$ LANGUAGE plpgsql; + `.execute(db); +} + +export async function down(db: Kysely<any>): Promise<void> { + await sql` + CREATE OR REPLACE FUNCTION pages_tsvector_trigger() RETURNS trigger AS $$ + begin + new.tsv := + setweight(to_tsvector('english', coalesce(new.title, '')), 'A') || + setweight(to_tsvector('english', coalesce(new.text_content, '')), 'B'); + return new; + end; + $$ LANGUAGE plpgsql; + `.execute(db); + + await sql`DROP FUNCTION IF EXISTS f_unaccent(text)`.execute(db); + + await sql`DROP EXTENSION IF EXISTS pg_trgm`.execute(db); + + await sql`DROP EXTENSION IF EXISTS unaccent`.execute(db); +} diff --git a/apps/server/src/database/repos/group/group-user.repo.ts b/apps/server/src/database/repos/group/group-user.repo.ts index 3136f077..5c144ec4 100644 --- a/apps/server/src/database/repos/group/group-user.repo.ts +++ b/apps/server/src/database/repos/group/group-user.repo.ts @@ -6,6 +6,7 @@ import { import { InjectKysely } from 'nestjs-kysely'; import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types'; import { dbOrTx, executeTx } from '@docmost/db/utils'; +import { sql } from 'kysely'; import { GroupUser, InsertableGroupUser } from '@docmost/db/types/entity.types'; import { 
PaginationOptions } from '../../pagination/pagination-options'; import { executeWithPagination } from '@docmost/db/pagination/pagination'; @@ -56,7 +57,7 @@ export class GroupUserRepo { if (pagination.query) { query = query.where((eb) => - eb('users.name', 'ilike', `%${pagination.query}%`), + eb(sql`f_unaccent(users.name)`, 'ilike', sql`f_unaccent(${'%' + pagination.query + '%'})`), ); } diff --git a/apps/server/src/database/repos/group/group.repo.ts b/apps/server/src/database/repos/group/group.repo.ts index 67aaa94a..6d0e4257 100644 --- a/apps/server/src/database/repos/group/group.repo.ts +++ b/apps/server/src/database/repos/group/group.repo.ts @@ -114,10 +114,10 @@ export class GroupRepo { if (pagination.query) { query = query.where((eb) => - eb('name', 'ilike', `%${pagination.query}%`).or( - 'description', + eb(sql`f_unaccent(name)`, 'ilike', sql`f_unaccent(${'%' + pagination.query + '%'})`).or( + sql`f_unaccent(description)`, 'ilike', - `%${pagination.query}%`, + sql`f_unaccent(${'%' + pagination.query + '%'})`, ), ); } diff --git a/apps/server/src/database/repos/space/space-member.repo.ts b/apps/server/src/database/repos/space/space-member.repo.ts index b9c4fbf9..0850c5e1 100644 --- a/apps/server/src/database/repos/space/space-member.repo.ts +++ b/apps/server/src/database/repos/space/space-member.repo.ts @@ -2,6 +2,7 @@ import { BadRequestException, Injectable } from '@nestjs/common'; import { InjectKysely } from 'nestjs-kysely'; import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types'; import { dbOrTx } from '@docmost/db/utils'; +import { sql } from 'kysely'; import { InsertableSpaceMember, SpaceMember, @@ -119,9 +120,21 @@ export class SpaceMemberRepo { if (pagination.query) { query = query.where((eb) => - eb('users.name', 'ilike', `%${pagination.query}%`) - .or('users.email', 'ilike', `%${pagination.query}%`) - .or('groups.name', 'ilike', `%${pagination.query}%`), + eb( + sql`f_unaccent(users.name)`, + 'ilike', + sql`f_unaccent(${'%' + 
pagination.query + '%'})`, + ) + .or( + sql`users.email`, + 'ilike', + sql`f_unaccent(${'%' + pagination.query + '%'})`, + ) + .or( + sql`f_unaccent(groups.name)`, + 'ilike', + sql`f_unaccent(${'%' + pagination.query + '%'})`, + ), ); } @@ -228,10 +241,14 @@ export class SpaceMemberRepo { if (pagination.query) { query = query.where((eb) => - eb('name', 'ilike', `%${pagination.query}%`).or( - 'description', + eb( + sql`f_unaccent(name)`, 'ilike', - `%${pagination.query}%`, + sql`f_unaccent(${'%' + pagination.query + '%'})`, + ).or( + sql`f_unaccent(description)`, + 'ilike', + sql`f_unaccent(${'%' + pagination.query + '%'})`, ), ); } diff --git a/apps/server/src/database/repos/space/space.repo.ts b/apps/server/src/database/repos/space/space.repo.ts index 6405a31d..d92f9828 100644 --- a/apps/server/src/database/repos/space/space.repo.ts +++ b/apps/server/src/database/repos/space/space.repo.ts @@ -110,10 +110,10 @@ export class SpaceRepo { if (pagination.query) { query = query.where((eb) => - eb('name', 'ilike', `%${pagination.query}%`).or( - 'description', + eb(sql`f_unaccent(name)`, 'ilike', sql`f_unaccent(${'%' + pagination.query + '%'})`).or( + sql`f_unaccent(description)`, 'ilike', - `%${pagination.query}%`, + sql`f_unaccent(${'%' + pagination.query + '%'})`, ), ); } diff --git a/apps/server/src/database/repos/user/user.repo.ts b/apps/server/src/database/repos/user/user.repo.ts index 190670e3..85474797 100644 --- a/apps/server/src/database/repos/user/user.repo.ts +++ b/apps/server/src/database/repos/user/user.repo.ts @@ -4,6 +4,7 @@ import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types'; import { DB, Users } from '@docmost/db/types/db'; import { hashPassword } from '../../../common/helpers'; import { dbOrTx } from '@docmost/db/utils'; +import { sql } from 'kysely'; import { InsertableUser, UpdatableUser, @@ -149,10 +150,14 @@ export class UserRepo { if (pagination.query) { query = query.where((eb) => - eb('users.name', 'ilike', 
`%${pagination.query}%`).or( - 'users.email', + eb( + sql`f_unaccent(users.name)`, 'ilike', - `%${pagination.query}%`, + sql`f_unaccent(${'%' + pagination.query + '%'})`, + ).or( + sql`users.email`, + 'ilike', + sql`f_unaccent(${'%' + pagination.query + '%'})`, ), ); }