feat: add unaccent support for accent-insensitive search (#1402)

- Add PostgreSQL unaccent and pg_trgm extensions
- Create immutable f_unaccent wrapper function for performance
- Update all search queries to use f_unaccent for accent-insensitive matching
- Limit the page text indexed into the tsvector to its first 1,000,000 characters to prevent errors on large documents
- Update full-text search trigger to use f_unaccent
- Fix MultiSelect client-side filtering to show server results properly
This commit is contained in:
Philip Okugbe
2025-07-29 22:47:13 +01:00
committed by GitHub
parent f90c5a636b
commit 5da92a538a
10 changed files with 154 additions and 64 deletions

View File

@ -61,47 +61,26 @@ export function MultiMemberSelect({ onChange }: MultiMemberSelectProps) {
type: "group", type: "group",
})); }));
// Function to merge items into groups without duplicates // Create fresh data structure based on current search results
const mergeItemsIntoGroups = (existingGroups, newItems, groupName) => { const newData = [];
const existingValues = new Set(
existingGroups.flatMap((group) =>
group.items.map((item) => item.value),
),
);
const newItemsFiltered = newItems.filter(
(item) => !existingValues.has(item.value),
);
const updatedGroups = existingGroups.map((group) => { if (userItems && userItems.length > 0) {
if (group.group === groupName) { newData.push({
return { ...group, items: [...group.items, ...newItemsFiltered] }; group: t("Select a user"),
} items: userItems,
return group;
}); });
}
// Use spread syntax to avoid mutation if (groupItems && groupItems.length > 0) {
return updatedGroups.some((group) => group.group === groupName) newData.push({
? updatedGroups group: t("Select a group"),
: [...updatedGroups, { group: groupName, items: newItemsFiltered }]; items: groupItems,
}; });
}
// Merge user items into groups setData(newData);
const updatedUserGroups = mergeItemsIntoGroups(
data,
userItems,
t("Select a user"),
);
// Merge group items into groups
const finalData = mergeItemsIntoGroups(
updatedUserGroups,
groupItems,
t("Select a group"),
);
setData(finalData);
} }
}, [suggestion, data]); }, [suggestion, t]);
return ( return (
<MultiSelect <MultiSelect
@ -114,6 +93,7 @@ export function MultiMemberSelect({ onChange }: MultiMemberSelectProps) {
searchable searchable
searchValue={searchValue} searchValue={searchValue}
onSearchChange={setSearchValue} onSearchChange={setSearchValue}
filter={({ options }) => options}
clearable clearable
variant="filled" variant="filled"
onChange={onChange} onChange={onChange}

View File

@ -76,6 +76,10 @@ export function sanitizeFileName(fileName: string): string {
return sanitizedFilename.slice(0, 255); return sanitizedFilename.slice(0, 255);
} }
/**
 * Strips combining diacritical marks from a string.
 *
 * The input is decomposed to Unicode NFD form, which splits precomposed
 * characters into a base character followed by combining marks, and every
 * code point in the Combining Diacritical Marks block (U+0300–U+036F) is then
 * removed.
 *
 * @param str - Text to process.
 * @returns The text without combining diacritics. Falsy input (such as the
 *   empty string) is returned unchanged.
 */
export function removeAccent(str: string): string {
  // Nothing to normalize; hand back the falsy value as-is.
  if (!str) return str;
  return str.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
}
export function extractBearerTokenFromHeader( export function extractBearerTokenFromHeader(
request: FastifyRequest, request: FastifyRequest,
): string | undefined { ): string | undefined {

View File

@ -44,12 +44,18 @@ export class SearchService {
'creatorId', 'creatorId',
'createdAt', 'createdAt',
'updatedAt', 'updatedAt',
sql<number>`ts_rank(tsv, to_tsquery(${searchQuery}))`.as('rank'), sql<number>`ts_rank(tsv, to_tsquery('english', f_unaccent(${searchQuery})))`.as(
sql<string>`ts_headline('english', text_content, to_tsquery(${searchQuery}),'MinWords=9, MaxWords=10, MaxFragments=3')`.as( 'rank',
),
sql<string>`ts_headline('english', text_content, to_tsquery('english', f_unaccent(${searchQuery})),'MinWords=9, MaxWords=10, MaxFragments=3')`.as(
'highlight', 'highlight',
), ),
]) ])
.where('tsv', '@@', sql<string>`to_tsquery(${searchQuery})`) .where(
'tsv',
'@@',
sql<string>`to_tsquery('english', f_unaccent(${searchQuery}))`,
)
.$if(Boolean(searchParams.creatorId), (qb) => .$if(Boolean(searchParams.creatorId), (qb) =>
qb.where('creatorId', '=', searchParams.creatorId), qb.where('creatorId', '=', searchParams.creatorId),
) )
@ -138,21 +144,37 @@ export class SearchService {
const query = suggestion.query.toLowerCase().trim(); const query = suggestion.query.toLowerCase().trim();
if (suggestion.includeUsers) { if (suggestion.includeUsers) {
users = await this.db const userQuery = this.db
.selectFrom('users') .selectFrom('users')
.select(['id', 'name', 'email', 'avatarUrl']) .select(['id', 'name', 'email', 'avatarUrl'])
.where((eb) => eb(sql`LOWER(users.name)`, 'like', `%${query}%`))
.where('workspaceId', '=', workspaceId) .where('workspaceId', '=', workspaceId)
.where('deletedAt', 'is', null) .where('deletedAt', 'is', null)
.limit(limit) .where((eb) =>
.execute(); eb.or([
eb(
sql`LOWER(f_unaccent(users.name))`,
'like',
sql`LOWER(f_unaccent(${`%${query}%`}))`,
),
eb(sql`users.email`, 'ilike', sql`f_unaccent(${`%${query}%`})`),
]),
)
.limit(limit);
users = await userQuery.execute();
} }
if (suggestion.includeGroups) { if (suggestion.includeGroups) {
groups = await this.db groups = await this.db
.selectFrom('groups') .selectFrom('groups')
.select(['id', 'name', 'description']) .select(['id', 'name', 'description'])
.where((eb) => eb(sql`LOWER(groups.name)`, 'like', `%${query}%`)) .where((eb) =>
eb(
sql`LOWER(f_unaccent(groups.name))`,
'like',
sql`LOWER(f_unaccent(${`%${query}%`}))`,
),
)
.where('workspaceId', '=', workspaceId) .where('workspaceId', '=', workspaceId)
.limit(limit) .limit(limit)
.execute(); .execute();
@ -162,7 +184,13 @@ export class SearchService {
let pageSearch = this.db let pageSearch = this.db
.selectFrom('pages') .selectFrom('pages')
.select(['id', 'slugId', 'title', 'icon', 'spaceId']) .select(['id', 'slugId', 'title', 'icon', 'spaceId'])
.where((eb) => eb(sql`LOWER(pages.title)`, 'like', `%${query}%`)) .where((eb) =>
eb(
sql`LOWER(f_unaccent(pages.title))`,
'like',
sql`LOWER(f_unaccent(${`%${query}%`}))`,
),
)
.where('workspaceId', '=', workspaceId) .where('workspaceId', '=', workspaceId)
.limit(limit); .limit(limit);

View File

@ -8,6 +8,7 @@ import { AcceptInviteDto, InviteUserDto } from '../dto/invitation.dto';
import { UserRepo } from '@docmost/db/repos/user/user.repo'; import { UserRepo } from '@docmost/db/repos/user/user.repo';
import { InjectKysely } from 'nestjs-kysely'; import { InjectKysely } from 'nestjs-kysely';
import { KyselyDB } from '@docmost/db/types/kysely.types'; import { KyselyDB } from '@docmost/db/types/kysely.types';
import { sql } from 'kysely';
import { executeTx } from '@docmost/db/utils'; import { executeTx } from '@docmost/db/utils';
import { import {
Group, Group,
@ -55,7 +56,11 @@ export class WorkspaceInvitationService {
if (pagination.query) { if (pagination.query) {
query = query.where((eb) => query = query.where((eb) =>
eb('email', 'ilike', `%${pagination.query}%`), eb(
sql`email`,
'ilike',
sql`f_unaccent(${'%' + pagination.query + '%'})`,
),
); );
} }

View File

@ -0,0 +1,50 @@
import { type Kysely, sql } from 'kysely';
/**
 * Forward migration for accent-insensitive search.
 *
 * Runs four statements in order:
 *  1. enables the `unaccent` extension,
 *  2. enables the `pg_trgm` extension,
 *  3. defines `f_unaccent(text)`, an IMMUTABLE wrapper around `unaccent`,
 *  4. replaces `pages_tsvector_trigger()` so that both the title and the text
 *     content are passed through `f_unaccent` before being indexed.
 *
 * Only the trigger function is replaced; no UPDATE is issued here, so rows
 * written before this migration keep their previously computed `tsv`.
 *
 * @param db - Kysely handle the statements are executed against.
 */
export async function up(db: Kysely<any>): Promise<void> {
  const enableUnaccent = sql`CREATE EXTENSION IF NOT EXISTS unaccent`;
  const enableTrigram = sql`CREATE EXTENSION IF NOT EXISTS pg_trgm`;

  // The wrapper is declared IMMUTABLE so it can be used in index expressions
  // on unaccented columns.
  // https://stackoverflow.com/a/11007216/8299075
  const defineUnaccentWrapper = sql`
CREATE OR REPLACE FUNCTION f_unaccent(text) RETURNS text
AS $$
SELECT unaccent('unaccent', $1);
$$ LANGUAGE sql IMMUTABLE PARALLEL SAFE STRICT;
`;

  // Title gets weight 'A'; text_content gets weight 'B' and is truncated to
  // its first 1,000,000 characters before being converted to a tsvector.
  const redefineTsvectorTrigger = sql`
CREATE OR REPLACE FUNCTION pages_tsvector_trigger() RETURNS trigger AS $$
begin
new.tsv :=
setweight(to_tsvector('english', f_unaccent(coalesce(new.title, ''))), 'A') ||
setweight(to_tsvector('english', f_unaccent(substring(coalesce(new.text_content, ''), 1, 1000000))), 'B');
return new;
end;
$$ LANGUAGE plpgsql;
`;

  // Order matters: extensions, then the wrapper, then the trigger function
  // that calls the wrapper.
  const steps = [
    enableUnaccent,
    enableTrigram,
    defineUnaccentWrapper,
    redefineTsvectorTrigger,
  ];
  for (const step of steps) {
    await step.execute(db);
  }
}
/**
 * Reverse migration for accent-insensitive search.
 *
 * First restores `pages_tsvector_trigger()` to the variant that indexes the
 * raw title and text content (no `f_unaccent`, no length cap), then drops
 * `f_unaccent(text)` and the `pg_trgm` and `unaccent` extensions.
 *
 * NOTE(review): the extensions are dropped unconditionally, even though `up`
 * uses IF NOT EXISTS and so may not have been what created them. Confirm
 * nothing else in the database relies on them before rolling back.
 *
 * @param db - Kysely handle the statements are executed against.
 */
export async function down(db: Kysely<any>): Promise<void> {
  const restoreTsvectorTrigger = sql`
CREATE OR REPLACE FUNCTION pages_tsvector_trigger() RETURNS trigger AS $$
begin
new.tsv :=
setweight(to_tsvector('english', coalesce(new.title, '')), 'A') ||
setweight(to_tsvector('english', coalesce(new.text_content, '')), 'B');
return new;
end;
$$ LANGUAGE plpgsql;
`;

  // The trigger function must stop calling f_unaccent before the wrapper is
  // dropped; the wrapper is dropped before the extensions.
  const teardown = [
    restoreTsvectorTrigger,
    sql`DROP FUNCTION IF EXISTS f_unaccent(text)`,
    sql`DROP EXTENSION IF EXISTS pg_trgm`,
    sql`DROP EXTENSION IF EXISTS unaccent`,
  ];
  for (const statement of teardown) {
    await statement.execute(db);
  }
}

View File

@ -6,6 +6,7 @@ import {
import { InjectKysely } from 'nestjs-kysely'; import { InjectKysely } from 'nestjs-kysely';
import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types'; import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types';
import { dbOrTx, executeTx } from '@docmost/db/utils'; import { dbOrTx, executeTx } from '@docmost/db/utils';
import { sql } from 'kysely';
import { GroupUser, InsertableGroupUser } from '@docmost/db/types/entity.types'; import { GroupUser, InsertableGroupUser } from '@docmost/db/types/entity.types';
import { PaginationOptions } from '../../pagination/pagination-options'; import { PaginationOptions } from '../../pagination/pagination-options';
import { executeWithPagination } from '@docmost/db/pagination/pagination'; import { executeWithPagination } from '@docmost/db/pagination/pagination';
@ -56,7 +57,7 @@ export class GroupUserRepo {
if (pagination.query) { if (pagination.query) {
query = query.where((eb) => query = query.where((eb) =>
eb('users.name', 'ilike', `%${pagination.query}%`), eb(sql`f_unaccent(users.name)`, 'ilike', sql`f_unaccent(${'%' + pagination.query + '%'})`),
); );
} }

View File

@ -114,10 +114,10 @@ export class GroupRepo {
if (pagination.query) { if (pagination.query) {
query = query.where((eb) => query = query.where((eb) =>
eb('name', 'ilike', `%${pagination.query}%`).or( eb(sql`f_unaccent(name)`, 'ilike', sql`f_unaccent(${'%' + pagination.query + '%'})`).or(
'description', sql`f_unaccent(description)`,
'ilike', 'ilike',
`%${pagination.query}%`, sql`f_unaccent(${'%' + pagination.query + '%'})`,
), ),
); );
} }

View File

@ -2,6 +2,7 @@ import { BadRequestException, Injectable } from '@nestjs/common';
import { InjectKysely } from 'nestjs-kysely'; import { InjectKysely } from 'nestjs-kysely';
import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types'; import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types';
import { dbOrTx } from '@docmost/db/utils'; import { dbOrTx } from '@docmost/db/utils';
import { sql } from 'kysely';
import { import {
InsertableSpaceMember, InsertableSpaceMember,
SpaceMember, SpaceMember,
@ -119,9 +120,21 @@ export class SpaceMemberRepo {
if (pagination.query) { if (pagination.query) {
query = query.where((eb) => query = query.where((eb) =>
eb('users.name', 'ilike', `%${pagination.query}%`) eb(
.or('users.email', 'ilike', `%${pagination.query}%`) sql`f_unaccent(users.name)`,
.or('groups.name', 'ilike', `%${pagination.query}%`), 'ilike',
sql`f_unaccent(${'%' + pagination.query + '%'})`,
)
.or(
sql`users.email`,
'ilike',
sql`f_unaccent(${'%' + pagination.query + '%'})`,
)
.or(
sql`f_unaccent(groups.name)`,
'ilike',
sql`f_unaccent(${'%' + pagination.query + '%'})`,
),
); );
} }
@ -228,10 +241,14 @@ export class SpaceMemberRepo {
if (pagination.query) { if (pagination.query) {
query = query.where((eb) => query = query.where((eb) =>
eb('name', 'ilike', `%${pagination.query}%`).or( eb(
'description', sql`f_unaccent(name)`,
'ilike', 'ilike',
`%${pagination.query}%`, sql`f_unaccent(${'%' + pagination.query + '%'})`,
).or(
sql`f_unaccent(description)`,
'ilike',
sql`f_unaccent(${'%' + pagination.query + '%'})`,
), ),
); );
} }

View File

@ -110,10 +110,10 @@ export class SpaceRepo {
if (pagination.query) { if (pagination.query) {
query = query.where((eb) => query = query.where((eb) =>
eb('name', 'ilike', `%${pagination.query}%`).or( eb(sql`f_unaccent(name)`, 'ilike', sql`f_unaccent(${'%' + pagination.query + '%'})`).or(
'description', sql`f_unaccent(description)`,
'ilike', 'ilike',
`%${pagination.query}%`, sql`f_unaccent(${'%' + pagination.query + '%'})`,
), ),
); );
} }

View File

@ -4,6 +4,7 @@ import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types';
import { DB, Users } from '@docmost/db/types/db'; import { DB, Users } from '@docmost/db/types/db';
import { hashPassword } from '../../../common/helpers'; import { hashPassword } from '../../../common/helpers';
import { dbOrTx } from '@docmost/db/utils'; import { dbOrTx } from '@docmost/db/utils';
import { sql } from 'kysely';
import { import {
InsertableUser, InsertableUser,
UpdatableUser, UpdatableUser,
@ -149,10 +150,14 @@ export class UserRepo {
if (pagination.query) { if (pagination.query) {
query = query.where((eb) => query = query.where((eb) =>
eb('users.name', 'ilike', `%${pagination.query}%`).or( eb(
'users.email', sql`f_unaccent(users.name)`,
'ilike', 'ilike',
`%${pagination.query}%`, sql`f_unaccent(${'%' + pagination.query + '%'})`,
).or(
sql`users.email`,
'ilike',
sql`f_unaccent(${'%' + pagination.query + '%'})`,
), ),
); );
} }