mirror of
https://github.com/docmost/docmost.git
synced 2025-11-13 00:52:40 +10:00
feat: add unaccent support for accent-insensitive search (#1402)
- Add PostgreSQL unaccent and pg_trgm extensions - Create immutable f_unaccent wrapper function for performance - Update all search queries to use f_unaccent for accent-insensitive matching - Add 1MB limit to tsvector content to prevent errors on large documents - Update full-text search trigger to use f_unaccent - Fix MultiSelect client-side filtering to show server results properly
This commit is contained in:
@ -61,47 +61,26 @@ export function MultiMemberSelect({ onChange }: MultiMemberSelectProps) {
|
||||
type: "group",
|
||||
}));
|
||||
|
||||
// Function to merge items into groups without duplicates
|
||||
const mergeItemsIntoGroups = (existingGroups, newItems, groupName) => {
|
||||
const existingValues = new Set(
|
||||
existingGroups.flatMap((group) =>
|
||||
group.items.map((item) => item.value),
|
||||
),
|
||||
);
|
||||
const newItemsFiltered = newItems.filter(
|
||||
(item) => !existingValues.has(item.value),
|
||||
);
|
||||
// Create fresh data structure based on current search results
|
||||
const newData = [];
|
||||
|
||||
const updatedGroups = existingGroups.map((group) => {
|
||||
if (group.group === groupName) {
|
||||
return { ...group, items: [...group.items, ...newItemsFiltered] };
|
||||
}
|
||||
return group;
|
||||
if (userItems && userItems.length > 0) {
|
||||
newData.push({
|
||||
group: t("Select a user"),
|
||||
items: userItems,
|
||||
});
|
||||
}
|
||||
|
||||
// Use spread syntax to avoid mutation
|
||||
return updatedGroups.some((group) => group.group === groupName)
|
||||
? updatedGroups
|
||||
: [...updatedGroups, { group: groupName, items: newItemsFiltered }];
|
||||
};
|
||||
if (groupItems && groupItems.length > 0) {
|
||||
newData.push({
|
||||
group: t("Select a group"),
|
||||
items: groupItems,
|
||||
});
|
||||
}
|
||||
|
||||
// Merge user items into groups
|
||||
const updatedUserGroups = mergeItemsIntoGroups(
|
||||
data,
|
||||
userItems,
|
||||
t("Select a user"),
|
||||
);
|
||||
|
||||
// Merge group items into groups
|
||||
const finalData = mergeItemsIntoGroups(
|
||||
updatedUserGroups,
|
||||
groupItems,
|
||||
t("Select a group"),
|
||||
);
|
||||
|
||||
setData(finalData);
|
||||
setData(newData);
|
||||
}
|
||||
}, [suggestion, data]);
|
||||
}, [suggestion, t]);
|
||||
|
||||
return (
|
||||
<MultiSelect
|
||||
@ -114,6 +93,7 @@ export function MultiMemberSelect({ onChange }: MultiMemberSelectProps) {
|
||||
searchable
|
||||
searchValue={searchValue}
|
||||
onSearchChange={setSearchValue}
|
||||
filter={({ options }) => options}
|
||||
clearable
|
||||
variant="filled"
|
||||
onChange={onChange}
|
||||
|
||||
@ -76,6 +76,10 @@ export function sanitizeFileName(fileName: string): string {
|
||||
return sanitizedFilename.slice(0, 255);
|
||||
}
|
||||
|
||||
export function removeAccent(str: string): string {
|
||||
if (!str) return str;
|
||||
return str.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
|
||||
|
||||
export function extractBearerTokenFromHeader(
|
||||
request: FastifyRequest,
|
||||
): string | undefined {
|
||||
|
||||
@ -44,12 +44,18 @@ export class SearchService {
|
||||
'creatorId',
|
||||
'createdAt',
|
||||
'updatedAt',
|
||||
sql<number>`ts_rank(tsv, to_tsquery(${searchQuery}))`.as('rank'),
|
||||
sql<string>`ts_headline('english', text_content, to_tsquery(${searchQuery}),'MinWords=9, MaxWords=10, MaxFragments=3')`.as(
|
||||
sql<number>`ts_rank(tsv, to_tsquery('english', f_unaccent(${searchQuery})))`.as(
|
||||
'rank',
|
||||
),
|
||||
sql<string>`ts_headline('english', text_content, to_tsquery('english', f_unaccent(${searchQuery})),'MinWords=9, MaxWords=10, MaxFragments=3')`.as(
|
||||
'highlight',
|
||||
),
|
||||
])
|
||||
.where('tsv', '@@', sql<string>`to_tsquery(${searchQuery})`)
|
||||
.where(
|
||||
'tsv',
|
||||
'@@',
|
||||
sql<string>`to_tsquery('english', f_unaccent(${searchQuery}))`,
|
||||
)
|
||||
.$if(Boolean(searchParams.creatorId), (qb) =>
|
||||
qb.where('creatorId', '=', searchParams.creatorId),
|
||||
)
|
||||
@ -138,21 +144,37 @@ export class SearchService {
|
||||
const query = suggestion.query.toLowerCase().trim();
|
||||
|
||||
if (suggestion.includeUsers) {
|
||||
users = await this.db
|
||||
const userQuery = this.db
|
||||
.selectFrom('users')
|
||||
.select(['id', 'name', 'email', 'avatarUrl'])
|
||||
.where((eb) => eb(sql`LOWER(users.name)`, 'like', `%${query}%`))
|
||||
.where('workspaceId', '=', workspaceId)
|
||||
.where('deletedAt', 'is', null)
|
||||
.limit(limit)
|
||||
.execute();
|
||||
.where((eb) =>
|
||||
eb.or([
|
||||
eb(
|
||||
sql`LOWER(f_unaccent(users.name))`,
|
||||
'like',
|
||||
sql`LOWER(f_unaccent(${`%${query}%`}))`,
|
||||
),
|
||||
eb(sql`users.email`, 'ilike', sql`f_unaccent(${`%${query}%`})`),
|
||||
]),
|
||||
)
|
||||
.limit(limit);
|
||||
|
||||
users = await userQuery.execute();
|
||||
}
|
||||
|
||||
if (suggestion.includeGroups) {
|
||||
groups = await this.db
|
||||
.selectFrom('groups')
|
||||
.select(['id', 'name', 'description'])
|
||||
.where((eb) => eb(sql`LOWER(groups.name)`, 'like', `%${query}%`))
|
||||
.where((eb) =>
|
||||
eb(
|
||||
sql`LOWER(f_unaccent(groups.name))`,
|
||||
'like',
|
||||
sql`LOWER(f_unaccent(${`%${query}%`}))`,
|
||||
),
|
||||
)
|
||||
.where('workspaceId', '=', workspaceId)
|
||||
.limit(limit)
|
||||
.execute();
|
||||
@ -162,7 +184,13 @@ export class SearchService {
|
||||
let pageSearch = this.db
|
||||
.selectFrom('pages')
|
||||
.select(['id', 'slugId', 'title', 'icon', 'spaceId'])
|
||||
.where((eb) => eb(sql`LOWER(pages.title)`, 'like', `%${query}%`))
|
||||
.where((eb) =>
|
||||
eb(
|
||||
sql`LOWER(f_unaccent(pages.title))`,
|
||||
'like',
|
||||
sql`LOWER(f_unaccent(${`%${query}%`}))`,
|
||||
),
|
||||
)
|
||||
.where('workspaceId', '=', workspaceId)
|
||||
.limit(limit);
|
||||
|
||||
|
||||
@ -8,6 +8,7 @@ import { AcceptInviteDto, InviteUserDto } from '../dto/invitation.dto';
|
||||
import { UserRepo } from '@docmost/db/repos/user/user.repo';
|
||||
import { InjectKysely } from 'nestjs-kysely';
|
||||
import { KyselyDB } from '@docmost/db/types/kysely.types';
|
||||
import { sql } from 'kysely';
|
||||
import { executeTx } from '@docmost/db/utils';
|
||||
import {
|
||||
Group,
|
||||
@ -55,7 +56,11 @@ export class WorkspaceInvitationService {
|
||||
|
||||
if (pagination.query) {
|
||||
query = query.where((eb) =>
|
||||
eb('email', 'ilike', `%${pagination.query}%`),
|
||||
eb(
|
||||
sql`email`,
|
||||
'ilike',
|
||||
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,50 @@
|
||||
import { type Kysely, sql } from 'kysely';
|
||||
|
||||
export async function up(db: Kysely<any>): Promise<void> {
|
||||
// Create unaccent extension
|
||||
await sql`CREATE EXTENSION IF NOT EXISTS unaccent`.execute(db);
|
||||
|
||||
// Create pg_trgm extension
|
||||
await sql`CREATE EXTENSION IF NOT EXISTS pg_trgm`.execute(db);
|
||||
|
||||
// Create IMMUTABLE wrapper function for unaccent
|
||||
// This allows us to create indexes on unaccented columns for better performance
|
||||
// https://stackoverflow.com/a/11007216/8299075
|
||||
await sql`
|
||||
CREATE OR REPLACE FUNCTION f_unaccent(text) RETURNS text
|
||||
AS $$
|
||||
SELECT unaccent('unaccent', $1);
|
||||
$$ LANGUAGE sql IMMUTABLE PARALLEL SAFE STRICT;
|
||||
`.execute(db);
|
||||
|
||||
// Update the pages tsvector trigger to use the immutable function
|
||||
await sql`
|
||||
CREATE OR REPLACE FUNCTION pages_tsvector_trigger() RETURNS trigger AS $$
|
||||
begin
|
||||
new.tsv :=
|
||||
setweight(to_tsvector('english', f_unaccent(coalesce(new.title, ''))), 'A') ||
|
||||
setweight(to_tsvector('english', f_unaccent(substring(coalesce(new.text_content, ''), 1, 1000000))), 'B');
|
||||
return new;
|
||||
end;
|
||||
$$ LANGUAGE plpgsql;
|
||||
`.execute(db);
|
||||
}
|
||||
|
||||
export async function down(db: Kysely<any>): Promise<void> {
|
||||
await sql`
|
||||
CREATE OR REPLACE FUNCTION pages_tsvector_trigger() RETURNS trigger AS $$
|
||||
begin
|
||||
new.tsv :=
|
||||
setweight(to_tsvector('english', coalesce(new.title, '')), 'A') ||
|
||||
setweight(to_tsvector('english', coalesce(new.text_content, '')), 'B');
|
||||
return new;
|
||||
end;
|
||||
$$ LANGUAGE plpgsql;
|
||||
`.execute(db);
|
||||
|
||||
await sql`DROP FUNCTION IF EXISTS f_unaccent(text)`.execute(db);
|
||||
|
||||
await sql`DROP EXTENSION IF EXISTS pg_trgm`.execute(db);
|
||||
|
||||
await sql`DROP EXTENSION IF EXISTS unaccent`.execute(db);
|
||||
}
|
||||
@ -6,6 +6,7 @@ import {
|
||||
import { InjectKysely } from 'nestjs-kysely';
|
||||
import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types';
|
||||
import { dbOrTx, executeTx } from '@docmost/db/utils';
|
||||
import { sql } from 'kysely';
|
||||
import { GroupUser, InsertableGroupUser } from '@docmost/db/types/entity.types';
|
||||
import { PaginationOptions } from '../../pagination/pagination-options';
|
||||
import { executeWithPagination } from '@docmost/db/pagination/pagination';
|
||||
@ -56,7 +57,7 @@ export class GroupUserRepo {
|
||||
|
||||
if (pagination.query) {
|
||||
query = query.where((eb) =>
|
||||
eb('users.name', 'ilike', `%${pagination.query}%`),
|
||||
eb(sql`f_unaccent(users.name)`, 'ilike', sql`f_unaccent(${'%' + pagination.query + '%'})`),
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@ -114,10 +114,10 @@ export class GroupRepo {
|
||||
|
||||
if (pagination.query) {
|
||||
query = query.where((eb) =>
|
||||
eb('name', 'ilike', `%${pagination.query}%`).or(
|
||||
'description',
|
||||
eb(sql`f_unaccent(name)`, 'ilike', sql`f_unaccent(${'%' + pagination.query + '%'})`).or(
|
||||
sql`f_unaccent(description)`,
|
||||
'ilike',
|
||||
`%${pagination.query}%`,
|
||||
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
@ -2,6 +2,7 @@ import { BadRequestException, Injectable } from '@nestjs/common';
|
||||
import { InjectKysely } from 'nestjs-kysely';
|
||||
import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types';
|
||||
import { dbOrTx } from '@docmost/db/utils';
|
||||
import { sql } from 'kysely';
|
||||
import {
|
||||
InsertableSpaceMember,
|
||||
SpaceMember,
|
||||
@ -119,9 +120,21 @@ export class SpaceMemberRepo {
|
||||
|
||||
if (pagination.query) {
|
||||
query = query.where((eb) =>
|
||||
eb('users.name', 'ilike', `%${pagination.query}%`)
|
||||
.or('users.email', 'ilike', `%${pagination.query}%`)
|
||||
.or('groups.name', 'ilike', `%${pagination.query}%`),
|
||||
eb(
|
||||
sql`f_unaccent(users.name)`,
|
||||
'ilike',
|
||||
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||
)
|
||||
.or(
|
||||
sql`users.email`,
|
||||
'ilike',
|
||||
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||
)
|
||||
.or(
|
||||
sql`f_unaccent(groups.name)`,
|
||||
'ilike',
|
||||
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
@ -228,10 +241,14 @@ export class SpaceMemberRepo {
|
||||
|
||||
if (pagination.query) {
|
||||
query = query.where((eb) =>
|
||||
eb('name', 'ilike', `%${pagination.query}%`).or(
|
||||
'description',
|
||||
eb(
|
||||
sql`f_unaccent(name)`,
|
||||
'ilike',
|
||||
`%${pagination.query}%`,
|
||||
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||
).or(
|
||||
sql`f_unaccent(description)`,
|
||||
'ilike',
|
||||
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
@ -110,10 +110,10 @@ export class SpaceRepo {
|
||||
|
||||
if (pagination.query) {
|
||||
query = query.where((eb) =>
|
||||
eb('name', 'ilike', `%${pagination.query}%`).or(
|
||||
'description',
|
||||
eb(sql`f_unaccent(name)`, 'ilike', sql`f_unaccent(${'%' + pagination.query + '%'})`).or(
|
||||
sql`f_unaccent(description)`,
|
||||
'ilike',
|
||||
`%${pagination.query}%`,
|
||||
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
@ -4,6 +4,7 @@ import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types';
|
||||
import { DB, Users } from '@docmost/db/types/db';
|
||||
import { hashPassword } from '../../../common/helpers';
|
||||
import { dbOrTx } from '@docmost/db/utils';
|
||||
import { sql } from 'kysely';
|
||||
import {
|
||||
InsertableUser,
|
||||
UpdatableUser,
|
||||
@ -149,10 +150,14 @@ export class UserRepo {
|
||||
|
||||
if (pagination.query) {
|
||||
query = query.where((eb) =>
|
||||
eb('users.name', 'ilike', `%${pagination.query}%`).or(
|
||||
'users.email',
|
||||
eb(
|
||||
sql`f_unaccent(users.name)`,
|
||||
'ilike',
|
||||
`%${pagination.query}%`,
|
||||
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||
).or(
|
||||
sql`users.email`,
|
||||
'ilike',
|
||||
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user