mirror of
https://github.com/docmost/docmost.git
synced 2025-11-13 04:22:37 +10:00
feat: add unaccent support for accent-insensitive search (#1402)
- Add PostgreSQL unaccent and pg_trgm extensions
- Create immutable f_unaccent wrapper function for performance
- Update all search queries to use f_unaccent for accent-insensitive matching
- Add 1MB limit to tsvector content to prevent errors on large documents
- Update full-text search trigger to use f_unaccent
- Fix MultiSelect client-side filtering to show server results properly
This commit is contained in:
@ -61,47 +61,26 @@ export function MultiMemberSelect({ onChange }: MultiMemberSelectProps) {
|
|||||||
type: "group",
|
type: "group",
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// Function to merge items into groups without duplicates
|
// Create fresh data structure based on current search results
|
||||||
const mergeItemsIntoGroups = (existingGroups, newItems, groupName) => {
|
const newData = [];
|
||||||
const existingValues = new Set(
|
|
||||||
existingGroups.flatMap((group) =>
|
|
||||||
group.items.map((item) => item.value),
|
|
||||||
),
|
|
||||||
);
|
|
||||||
const newItemsFiltered = newItems.filter(
|
|
||||||
(item) => !existingValues.has(item.value),
|
|
||||||
);
|
|
||||||
|
|
||||||
const updatedGroups = existingGroups.map((group) => {
|
if (userItems && userItems.length > 0) {
|
||||||
if (group.group === groupName) {
|
newData.push({
|
||||||
return { ...group, items: [...group.items, ...newItemsFiltered] };
|
group: t("Select a user"),
|
||||||
}
|
items: userItems,
|
||||||
return group;
|
|
||||||
});
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Use spread syntax to avoid mutation
|
if (groupItems && groupItems.length > 0) {
|
||||||
return updatedGroups.some((group) => group.group === groupName)
|
newData.push({
|
||||||
? updatedGroups
|
group: t("Select a group"),
|
||||||
: [...updatedGroups, { group: groupName, items: newItemsFiltered }];
|
items: groupItems,
|
||||||
};
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Merge user items into groups
|
setData(newData);
|
||||||
const updatedUserGroups = mergeItemsIntoGroups(
|
|
||||||
data,
|
|
||||||
userItems,
|
|
||||||
t("Select a user"),
|
|
||||||
);
|
|
||||||
|
|
||||||
// Merge group items into groups
|
|
||||||
const finalData = mergeItemsIntoGroups(
|
|
||||||
updatedUserGroups,
|
|
||||||
groupItems,
|
|
||||||
t("Select a group"),
|
|
||||||
);
|
|
||||||
|
|
||||||
setData(finalData);
|
|
||||||
}
|
}
|
||||||
}, [suggestion, data]);
|
}, [suggestion, t]);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<MultiSelect
|
<MultiSelect
|
||||||
@ -114,6 +93,7 @@ export function MultiMemberSelect({ onChange }: MultiMemberSelectProps) {
|
|||||||
searchable
|
searchable
|
||||||
searchValue={searchValue}
|
searchValue={searchValue}
|
||||||
onSearchChange={setSearchValue}
|
onSearchChange={setSearchValue}
|
||||||
|
filter={({ options }) => options}
|
||||||
clearable
|
clearable
|
||||||
variant="filled"
|
variant="filled"
|
||||||
onChange={onChange}
|
onChange={onChange}
|
||||||
|
|||||||
@ -76,6 +76,10 @@ export function sanitizeFileName(fileName: string): string {
|
|||||||
return sanitizedFilename.slice(0, 255);
|
return sanitizedFilename.slice(0, 255);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Strips diacritical marks from a string so that accented and unaccented
 * spellings compare equal (e.g. "café" becomes "cafe").
 *
 * The input is decomposed (NFD) so each accent becomes its own combining code
 * point, the combining marks in U+0300–U+036F are removed, and the result is
 * recomposed (NFC). The recomposition matters for scripts that NFD also
 * splits apart but that carry no such marks (e.g. Hangul syllables): without
 * it they would be returned in decomposed form even though nothing was
 * stripped.
 *
 * @param str - Text to process.
 * @returns The text without combining diacritical marks. Falsy input (such as
 *   the empty string) is returned unchanged.
 */
export function removeAccent(str: string): string {
  if (!str) return str;
  return str
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .normalize('NFC');
}
|
||||||
|
|
||||||
export function extractBearerTokenFromHeader(
|
export function extractBearerTokenFromHeader(
|
||||||
request: FastifyRequest,
|
request: FastifyRequest,
|
||||||
): string | undefined {
|
): string | undefined {
|
||||||
|
|||||||
@ -44,12 +44,18 @@ export class SearchService {
|
|||||||
'creatorId',
|
'creatorId',
|
||||||
'createdAt',
|
'createdAt',
|
||||||
'updatedAt',
|
'updatedAt',
|
||||||
sql<number>`ts_rank(tsv, to_tsquery(${searchQuery}))`.as('rank'),
|
sql<number>`ts_rank(tsv, to_tsquery('english', f_unaccent(${searchQuery})))`.as(
|
||||||
sql<string>`ts_headline('english', text_content, to_tsquery(${searchQuery}),'MinWords=9, MaxWords=10, MaxFragments=3')`.as(
|
'rank',
|
||||||
|
),
|
||||||
|
sql<string>`ts_headline('english', text_content, to_tsquery('english', f_unaccent(${searchQuery})),'MinWords=9, MaxWords=10, MaxFragments=3')`.as(
|
||||||
'highlight',
|
'highlight',
|
||||||
),
|
),
|
||||||
])
|
])
|
||||||
.where('tsv', '@@', sql<string>`to_tsquery(${searchQuery})`)
|
.where(
|
||||||
|
'tsv',
|
||||||
|
'@@',
|
||||||
|
sql<string>`to_tsquery('english', f_unaccent(${searchQuery}))`,
|
||||||
|
)
|
||||||
.$if(Boolean(searchParams.creatorId), (qb) =>
|
.$if(Boolean(searchParams.creatorId), (qb) =>
|
||||||
qb.where('creatorId', '=', searchParams.creatorId),
|
qb.where('creatorId', '=', searchParams.creatorId),
|
||||||
)
|
)
|
||||||
@ -138,21 +144,37 @@ export class SearchService {
|
|||||||
const query = suggestion.query.toLowerCase().trim();
|
const query = suggestion.query.toLowerCase().trim();
|
||||||
|
|
||||||
if (suggestion.includeUsers) {
|
if (suggestion.includeUsers) {
|
||||||
users = await this.db
|
const userQuery = this.db
|
||||||
.selectFrom('users')
|
.selectFrom('users')
|
||||||
.select(['id', 'name', 'email', 'avatarUrl'])
|
.select(['id', 'name', 'email', 'avatarUrl'])
|
||||||
.where((eb) => eb(sql`LOWER(users.name)`, 'like', `%${query}%`))
|
|
||||||
.where('workspaceId', '=', workspaceId)
|
.where('workspaceId', '=', workspaceId)
|
||||||
.where('deletedAt', 'is', null)
|
.where('deletedAt', 'is', null)
|
||||||
.limit(limit)
|
.where((eb) =>
|
||||||
.execute();
|
eb.or([
|
||||||
|
eb(
|
||||||
|
sql`LOWER(f_unaccent(users.name))`,
|
||||||
|
'like',
|
||||||
|
sql`LOWER(f_unaccent(${`%${query}%`}))`,
|
||||||
|
),
|
||||||
|
eb(sql`users.email`, 'ilike', sql`f_unaccent(${`%${query}%`})`),
|
||||||
|
]),
|
||||||
|
)
|
||||||
|
.limit(limit);
|
||||||
|
|
||||||
|
users = await userQuery.execute();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (suggestion.includeGroups) {
|
if (suggestion.includeGroups) {
|
||||||
groups = await this.db
|
groups = await this.db
|
||||||
.selectFrom('groups')
|
.selectFrom('groups')
|
||||||
.select(['id', 'name', 'description'])
|
.select(['id', 'name', 'description'])
|
||||||
.where((eb) => eb(sql`LOWER(groups.name)`, 'like', `%${query}%`))
|
.where((eb) =>
|
||||||
|
eb(
|
||||||
|
sql`LOWER(f_unaccent(groups.name))`,
|
||||||
|
'like',
|
||||||
|
sql`LOWER(f_unaccent(${`%${query}%`}))`,
|
||||||
|
),
|
||||||
|
)
|
||||||
.where('workspaceId', '=', workspaceId)
|
.where('workspaceId', '=', workspaceId)
|
||||||
.limit(limit)
|
.limit(limit)
|
||||||
.execute();
|
.execute();
|
||||||
@ -162,7 +184,13 @@ export class SearchService {
|
|||||||
let pageSearch = this.db
|
let pageSearch = this.db
|
||||||
.selectFrom('pages')
|
.selectFrom('pages')
|
||||||
.select(['id', 'slugId', 'title', 'icon', 'spaceId'])
|
.select(['id', 'slugId', 'title', 'icon', 'spaceId'])
|
||||||
.where((eb) => eb(sql`LOWER(pages.title)`, 'like', `%${query}%`))
|
.where((eb) =>
|
||||||
|
eb(
|
||||||
|
sql`LOWER(f_unaccent(pages.title))`,
|
||||||
|
'like',
|
||||||
|
sql`LOWER(f_unaccent(${`%${query}%`}))`,
|
||||||
|
),
|
||||||
|
)
|
||||||
.where('workspaceId', '=', workspaceId)
|
.where('workspaceId', '=', workspaceId)
|
||||||
.limit(limit);
|
.limit(limit);
|
||||||
|
|
||||||
|
|||||||
@ -8,6 +8,7 @@ import { AcceptInviteDto, InviteUserDto } from '../dto/invitation.dto';
|
|||||||
import { UserRepo } from '@docmost/db/repos/user/user.repo';
|
import { UserRepo } from '@docmost/db/repos/user/user.repo';
|
||||||
import { InjectKysely } from 'nestjs-kysely';
|
import { InjectKysely } from 'nestjs-kysely';
|
||||||
import { KyselyDB } from '@docmost/db/types/kysely.types';
|
import { KyselyDB } from '@docmost/db/types/kysely.types';
|
||||||
|
import { sql } from 'kysely';
|
||||||
import { executeTx } from '@docmost/db/utils';
|
import { executeTx } from '@docmost/db/utils';
|
||||||
import {
|
import {
|
||||||
Group,
|
Group,
|
||||||
@ -55,7 +56,11 @@ export class WorkspaceInvitationService {
|
|||||||
|
|
||||||
if (pagination.query) {
|
if (pagination.query) {
|
||||||
query = query.where((eb) =>
|
query = query.where((eb) =>
|
||||||
eb('email', 'ilike', `%${pagination.query}%`),
|
eb(
|
||||||
|
sql`email`,
|
||||||
|
'ilike',
|
||||||
|
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||||
|
),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -0,0 +1,50 @@
|
|||||||
|
import { type Kysely, sql } from 'kysely';
|
||||||
|
|
||||||
|
/**
 * Enables accent-insensitive search at the database level.
 *
 * Steps, in order:
 *  1. Install the `unaccent` and `pg_trgm` extensions if they are missing.
 *  2. Create `f_unaccent(text)`, an IMMUTABLE wrapper around `unaccent`.
 *  3. Replace `pages_tsvector_trigger()` so new and updated rows store
 *     unaccented lexemes, reading at most the first 1,000,000 characters of
 *     `text_content`.
 *  4. Re-index rows that already exist so they match the unaccented queries.
 *
 * @param db - Kysely instance the migration statements are executed against.
 */
export async function up(db: Kysely<any>): Promise<void> {
  // Create unaccent extension
  await sql`CREATE EXTENSION IF NOT EXISTS unaccent`.execute(db);

  // Create pg_trgm extension
  // (only the extension is installed here; this migration creates no trigram index)
  await sql`CREATE EXTENSION IF NOT EXISTS pg_trgm`.execute(db);

  // Create IMMUTABLE wrapper function for unaccent
  // This allows us to create indexes on unaccented columns for better performance
  // https://stackoverflow.com/a/11007216/8299075
  await sql`
    CREATE OR REPLACE FUNCTION f_unaccent(text) RETURNS text
    AS $$
      SELECT unaccent('unaccent', $1);
    $$ LANGUAGE sql IMMUTABLE PARALLEL SAFE STRICT;
  `.execute(db);

  // Update the pages tsvector trigger to use the immutable function.
  // The substring() cap keeps very large documents from producing an
  // oversized tsvector.
  // NOTE(review): the cap counts characters, not bytes — confirm that is
  // tight enough for multi-byte content.
  await sql`
    CREATE OR REPLACE FUNCTION pages_tsvector_trigger() RETURNS trigger AS $$
    begin
      new.tsv :=
        setweight(to_tsvector('english', f_unaccent(coalesce(new.title, ''))), 'A') ||
        setweight(to_tsvector('english', f_unaccent(substring(coalesce(new.text_content, ''), 1, 1000000))), 'B');
      return new;
    end;
    $$ LANGUAGE plpgsql;
  `.execute(db);

  // Backfill existing rows. Replacing the trigger function only affects rows
  // written from now on; rows indexed earlier still hold accent-bearing
  // lexemes, which no longer match once search queries are passed through
  // f_unaccent. The expression mirrors the trigger body above, so the stored
  // value is the same whether or not the trigger also fires on this UPDATE.
  // NOTE(review): assumes the trigger function serves the `pages` table with
  // columns `title`, `text_content` and `tsv` — confirm against the schema.
  // This rewrites every row, so expect it to take a while on large tables.
  await sql`
    UPDATE pages
    SET tsv =
      setweight(to_tsvector('english', f_unaccent(coalesce(title, ''))), 'A') ||
      setweight(to_tsvector('english', f_unaccent(substring(coalesce(text_content, ''), 1, 1000000))), 'B')
  `.execute(db);
}
|
||||||
|
|
||||||
|
/**
 * Rolls back the accent-insensitive search migration.
 *
 * The trigger function is put back first because it is the only object that
 * calls `f_unaccent`; after that the wrapper function and both extensions can
 * be removed.
 *
 * NOTE(review): `tsv` values already stored are not recomputed here, so rows
 * written while the unaccenting trigger was active keep their unaccented
 * lexemes until they are next updated.
 * NOTE(review): the extensions are dropped unconditionally even though `up`
 * installs them with IF NOT EXISTS — confirm nothing else in the database
 * relies on `pg_trgm` or `unaccent` before running this.
 *
 * @param db - Kysely instance the rollback statements are executed against.
 */
export async function down(db: Kysely<any>): Promise<void> {
  // Original trigger body: no unaccenting and no length cap on text_content.
  const restoreTrigger = sql`
    CREATE OR REPLACE FUNCTION pages_tsvector_trigger() RETURNS trigger AS $$
    begin
      new.tsv :=
        setweight(to_tsvector('english', coalesce(new.title, '')), 'A') ||
        setweight(to_tsvector('english', coalesce(new.text_content, '')), 'B');
      return new;
    end;
    $$ LANGUAGE plpgsql;
  `;
  await restoreTrigger.execute(db);

  // Tear down in dependency order: the wrapper function before the
  // extension that provides the function it calls.
  const teardownStatements = [
    sql`DROP FUNCTION IF EXISTS f_unaccent(text)`,
    sql`DROP EXTENSION IF EXISTS pg_trgm`,
    sql`DROP EXTENSION IF EXISTS unaccent`,
  ];
  for (const statement of teardownStatements) {
    await statement.execute(db);
  }
}
|
||||||
@ -6,6 +6,7 @@ import {
|
|||||||
import { InjectKysely } from 'nestjs-kysely';
|
import { InjectKysely } from 'nestjs-kysely';
|
||||||
import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types';
|
import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types';
|
||||||
import { dbOrTx, executeTx } from '@docmost/db/utils';
|
import { dbOrTx, executeTx } from '@docmost/db/utils';
|
||||||
|
import { sql } from 'kysely';
|
||||||
import { GroupUser, InsertableGroupUser } from '@docmost/db/types/entity.types';
|
import { GroupUser, InsertableGroupUser } from '@docmost/db/types/entity.types';
|
||||||
import { PaginationOptions } from '../../pagination/pagination-options';
|
import { PaginationOptions } from '../../pagination/pagination-options';
|
||||||
import { executeWithPagination } from '@docmost/db/pagination/pagination';
|
import { executeWithPagination } from '@docmost/db/pagination/pagination';
|
||||||
@ -56,7 +57,7 @@ export class GroupUserRepo {
|
|||||||
|
|
||||||
if (pagination.query) {
|
if (pagination.query) {
|
||||||
query = query.where((eb) =>
|
query = query.where((eb) =>
|
||||||
eb('users.name', 'ilike', `%${pagination.query}%`),
|
eb(sql`f_unaccent(users.name)`, 'ilike', sql`f_unaccent(${'%' + pagination.query + '%'})`),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -114,10 +114,10 @@ export class GroupRepo {
|
|||||||
|
|
||||||
if (pagination.query) {
|
if (pagination.query) {
|
||||||
query = query.where((eb) =>
|
query = query.where((eb) =>
|
||||||
eb('name', 'ilike', `%${pagination.query}%`).or(
|
eb(sql`f_unaccent(name)`, 'ilike', sql`f_unaccent(${'%' + pagination.query + '%'})`).or(
|
||||||
'description',
|
sql`f_unaccent(description)`,
|
||||||
'ilike',
|
'ilike',
|
||||||
`%${pagination.query}%`,
|
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -2,6 +2,7 @@ import { BadRequestException, Injectable } from '@nestjs/common';
|
|||||||
import { InjectKysely } from 'nestjs-kysely';
|
import { InjectKysely } from 'nestjs-kysely';
|
||||||
import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types';
|
import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types';
|
||||||
import { dbOrTx } from '@docmost/db/utils';
|
import { dbOrTx } from '@docmost/db/utils';
|
||||||
|
import { sql } from 'kysely';
|
||||||
import {
|
import {
|
||||||
InsertableSpaceMember,
|
InsertableSpaceMember,
|
||||||
SpaceMember,
|
SpaceMember,
|
||||||
@ -119,9 +120,21 @@ export class SpaceMemberRepo {
|
|||||||
|
|
||||||
if (pagination.query) {
|
if (pagination.query) {
|
||||||
query = query.where((eb) =>
|
query = query.where((eb) =>
|
||||||
eb('users.name', 'ilike', `%${pagination.query}%`)
|
eb(
|
||||||
.or('users.email', 'ilike', `%${pagination.query}%`)
|
sql`f_unaccent(users.name)`,
|
||||||
.or('groups.name', 'ilike', `%${pagination.query}%`),
|
'ilike',
|
||||||
|
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||||
|
)
|
||||||
|
.or(
|
||||||
|
sql`users.email`,
|
||||||
|
'ilike',
|
||||||
|
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||||
|
)
|
||||||
|
.or(
|
||||||
|
sql`f_unaccent(groups.name)`,
|
||||||
|
'ilike',
|
||||||
|
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||||
|
),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -228,10 +241,14 @@ export class SpaceMemberRepo {
|
|||||||
|
|
||||||
if (pagination.query) {
|
if (pagination.query) {
|
||||||
query = query.where((eb) =>
|
query = query.where((eb) =>
|
||||||
eb('name', 'ilike', `%${pagination.query}%`).or(
|
eb(
|
||||||
'description',
|
sql`f_unaccent(name)`,
|
||||||
'ilike',
|
'ilike',
|
||||||
`%${pagination.query}%`,
|
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||||
|
).or(
|
||||||
|
sql`f_unaccent(description)`,
|
||||||
|
'ilike',
|
||||||
|
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -110,10 +110,10 @@ export class SpaceRepo {
|
|||||||
|
|
||||||
if (pagination.query) {
|
if (pagination.query) {
|
||||||
query = query.where((eb) =>
|
query = query.where((eb) =>
|
||||||
eb('name', 'ilike', `%${pagination.query}%`).or(
|
eb(sql`f_unaccent(name)`, 'ilike', sql`f_unaccent(${'%' + pagination.query + '%'})`).or(
|
||||||
'description',
|
sql`f_unaccent(description)`,
|
||||||
'ilike',
|
'ilike',
|
||||||
`%${pagination.query}%`,
|
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -4,6 +4,7 @@ import { KyselyDB, KyselyTransaction } from '@docmost/db/types/kysely.types';
|
|||||||
import { DB, Users } from '@docmost/db/types/db';
|
import { DB, Users } from '@docmost/db/types/db';
|
||||||
import { hashPassword } from '../../../common/helpers';
|
import { hashPassword } from '../../../common/helpers';
|
||||||
import { dbOrTx } from '@docmost/db/utils';
|
import { dbOrTx } from '@docmost/db/utils';
|
||||||
|
import { sql } from 'kysely';
|
||||||
import {
|
import {
|
||||||
InsertableUser,
|
InsertableUser,
|
||||||
UpdatableUser,
|
UpdatableUser,
|
||||||
@ -149,10 +150,14 @@ export class UserRepo {
|
|||||||
|
|
||||||
if (pagination.query) {
|
if (pagination.query) {
|
||||||
query = query.where((eb) =>
|
query = query.where((eb) =>
|
||||||
eb('users.name', 'ilike', `%${pagination.query}%`).or(
|
eb(
|
||||||
'users.email',
|
sql`f_unaccent(users.name)`,
|
||||||
'ilike',
|
'ilike',
|
||||||
`%${pagination.query}%`,
|
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||||
|
).or(
|
||||||
|
sql`users.email`,
|
||||||
|
'ilike',
|
||||||
|
sql`f_unaccent(${'%' + pagination.query + '%'})`,
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user