Compare commits

...

6 Commits

Author SHA1 Message Date
0cf44914ad POC 2025-06-30 01:21:01 -07:00
232cea8cc9 sync 2025-06-27 03:20:01 -07:00
b9643d3584 sync 2025-06-27 03:07:51 -07:00
9f144d35fb posthog integration (cloud) (#1304) 2025-06-27 10:58:36 +01:00
e44c170873 fix editor flickers on collab reconnection (#1295)
* fix editor flickers on reconnection

* cleanup

* adjust copy
2025-06-27 10:58:18 +01:00
1be39d4353 sync 2025-06-27 02:22:11 -07:00
30 changed files with 2881 additions and 106 deletions

View File

@ -41,6 +41,7 @@
"lowlight": "^3.3.0",
"mermaid": "^11.6.0",
"mitt": "^3.0.1",
"posthog-js": "^1.255.1",
"react": "^18.3.1",
"react-arborist": "3.4.0",
"react-clear-modal": "^2.0.15",

View File

@ -1,6 +1,8 @@
import { UserProvider } from "@/features/user/user-provider.tsx";
import { Outlet } from "react-router-dom";
import GlobalAppShell from "@/components/layouts/global/global-app-shell.tsx";
import { PosthogUser } from "@/ee/components/posthog-user.tsx";
import { isCloud } from "@/lib/config.ts";
export default function Layout() {
return (
@ -8,6 +10,7 @@ export default function Layout() {
<GlobalAppShell>
<Outlet />
</GlobalAppShell>
{isCloud() && <PosthogUser />}
</UserProvider>
);
}

View File

@ -30,12 +30,12 @@ export default function BillingDetails() {
>
Plan
</Text>
<Text fw={700} fz="lg">
{
plans.find(
(plan) => plan.productId === billing.stripeProductId,
)?.name
}
<Text fw={700} fz="lg" tt="capitalize">
{plans.find(
(plan) => plan.productId === billing.stripeProductId,
)?.name ||
billing.planName ||
"Standard"}
</Text>
</div>
</Group>
@ -154,7 +154,7 @@ export default function BillingDetails() {
Current Tier
</Text>
<Text fw={700} fz="lg">
For up to {billing.tieredUpTo} users
For {billing.tieredUpTo} users
</Text>
{/*billing.tieredFlatAmount && (
<Text c="dimmed" fz="sm">

View File

@ -155,7 +155,7 @@ export default function BillingPlans() {
</Text>
)}
<Text size="md" fw={500}>
for up to {planSelectedTier.upTo} users
For {planSelectedTier.upTo} users
</Text>
</Stack>

View File

@ -0,0 +1,41 @@
import { usePostHog } from "posthog-js/react";
import { useEffect } from "react";
import { useAtom } from "jotai";
import { currentUserAtom } from "@/features/user/atoms/current-user-atom.ts";
/**
 * Headless component that reports the signed-in user and their workspace to
 * PostHog. It renders nothing; all of its work happens inside an effect.
 *
 * The effect re-runs whenever the PostHog client or the value of
 * `currentUserAtom` changes. Once both a user and a workspace are available it
 * calls `posthog.identify` with the user's id and profile properties, then
 * `posthog.group("workspace", ...)` with the workspace id and properties.
 *
 * @returns Always `null`.
 */
export function PosthogUser() {
  const posthog = usePostHog();
  const [currentUser] = useAtom(currentUserAtom);

  useEffect(() => {
    const user = currentUser?.user;
    const workspace = currentUser?.workspace;

    // Nothing to report until both the user and the workspace are loaded.
    if (!user || !workspace) {
      return;
    }

    // `lastActiveAt` is stamped with the time this effect runs.
    const personProperties = {
      name: user.name,
      email: user.email,
      workspaceId: user.workspaceId,
      workspaceHostname: workspace.hostname,
      lastActiveAt: new Date().toISOString(),
      createdAt: user.createdAt,
      source: "docmost-app",
    };
    posthog?.identify(user.id, personProperties);

    const workspaceProperties = {
      name: workspace.name,
      hostname: workspace.hostname,
      plan: workspace.plan,
      status: workspace.status,
      // True whenever `trialEndAt` is set; the date itself is not compared
      // against the current time here.
      isOnTrial: Boolean(workspace.trialEndAt),
      hasStripeCustomerId: Boolean(workspace.stripeCustomerId),
      memberCount: workspace.memberCount,
      lastActiveAt: new Date().toISOString(),
      createdAt: workspace.createdAt,
      source: "docmost-app",
    };
    posthog?.group("workspace", workspace.id, workspaceProperties);
  }, [posthog, currentUser]);

  return null;
}

View File

@ -1,7 +1,6 @@
import "@/features/editor/styles/index.css";
import React, {
useEffect,
useLayoutEffect,
useMemo,
useRef,
useState,
@ -72,7 +71,11 @@ export default function PageEditor({
const [, setAsideState] = useAtom(asideStateAtom);
const [, setActiveCommentId] = useAtom(activeCommentIdAtom);
const [showCommentPopup, setShowCommentPopup] = useAtom(showCommentPopupAtom);
const ydoc = useMemo(() => new Y.Doc(), [pageId]);
const ydocRef = useRef<Y.Doc | null>(null);
if (!ydocRef.current) {
ydocRef.current = new Y.Doc();
}
const ydoc = ydocRef.current;
const [isLocalSynced, setLocalSynced] = useState(false);
const [isRemoteSynced, setRemoteSynced] = useState(false);
const [yjsConnectionStatus, setYjsConnectionStatus] = useAtom(
@ -89,66 +92,100 @@ export default function PageEditor({
const userPageEditMode =
currentUser?.user?.settings?.preferences?.pageEditMode ?? PageEditMode.Edit;
const localProvider = useMemo(() => {
const provider = new IndexeddbPersistence(documentName, ydoc);
// Providers only created once per pageId
const providersRef = useRef<{
local: IndexeddbPersistence;
remote: HocuspocusProvider;
} | null>(null);
const [providersReady, setProvidersReady] = useState(false);
provider.on("synced", () => {
setLocalSynced(true);
});
const localProvider = providersRef.current?.local;
const remoteProvider = providersRef.current?.remote;
return provider;
}, [pageId, ydoc]);
// Track when collaborative provider is ready and synced
const [collabReady, setCollabReady] = useState(false);
useEffect(() => {
if (
remoteProvider?.status === WebSocketStatus.Connected &&
isLocalSynced &&
isRemoteSynced
) {
setCollabReady(true);
}
}, [remoteProvider?.status, isLocalSynced, isRemoteSynced]);
const remoteProvider = useMemo(() => {
const provider = new HocuspocusProvider({
name: documentName,
url: collaborationURL,
document: ydoc,
token: collabQuery?.token,
connect: false,
preserveConnection: false,
onAuthenticationFailed: (auth: onAuthenticationFailedParameters) => {
const payload = jwtDecode(collabQuery?.token);
const now = Date.now().valueOf() / 1000;
const isTokenExpired = now >= payload.exp;
if (isTokenExpired) {
refetchCollabToken();
}
},
onStatus: (status) => {
if (status.status === "connected") {
setYjsConnectionStatus(status.status);
}
},
});
provider.on("synced", () => {
setRemoteSynced(true);
});
provider.on("disconnect", () => {
setYjsConnectionStatus(WebSocketStatus.Disconnected);
});
return provider;
}, [ydoc, pageId, collabQuery?.token]);
useLayoutEffect(() => {
remoteProvider.connect();
useEffect(() => {
if (!providersRef.current) {
const local = new IndexeddbPersistence(documentName, ydoc);
local.on("synced", () => setLocalSynced(true));
const remote = new HocuspocusProvider({
name: documentName,
url: collaborationURL,
document: ydoc,
token: collabQuery?.token,
connect: true,
preserveConnection: false,
onAuthenticationFailed: (auth: onAuthenticationFailedParameters) => {
const payload = jwtDecode(collabQuery?.token);
const now = Date.now().valueOf() / 1000;
const isTokenExpired = now >= payload.exp;
if (isTokenExpired) {
refetchCollabToken();
}
},
onStatus: (status) => {
if (status.status === "connected") {
setYjsConnectionStatus(status.status);
}
},
});
remote.on("synced", () => setRemoteSynced(true));
remote.on("disconnect", () => {
setYjsConnectionStatus(WebSocketStatus.Disconnected);
});
providersRef.current = { local, remote };
setProvidersReady(true);
} else {
setProvidersReady(true);
}
// Only destroy on final unmount
return () => {
setRemoteSynced(false);
setLocalSynced(false);
remoteProvider.destroy();
localProvider.destroy();
providersRef.current?.remote.destroy();
providersRef.current?.local.destroy();
providersRef.current = null;
};
}, [remoteProvider, localProvider]);
}, [pageId]);
// Only connect/disconnect on tab/idle, not destroy
useEffect(() => {
if (!providersReady || !providersRef.current) return;
const remoteProvider = providersRef.current.remote;
if (
isIdle &&
documentState === "hidden" &&
remoteProvider.status === WebSocketStatus.Connected
) {
remoteProvider.disconnect();
setIsCollabReady(false);
return;
}
if (
documentState === "visible" &&
remoteProvider.status === WebSocketStatus.Disconnected
) {
resetIdle();
remoteProvider.connect();
setTimeout(() => setIsCollabReady(true), 500);
}
}, [isIdle, documentState, providersReady, resetIdle]);
const extensions = useMemo(() => {
if (!remoteProvider || !currentUser?.user) return mainExtensions;
return [
...mainExtensions,
...collabExtensions(remoteProvider, currentUser?.user),
];
}, [ydoc, pageId, remoteProvider, currentUser?.user]);
}, [remoteProvider, currentUser?.user]);
const editor = useEditor(
{
@ -202,7 +239,7 @@ export default function PageEditor({
debouncedUpdateContent(editorJson);
},
},
[pageId, editable, remoteProvider?.status],
[pageId, editable, remoteProvider],
);
const debouncedUpdateContent = useDebouncedCallback((newContent: any) => {
@ -255,29 +292,6 @@ export default function PageEditor({
}
}, [remoteProvider?.status]);
useEffect(() => {
if (
isIdle &&
documentState === "hidden" &&
remoteProvider?.status === WebSocketStatus.Connected
) {
remoteProvider.disconnect();
setIsCollabReady(false);
return;
}
if (
documentState === "visible" &&
remoteProvider?.status === WebSocketStatus.Disconnected
) {
resetIdle();
remoteProvider.connect();
setTimeout(() => {
setIsCollabReady(true);
}, 600);
}
}, [isIdle, documentState, remoteProvider]);
const isSynced = isLocalSynced && isRemoteSynced;
useEffect(() => {
@ -294,21 +308,48 @@ export default function PageEditor({
}, [isRemoteSynced, isLocalSynced, remoteProvider?.status]);
useEffect(() => {
// honor user default page edit mode preference
if (userPageEditMode && editor && editable && isSynced) {
if (userPageEditMode === PageEditMode.Edit) {
editor.setEditable(true);
} else if (userPageEditMode === PageEditMode.Read) {
// Only honor user default page edit mode preference and permissions
if (editor) {
if (userPageEditMode && editable) {
if (userPageEditMode === PageEditMode.Edit) {
editor.setEditable(true);
} else if (userPageEditMode === PageEditMode.Read) {
editor.setEditable(false);
}
} else {
editor.setEditable(false);
}
}
}, [userPageEditMode, editor, editable, isSynced]);
}, [userPageEditMode, editor, editable]);
return isCollabReady ? (
<div>
const hasConnectedOnceRef = useRef(false);
const [showStatic, setShowStatic] = useState(true);
useEffect(() => {
if (
!hasConnectedOnceRef.current &&
remoteProvider?.status === WebSocketStatus.Connected
) {
hasConnectedOnceRef.current = true;
setShowStatic(false);
}
}, [remoteProvider?.status]);
if (showStatic) {
return (
<EditorProvider
editable={false}
immediatelyRender={true}
extensions={mainExtensions}
content={content}
/>
);
}
return (
<div style={{ position: "relative" }}>
<div ref={menuContainerRef}>
<EditorContent editor={editor} />
{editor && editor.isEditable && (
<div>
<EditorBubbleMenu editor={editor} />
@ -322,21 +363,12 @@ export default function PageEditor({
<LinkMenu editor={editor} appendTo={menuContainerRef} />
</div>
)}
{showCommentPopup && <CommentDialog editor={editor} pageId={pageId} />}
</div>
<div
onClick={() => editor.commands.focus("end")}
style={{ paddingBottom: "20vh" }}
></div>
</div>
) : (
<EditorProvider
editable={false}
immediatelyRender={true}
extensions={mainExtensions}
content={content}
></EditorProvider>
);
}

View File

@ -12,6 +12,7 @@ export interface IWorkspace {
settings: any;
status: string;
enforceSso: boolean;
stripeCustomerId: string;
billingEmail: string;
trialEndAt: Date;
createdAt: Date;

View File

@ -83,6 +83,18 @@ export function getBillingTrialDays() {
return getConfigValue("BILLING_TRIAL_DAYS");
}
/**
 * Reads the `POSTHOG_HOST` configuration value.
 *
 * @returns Whatever `getConfigValue` resolves for the `POSTHOG_HOST` key.
 */
export function getPostHogHost(): string {
  const host = getConfigValue("POSTHOG_HOST");
  return host;
}
/**
 * Tells whether PostHog has enough configuration to be initialised.
 *
 * @returns `true` only when both `getPostHogHost()` and `getPostHogKey()`
 *   return truthy values. The key is not read when the host is missing.
 */
export function isPostHogEnabled(): boolean {
  if (!getPostHogHost()) {
    return false;
  }
  return Boolean(getPostHogKey());
}
/**
 * Reads the `POSTHOG_KEY` configuration value.
 *
 * @returns Whatever `getConfigValue` resolves for the `POSTHOG_KEY` key.
 */
export function getPostHogKey(): string {
  const projectKey = getConfigValue("POSTHOG_KEY");
  return projectKey;
}
function getConfigValue(key: string, defaultValue: string = undefined): string {
const rawValue = import.meta.env.DEV
? process?.env?.[key]

View File

@ -3,7 +3,7 @@ import "@mantine/spotlight/styles.css";
import "@mantine/notifications/styles.css";
import ReactDOM from "react-dom/client";
import App from "./App.tsx";
import { mantineCssResolver, theme } from '@/theme';
import { mantineCssResolver, theme } from "@/theme";
import { MantineProvider } from "@mantine/core";
import { BrowserRouter } from "react-router-dom";
import { ModalsProvider } from "@mantine/modals";
@ -11,6 +11,14 @@ import { Notifications } from "@mantine/notifications";
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import { HelmetProvider } from "react-helmet-async";
import "./i18n";
import { PostHogProvider } from "posthog-js/react";
import {
getPostHogHost,
getPostHogKey,
isCloud,
isPostHogEnabled,
} from "@/lib/config.ts";
import posthog from "posthog-js";
export const queryClient = new QueryClient({
defaultOptions: {
@ -23,9 +31,16 @@ export const queryClient = new QueryClient({
},
});
// Initialise PostHog only on cloud deployments that have both a PostHog host
// and key configured. `isPostHogEnabled` has to be *called*: a bare function
// reference is always truthy, which would run `posthog.init` with an undefined
// key/host on every cloud deployment regardless of configuration.
if (isCloud() && isPostHogEnabled()) {
  posthog.init(getPostHogKey(), {
    api_host: getPostHogHost(),
    defaults: "2025-05-24",
    disable_session_recording: true,
  });
}
const root = ReactDOM.createRoot(
document.getElementById("root") as HTMLElement
document.getElementById("root") as HTMLElement,
);
root.render(
@ -35,10 +50,12 @@ root.render(
<QueryClientProvider client={queryClient}>
<Notifications position="bottom-center" limit={3} />
<HelmetProvider>
<App />
<PostHogProvider client={posthog}>
<App />
</PostHogProvider>
</HelmetProvider>
</QueryClientProvider>
</ModalsProvider>
</MantineProvider>
</BrowserRouter>
</BrowserRouter>,
);

View File

@ -14,6 +14,8 @@ export default defineConfig(({ mode }) => {
SUBDOMAIN_HOST,
COLLAB_URL,
BILLING_TRIAL_DAYS,
POSTHOG_HOST,
POSTHOG_KEY,
} = loadEnv(mode, envPath, "");
return {
@ -27,6 +29,8 @@ export default defineConfig(({ mode }) => {
SUBDOMAIN_HOST,
COLLAB_URL,
BILLING_TRIAL_DAYS,
POSTHOG_HOST,
POSTHOG_KEY,
},
APP_VERSION: JSON.stringify(process.env.npm_package_version),
},

View File

@ -70,6 +70,7 @@
"nanoid": "3.3.11",
"nestjs-kysely": "^1.2.0",
"nodemailer": "^7.0.3",
"openai": "^5.8.2",
"openid-client": "^5.7.1",
"passport-google-oauth20": "^2.0.0",
"passport-jwt": "^4.0.1",
@ -77,6 +78,7 @@
"pg-tsquery": "^8.4.2",
"postmark": "^4.0.5",
"react": "^18.3.1",
"redis": "^5.5.6",
"reflect-metadata": "^0.2.2",
"rxjs": "^7.8.2",
"sanitize-filename-ts": "^1.0.2",

View File

@ -156,6 +156,7 @@ export class PersistenceExtension implements Extension {
page: {
...page,
content: tiptapJson,
textContent: textContent,
lastUpdatedById: context.user.id,
},
});

View File

@ -0,0 +1,444 @@
# AI Search Integration Guide
This guide shows how to integrate the AI Search module with your existing page operations for automatic indexing.
## Event-Based Auto-Indexing
The AI Search module uses event listeners to automatically index pages when they are created, updated, or deleted.
### Emitting Events in Page Service
Update your existing `PageService` to emit events for AI search indexing:
```typescript
// In your page.service.ts
import { EventEmitter2 } from '@nestjs/event-emitter';
import { Injectable } from '@nestjs/common';
@Injectable()
export class PageService {
constructor(
private readonly eventEmitter: EventEmitter2,
// ... other dependencies
) {}
async createPage(createPageDto: CreatePageDto): Promise<Page> {
// Your existing page creation logic
const page = await this.pageRepo.create(createPageDto);
// Emit event for AI search indexing
this.eventEmitter.emit('page.created', {
pageId: page.id,
workspaceId: page.workspaceId,
spaceId: page.spaceId,
title: page.title,
textContent: page.textContent,
operation: 'create'
});
return page;
}
async updatePage(pageId: string, updatePageDto: UpdatePageDto): Promise<Page> {
// Your existing page update logic
const page = await this.pageRepo.update(pageId, updatePageDto);
// Emit event for AI search reindexing
this.eventEmitter.emit('page.updated', {
pageId: page.id,
workspaceId: page.workspaceId,
spaceId: page.spaceId,
title: page.title,
textContent: page.textContent,
operation: 'update'
});
return page;
}
async deletePage(pageId: string): Promise<void> {
// Get page info before deletion
const page = await this.pageRepo.findById(pageId);
// Your existing page deletion logic
await this.pageRepo.delete(pageId);
// Emit event for AI search cleanup
if (page) {
this.eventEmitter.emit('page.deleted', {
pageId: page.id,
workspaceId: page.workspaceId,
spaceId: page.spaceId,
operation: 'delete'
});
}
}
}
```
### Adding EventEmitter to Page Module
Make sure your `PageModule` imports the `EventEmitterModule`:
```typescript
// In your page.module.ts
import { Module } from '@nestjs/common';
import { EventEmitterModule } from '@nestjs/event-emitter';
import { PageService } from './services/page.service';
import { PageController } from './page.controller';
@Module({
imports: [
EventEmitterModule, // Add this if not already present
],
controllers: [PageController],
providers: [PageService],
exports: [PageService],
})
export class PageModule {}
```
### Bulk Operations
For bulk operations, you can emit multiple events or use a bulk reindex:
```typescript
async bulkUpdatePages(updates: BulkUpdateDto[]): Promise<Page[]> {
const updatedPages = await this.pageRepo.bulkUpdate(updates);
// Option 1: Emit individual events
for (const page of updatedPages) {
this.eventEmitter.emit('page.updated', {
pageId: page.id,
workspaceId: page.workspaceId,
spaceId: page.spaceId,
title: page.title,
textContent: page.textContent,
operation: 'update'
});
}
// Option 2: Use bulk reindex (more efficient for large batches)
// const pageIds = updatedPages.map(p => p.id);
// this.eventEmitter.emit('ai-search.bulk-reindex', {
// pageIds,
// workspaceId: updatedPages[0]?.workspaceId
// });
return updatedPages;
}
```
## Manual Integration
If you prefer manual control over indexing, you can directly use the AI search services:
```typescript
// In your page.service.ts
import { AiSearchService } from '../ai-search/services/ai-search.service';
@Injectable()
export class PageService {
constructor(
private readonly aiSearchService: AiSearchService,
// ... other dependencies
) {}
async createPageWithSearch(createPageDto: CreatePageDto): Promise<Page> {
const page = await this.pageRepo.create(createPageDto);
// Manually trigger indexing
try {
await this.aiSearchService.reindexPages({
pageIds: [page.id],
workspaceId: page.workspaceId
});
} catch (error) {
// Log error but don't fail the page creation
console.error('Failed to index page for AI search:', error);
}
return page;
}
}
```
## Frontend Integration
### Adding AI Search to Client
Create an AI search service on the client side:
```typescript
// apps/client/src/features/ai-search/services/ai-search-service.ts
import api from "@/lib/api-client";
export interface AiSearchParams {
query: string;
spaceId?: string;
limit?: number;
similarity_threshold?: number;
}
export interface AiSearchResult {
id: string;
title: string;
icon: string;
similarity_score: number;
highlight: string;
space?: {
id: string;
name: string;
slug: string;
};
}
export async function semanticSearch(params: AiSearchParams): Promise<AiSearchResult[]> {
const response = await api.post<AiSearchResult[]>("/ai-search/semantic", params);
return response.data;
}
export async function hybridSearch(params: AiSearchParams): Promise<AiSearchResult[]> {
const response = await api.post<AiSearchResult[]>("/ai-search/hybrid", params);
return response.data;
}
```
### React Query Integration
```typescript
// apps/client/src/features/ai-search/queries/ai-search-query.ts
import { useQuery } from "@tanstack/react-query";
import { semanticSearch, hybridSearch, AiSearchParams } from "../services/ai-search-service";
export function useAiSemanticSearchQuery(params: AiSearchParams) {
return useQuery({
queryKey: ["ai-search", "semantic", params],
queryFn: () => semanticSearch(params),
enabled: !!params.query && params.query.length > 0,
});
}
export function useAiHybridSearchQuery(params: AiSearchParams) {
return useQuery({
queryKey: ["ai-search", "hybrid", params],
queryFn: () => hybridSearch(params),
enabled: !!params.query && params.query.length > 0,
});
}
```
### AI Search Component
```typescript
// apps/client/src/features/ai-search/components/ai-search-spotlight.tsx
import React, { useState } from "react";
import { Spotlight } from "@mantine/spotlight";
import { IconSearch, IconBrain } from "@tabler/icons-react";
import { useDebouncedValue } from "@mantine/hooks";
import { useAiSemanticSearchQuery } from "../queries/ai-search-query";
export function AiSearchSpotlight() {
const [query, setQuery] = useState("");
const [debouncedQuery] = useDebouncedValue(query, 300);
const { data: results, isLoading } = useAiSemanticSearchQuery({
query: debouncedQuery,
limit: 10,
similarity_threshold: 0.7,
});
return (
<Spotlight.Root query={query} onQueryChange={setQuery}>
<Spotlight.Search
placeholder="AI-powered semantic search..."
leftSection={<IconBrain size={20} />}
/>
<Spotlight.ActionsList>
{isLoading && <Spotlight.Empty>Searching...</Spotlight.Empty>}
{!isLoading && (!results || results.length === 0) && (
<Spotlight.Empty>No results found</Spotlight.Empty>
)}
{results?.map((result) => (
<Spotlight.Action key={result.id}>
<div>
<div>{result.title}</div>
<div style={{ fontSize: '0.8em', opacity: 0.7 }}>
Similarity: {(result.similarity_score * 100).toFixed(1)}%
</div>
{result.highlight && (
<div
style={{ fontSize: '0.8em', opacity: 0.6 }}
dangerouslySetInnerHTML={{ __html: result.highlight }}
/>
)}
</div>
</Spotlight.Action>
))}
</Spotlight.ActionsList>
</Spotlight.Root>
);
}
```
## Search Mode Toggle
Create a component that allows users to choose between traditional and AI search:
```typescript
// apps/client/src/features/search/components/search-mode-toggle.tsx
import { SegmentedControl } from "@mantine/core";
import { IconSearch, IconBrain } from "@tabler/icons-react";
interface SearchModeToggleProps {
value: 'traditional' | 'ai' | 'hybrid';
onChange: (value: 'traditional' | 'ai' | 'hybrid') => void;
}
/**
 * Segmented control for choosing the search mode: traditional full-text,
 * AI semantic, or hybrid.
 *
 * Mantine's `SegmentedControl` data items only accept `value`, `label` and
 * `disabled`, so each icon is rendered inside the item's `label` node instead
 * of being passed as a separate `icon` key (which the control would ignore and
 * TypeScript would reject as an excess property).
 *
 * @param value - The currently selected search mode.
 * @param onChange - Called with the newly selected search mode.
 */
export function SearchModeToggle({ value, onChange }: SearchModeToggleProps) {
  // Builds a label node that shows an icon next to the option text.
  const labelWithIcon = (Icon: typeof IconSearch, text: string) => (
    <span style={{ display: 'inline-flex', alignItems: 'center', gap: 6 }}>
      <Icon size={16} />
      <span>{text}</span>
    </span>
  );

  // SegmentedControl reports a plain string; it can only be one of the values
  // listed in `data`, so narrowing it back to the mode union is safe.
  const handleChange = (next: string) =>
    onChange(next as SearchModeToggleProps['value']);

  return (
    <SegmentedControl
      value={value}
      onChange={handleChange}
      data={[
        {
          value: 'traditional',
          label: labelWithIcon(IconSearch, 'Traditional'),
        },
        {
          value: 'ai',
          label: labelWithIcon(IconBrain, 'AI Semantic'),
        },
        {
          value: 'hybrid',
          label: labelWithIcon(IconBrain, 'Hybrid'),
        },
      ]}
    />
  );
}
```
## Performance Considerations
### Async Indexing
For better performance, consider making indexing asynchronous:
```typescript
// Use a queue for heavy indexing operations
import { InjectQueue } from '@nestjs/bullmq';
import { Queue } from 'bullmq';
@Injectable()
export class PageService {
constructor(
@InjectQueue('ai-search') private aiSearchQueue: Queue,
) {}
async createPage(createPageDto: CreatePageDto): Promise<Page> {
const page = await this.pageRepo.create(createPageDto);
// Queue indexing job instead of doing it synchronously
await this.aiSearchQueue.add('index-page', {
pageId: page.id,
workspaceId: page.workspaceId,
spaceId: page.spaceId,
title: page.title,
textContent: page.textContent,
});
return page;
}
}
```
### Conditional Indexing
Only index pages when AI search is configured:
```typescript
async createPage(createPageDto: CreatePageDto): Promise<Page> {
const page = await this.pageRepo.create(createPageDto);
// Check if AI search is enabled before emitting events
if (this.embeddingService.isConfigured()) {
this.eventEmitter.emit('page.created', {
pageId: page.id,
workspaceId: page.workspaceId,
spaceId: page.spaceId,
title: page.title,
textContent: page.textContent,
operation: 'create'
});
}
return page;
}
```
## Testing Integration
### Unit Tests
```typescript
// page.service.spec.ts
import { EventEmitter2 } from '@nestjs/event-emitter';
describe('PageService', () => {
let service: PageService;
let eventEmitter: EventEmitter2;
beforeEach(async () => {
const module = await Test.createTestingModule({
providers: [
PageService,
{
provide: EventEmitter2,
useValue: {
emit: jest.fn(),
},
},
],
}).compile();
service = module.get<PageService>(PageService);
eventEmitter = module.get<EventEmitter2>(EventEmitter2);
});
it('should emit page.created event when creating page', async () => {
const createPageDto = { title: 'Test Page', content: 'Test content' };
await service.createPage(createPageDto);
expect(eventEmitter.emit).toHaveBeenCalledWith('page.created',
expect.objectContaining({
operation: 'create',
title: 'Test Page',
})
);
});
});
```
## Monitoring and Analytics
### Track Search Usage
```typescript
// Add search analytics
this.eventEmitter.emit('ai-search.query', {
query: searchParams.query,
userId: opts.userId,
workspaceId: opts.workspaceId,
searchType: 'semantic',
resultCount: results.length,
executionTime: Date.now() - startTime,
});
```
This integration approach ensures that your AI search stays in sync with your content while maintaining good performance and error handling.

View File

@ -0,0 +1,201 @@
# AI Search Module
A comprehensive AI-powered semantic search module for Docmost. It integrates with a Redis vector database through the official **node-redis** client and follows the Redis vector search specification to provide intelligent search capabilities.
## Features
- **Semantic Search**: Find content based on meaning rather than exact keywords using vector embeddings
- **Hybrid Search**: Combines both semantic and traditional full-text search with configurable weights
- **Redis Vector Database**: Uses Redis with RediSearch module for efficient vector operations via node-redis client
- **HNSW Indexing**: Hierarchical Navigable Small World algorithm for fast approximate nearest neighbor search
- **Auto-indexing**: Automatically indexes pages when they are created or updated
- **OpenAI-Compatible**: Supports OpenAI and OpenAI-compatible embedding providers
- **Batch Operations**: Efficient batch processing for large-scale indexing
- **Permission-aware**: Respects user permissions and workspace access
- **COSINE Distance**: Uses cosine distance metric for semantic similarity
## Architecture
```
ai-search/
├── ai-search.controller.ts # REST API endpoints
├── ai-search.module.ts # Module configuration
├── dto/
│ └── semantic-search.dto.ts # Request/response DTOs
├── services/
│ ├── ai-search.service.ts # Main search logic
│ ├── embedding.service.ts # Text embedding generation
│ ├── redis-vector.service.ts # Redis vector operations (node-redis)
│ └── vector.service.ts # Vector math utilities
├── listeners/
│ └── page-update.listener.ts # Auto-indexing on page changes
├── constants.ts # Configuration constants
├── README.md # This file
├── SETUP.md # Setup guide
└── INTEGRATION.md # Integration examples
```
## Configuration
Add these environment variables to your `.env` file:
```env
# Redis Vector Database (using node-redis client)
REDIS_VECTOR_HOST=localhost
REDIS_VECTOR_PORT=6379
REDIS_VECTOR_PASSWORD=your_redis_password
REDIS_VECTOR_DB=0
REDIS_VECTOR_INDEX=docmost_pages
# AI Embedding Configuration (OpenAI-compatible)
AI_EMBEDDING_MODEL=text-embedding-3-small
AI_EMBEDDING_DIMENSIONS=1536
AI_EMBEDDING_BASE_URL=https://api.openai.com/v1/embeddings # Optional: for custom providers
# OpenAI API Key (or compatible provider key)
OPENAI_API_KEY=your_openai_api_key
```
## Redis Vector Search Implementation
This implementation follows the official [Redis Vector Search specifications](https://redis.io/docs/latest/develop/interact/search-and-query/query/vector-search/) and uses the [node-redis client](https://redis.io/docs/latest/develop/clients/nodejs/vecsearch/) for proper integration.
### Key Features:
- **HNSW Algorithm**: Uses Hierarchical Navigable Small World for fast vector indexing
- **COSINE Distance**: Semantic similarity using cosine distance metric
- **KNN Queries**: K-nearest neighbors search with `*=>[KNN k @embedding $vector AS distance]`
- **Hash Storage**: Vectors stored as Redis hash documents with binary embedding data
- **node-redis Client**: Official Redis client with full vector search support
### Vector Index Schema:
```typescript
{
page_id: SchemaFieldTypes.TEXT, // Sortable page identifier
workspace_id: SchemaFieldTypes.TEXT, // Sortable workspace filter
space_id: SchemaFieldTypes.TEXT, // Space filter
title: SchemaFieldTypes.TEXT, // Page title
embedding: { // Vector field
type: SchemaFieldTypes.VECTOR,
ALGORITHM: VectorAlgorithms.HNSW, // HNSW indexing
TYPE: 'FLOAT32', // 32-bit floats
DIM: 1536, // Embedding dimensions
DISTANCE_METRIC: 'COSINE', // Cosine similarity
},
indexed_at: SchemaFieldTypes.NUMERIC // Indexing timestamp
}
```
## API Endpoints
### Semantic Search
```http
POST /ai-search/semantic
Content-Type: application/json
{
"query": "machine learning algorithms",
"spaceId": "optional-space-id",
"limit": 20,
"similarity_threshold": 0.7
}
```
### Hybrid Search
```http
POST /ai-search/hybrid
Content-Type: application/json
{
"query": "neural networks",
"spaceId": "optional-space-id",
"limit": 20
}
```
### Reindex Pages
```http
POST /ai-search/reindex
Content-Type: application/json
{
"spaceId": "optional-space-id",
"pageIds": ["page-id-1", "page-id-2"]
}
```
## Usage Examples
### Basic Semantic Search
```typescript
import { AiSearchService } from './ai-search.service';
// Search for pages semantically using vector similarity
const results = await aiSearchService.semanticSearch(
'artificial intelligence concepts',
{ limit: 10, similarity_threshold: 0.8 },
{ userId: 'user-id', workspaceId: 'workspace-id' }
);
```
### Hybrid Search with Weighted Scoring
```typescript
// Combine semantic (70%) and text search (30%)
const results = await aiSearchService.hybridSearch(
'machine learning tutorial',
{ spaceId: 'space-id', limit: 15 },
{ userId: 'user-id', workspaceId: 'workspace-id' }
);
```
## Dependencies
The module uses the official **node-redis** package for Redis integration:
```json
{
"redis": "^5.5.6"
}
```
Install with pnpm:
```bash
pnpm install
```
## Performance Optimizations
### Vector Search Performance
- **HNSW Algorithm**: Provides approximately O(log n) search complexity (approximate nearest-neighbor search)
- **COSINE Distance**: Efficient for normalized embeddings
- **Batch Operations**: Multi-command execution for bulk indexing
- **Connection Pooling**: Persistent Redis connections
### Memory Efficiency
- **Float32 Vectors**: Reduced memory usage vs Float64
- **TTL Expiration**: Automatic cleanup of old vectors (30 days)
- **Prefix-based Storage**: Organized key structure
## Vector Storage Format
Vectors are stored as Redis hash documents:
```
Key: vector:{workspaceId}:{pageId}
Fields:
page_id: "page-uuid"
workspace_id: "workspace-uuid"
space_id: "space-uuid"
title: "Page Title"
embedding: Buffer<Float32Array> // Binary vector data
indexed_at: "1234567890"
```
## Error Handling
The module includes comprehensive error handling:
- **Connection Resilience**: Automatic reconnection on Redis failures
- **Embedding Retries**: Exponential backoff for API failures
- **Vector Validation**: Dimension and format checking
- **Graceful Degradation**: Fallback to text search on vector errors
This implementation provides production-ready vector search capabilities that scale with your content while maintaining excellent search quality and performance.

View File

@ -0,0 +1,224 @@
# AI Search Setup Guide
This guide will help you set up the AI Search module with Redis vector database for Docmost.
## Prerequisites
1. **Redis with RediSearch**: You need Redis with the RediSearch module for vector operations
2. **OpenAI API Key**: For embedding generation (or alternative provider)
3. **Node.js Dependencies**: The required packages are already added to package.json
## Step 1: Install Redis with RediSearch
### Option A: Using Docker (Recommended)
```bash
# Using Redis Stack (includes RediSearch and vector capabilities)
docker run -d --name redis-stack \
-p 6379:6379 \
-v redis-data:/data \
redis/redis-stack-server:latest
# Alternatively, use the standalone RediSearch image (run only ONE of these containers; both bind port 6379)
docker run -d --name redis-vector \
-p 6379:6379 \
-v redis-data:/data \
redislabs/redisearch:latest
```
### Option B: Manual Installation
1. Install Redis from source with RediSearch module
2. Or use Redis Cloud with RediSearch enabled
## Step 2: Configure Environment Variables
Add these variables to your `.env` file:
```env
# ===== Redis Vector Database Configuration =====
REDIS_VECTOR_HOST=localhost
REDIS_VECTOR_PORT=6379
REDIS_VECTOR_PASSWORD=your_redis_password_here
REDIS_VECTOR_DB=0
REDIS_VECTOR_INDEX=docmost_pages
# ===== AI Embedding Configuration (OpenAI-compatible) =====
AI_EMBEDDING_MODEL=text-embedding-3-small
AI_EMBEDDING_DIMENSIONS=1536
AI_EMBEDDING_BASE_URL=https://api.openai.com/v1/embeddings # Optional: for custom providers
# ===== OpenAI API Key (or compatible provider key) =====
OPENAI_API_KEY=your_openai_api_key_here
```
## Step 3: Custom OpenAI-Compatible Providers
You can use any provider that follows the OpenAI embeddings API specification by setting the `AI_EMBEDDING_BASE_URL`:
### Examples:
**Azure OpenAI:**
```env
AI_EMBEDDING_BASE_URL=https://your-resource.openai.azure.com/openai/deployments/your-deployment/embeddings?api-version=2023-05-15
OPENAI_API_KEY=your_azure_openai_key
```
**Ollama (local):**
```env
AI_EMBEDDING_BASE_URL=http://localhost:11434/v1/embeddings
AI_EMBEDDING_MODEL=nomic-embed-text
AI_EMBEDDING_DIMENSIONS=768
```
**Other compatible providers:**
- Together AI
- Anyscale
- OpenRouter
- Any provider implementing OpenAI's embeddings API
## Step 4: Install Dependencies
The required dependencies are already in package.json. Run:
```bash
pnpm install
```
## Step 5: Initialize the Vector Index
The vector index will be created automatically when the service starts. You can also manually trigger reindexing:
```bash
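# Using the API endpoint. The workspace is taken from the authenticated session;
# "spaceId" and "pageIds" are optional filters (omit both to reindex the whole workspace).
curl -X POST http://localhost:3000/ai-search/reindex \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_JWT_TOKEN" \
-d '{"spaceId": "your-space-id"}'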
```
## Step 6: Test the Setup
### Test Semantic Search
```bash
curl -X POST http://localhost:3000/ai-search/semantic \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_JWT_TOKEN" \
-d '{
"query": "machine learning algorithms",
"limit": 10,
"similarity_threshold": 0.7
}'
```
### Test Hybrid Search
```bash
curl -X POST http://localhost:3000/ai-search/hybrid \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_JWT_TOKEN" \
-d '{
"query": "neural networks",
"limit": 10
}'
```
## Step 7: Monitor the Setup
### Check Redis Connection
```bash
redis-cli ping
# Should return PONG
```
### Check RediSearch Module
```bash
redis-cli MODULE LIST
# Should show RediSearch in the list
```
### Check Index Status
```bash
redis-cli FT.INFO docmost_pages_index
# Should show index information (the service creates the index under the name "docmost_pages_index")
```
## Troubleshooting
### Common Issues
1. **Redis Connection Error**
- Check if Redis is running: `docker ps` or `redis-cli ping`
- Verify connection details in .env file
- Check firewall/network settings
2. **RediSearch Module Not Found**
- Ensure you're using Redis Stack or Redis with RediSearch
- Check module is loaded: `redis-cli MODULE LIST`
3. **OpenAI API Errors**
- Verify API key is correct and has sufficient credits
- Check API usage limits and quotas
- Ensure model name is correct
4. **Embedding Generation Fails**
- Check text length (max 8000 characters by default)
- Verify network connectivity to embedding provider
- Check API rate limits
5. **Search Returns No Results**
- Ensure pages are indexed: check logs for indexing errors
- Verify similarity threshold (try lowering it)
- Check user permissions for searched content
### Debug Logging
Enable debug logging by setting:
```env
LOG_LEVEL=debug
```
### Performance Tuning
1. **Batch Size**: Adjust based on your API rate limits
```env
AI_SEARCH_BATCH_SIZE=50 # Lower for rate-limited APIs
```
2. **Similarity Threshold**: Balance precision vs recall
```env
AI_SEARCH_SIMILARITY_THRESHOLD=0.6 # Lower = more results
```
3. **Redis Memory**: Monitor memory usage as index grows
```bash
redis-cli INFO memory
```
## Production Deployment
### Redis Configuration
- Use Redis Cluster for high availability
- Set up proper backup and persistence
- Monitor memory usage and performance
- Configure appropriate TTL for vectors
### Security
- Use strong Redis passwords
- Enable TLS for Redis connections
- Secure API keys in environment variables
- Implement proper rate limiting
### Monitoring
- Set up alerts for Redis health
- Monitor embedding API usage and costs
- Track search performance metrics
- Log search queries for analysis
## Next Steps
1. **Auto-indexing**: Pages are automatically indexed on create/update
2. **Client Integration**: Add AI search to your frontend
3. **Custom Scoring**: Implement custom ranking algorithms
4. **Analytics**: Track search usage and effectiveness
For more detailed information, see the main README.md file.

View File

@ -0,0 +1,38 @@
import { Test, TestingModule } from '@nestjs/testing';
import { AiSearchController } from './ai-search.controller';
import { AiSearchService } from './services/ai-search.service';
import SpaceAbilityFactory from '../casl/abilities/space-ability.factory';
// Smoke test: the controller can be resolved from a Nest testing module when
// both of its collaborators are replaced by jest mocks.
describe('AiSearchController', () => {
  let controller: AiSearchController;
  let service: AiSearchService;

  beforeEach(async () => {
    // Mocks are rebuilt for every test so call history never leaks between cases.
    const aiSearchServiceMock = {
      semanticSearch: jest.fn(),
      hybridSearch: jest.fn(),
      reindexPages: jest.fn(),
    };
    const spaceAbilityMock = {
      createForUser: jest.fn(),
    };

    const testingModule: TestingModule = await Test.createTestingModule({
      controllers: [AiSearchController],
      providers: [
        { provide: AiSearchService, useValue: aiSearchServiceMock },
        { provide: SpaceAbilityFactory, useValue: spaceAbilityMock },
      ],
    }).compile();

    controller = testingModule.get<AiSearchController>(AiSearchController);
    service = testingModule.get<AiSearchService>(AiSearchService);
  });

  it('should be defined', () => {
    expect(controller).toBeDefined();
  });
});

View File

@ -0,0 +1,123 @@
import {
Controller,
Post,
Body,
UseGuards,
HttpCode,
HttpStatus,
BadRequestException,
ForbiddenException,
} from '@nestjs/common';
import { User } from '@docmost/db/types/entity.types';
import { Workspace } from '@docmost/db/types/entity.types';
import { AiSearchService } from './services/ai-search.service';
import { SemanticSearchDto, SemanticSearchShareDto } from './dto/semantic-search.dto';
import { JwtAuthGuard } from '../../common/guards/jwt-auth.guard';
import SpaceAbilityFactory from '../casl/abilities/space-ability.factory';
import { AuthUser } from '../../common/decorators/auth-user.decorator';
import { AuthWorkspace } from '../../common/decorators/auth-workspace.decorator';
import { SpaceCaslAction, SpaceCaslSubject } from '../casl/interfaces/space-ability.type';
import { Public } from '../../common/decorators/public.decorator';
/**
 * HTTP entry points for AI-backed search under the `ai-search` route prefix.
 *
 * Every route answers with HTTP 200. When a request names a space, the
 * caller's ability on pages of that space is checked before the request is
 * forwarded to AiSearchService.
 */
@UseGuards(JwtAuthGuard)
@Controller('ai-search')
export class AiSearchController {
  constructor(
    private readonly aiSearchService: AiSearchService,
    private readonly spaceAbility: SpaceAbilityFactory,
  ) {}

  /** Vector-similarity search for an authenticated user. */
  @HttpCode(HttpStatus.OK)
  @Post('semantic')
  async semanticSearch(
    @Body() searchDto: SemanticSearchDto,
    @AuthUser() user: User,
    @AuthWorkspace() workspace: Workspace,
  ) {
    // Share-scoped lookups are only served by the dedicated share route.
    delete searchDto.shareId;

    await this.assertPageAbility(user, searchDto.spaceId, SpaceCaslAction.Read);

    const scope = { userId: user.id, workspaceId: workspace.id };
    return this.aiSearchService.semanticSearch(
      searchDto.query,
      searchDto,
      scope,
    );
  }

  /** Combined semantic + full-text search for an authenticated user. */
  @HttpCode(HttpStatus.OK)
  @Post('hybrid')
  async hybridSearch(
    @Body() searchDto: SemanticSearchDto,
    @AuthUser() user: User,
    @AuthWorkspace() workspace: Workspace,
  ) {
    delete searchDto.shareId;

    await this.assertPageAbility(user, searchDto.spaceId, SpaceCaslAction.Read);

    const scope = { userId: user.id, workspaceId: workspace.id };
    return this.aiSearchService.hybridSearch(searchDto.query, searchDto, scope);
  }

  /**
   * Semantic search restricted to a share. Marked public; no user id is
   * forwarded, only the workspace.
   */
  @Public()
  @HttpCode(HttpStatus.OK)
  @Post('semantic-share')
  async semanticSearchShare(
    @Body() searchDto: SemanticSearchShareDto,
    @AuthWorkspace() workspace: Workspace,
  ) {
    // A share search must never be widened to a whole space.
    delete searchDto.spaceId;

    if (!searchDto.shareId) {
      throw new BadRequestException('shareId is required');
    }

    const scope = { workspaceId: workspace.id };
    return this.aiSearchService.semanticSearch(
      searchDto.query,
      searchDto,
      scope,
    );
  }

  /**
   * Rebuilds vector index entries for the caller's workspace, optionally
   * narrowed to one space and/or an explicit list of pages.
   *
   * NOTE(review): when `spaceId` is omitted no ability check runs, so any
   * authenticated user can trigger a workspace-wide reindex — confirm intended.
   */
  @HttpCode(HttpStatus.OK)
  @Post('reindex')
  async reindexPages(
    @Body() body: { spaceId?: string; pageIds?: string[] },
    @AuthUser() user: User,
    @AuthWorkspace() workspace: Workspace,
  ) {
    await this.assertPageAbility(user, body.spaceId, SpaceCaslAction.Manage);

    return this.aiSearchService.reindexPages({
      workspaceId: workspace.id,
      spaceId: body.spaceId,
      pageIds: body.pageIds,
    });
  }

  /**
   * Throws ForbiddenException when `user` lacks `action` on pages of the
   * given space. Does nothing when no space id was supplied.
   */
  private async assertPageAbility(
    user: User,
    spaceId: string | undefined,
    action: SpaceCaslAction,
  ): Promise<void> {
    if (!spaceId) {
      return;
    }

    const ability = await this.spaceAbility.createForUser(user, spaceId);
    if (ability.cannot(action, SpaceCaslSubject.Page)) {
      throw new ForbiddenException();
    }
  }
}

View File

@ -0,0 +1,22 @@
import { Module } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { AiSearchController } from './ai-search.controller';
import { AiSearchService } from './services/ai-search.service';
import { VectorService } from './services/vector.service';
import { EmbeddingService } from './services/embedding.service';
import { RedisVectorService } from './services/redis-vector.service';
import { PageUpdateListener } from './listeners/page-update.listener';
/**
 * Nest module for AI search: registers the HTTP controller, the search,
 * vector and embedding services, and the page-event listener. The services
 * (but not the listener) are exported for use by other modules.
 */
@Module({
  imports: [ConfigModule],
  controllers: [AiSearchController],
  providers: [
    AiSearchService,
    VectorService,
    EmbeddingService,
    RedisVectorService,
    PageUpdateListener,
  ],
  exports: [
    AiSearchService,
    VectorService,
    EmbeddingService,
    RedisVectorService,
  ],
})
export class AiSearchModule {}

View File

@ -0,0 +1,50 @@
/**
 * Central defaults for the AI search feature. Declared `as const`, so every
 * entry keeps its literal type and the object is read-only at compile time.
 */
export const AI_SEARCH_CONFIG = {
  // --- Similarity thresholds (0..1) ---
  DEFAULT_SIMILARITY_THRESHOLD: 0.7,
  HIGH_SIMILARITY_THRESHOLD: 0.85,
  LOW_SIMILARITY_THRESHOLD: 0.6,

  // --- Result-count bounds ---
  MAX_SEARCH_LIMIT: 100,
  DEFAULT_SEARCH_LIMIT: 20,
  MIN_SEARCH_LIMIT: 1,

  // --- Embedding input/output sizing ---
  DEFAULT_EMBEDDING_DIMENSIONS: 1536,
  MAX_TEXT_LENGTH: 8000,

  // --- Indexing ---
  DEFAULT_BATCH_SIZE: 100,
  INDEX_TTL_DAYS: 30,

  // --- Hybrid ranking weights (sum to 1) ---
  SEMANTIC_WEIGHT: 0.7,
  TEXT_WEIGHT: 0.3,

  // --- Redis key naming ---
  REDIS_KEY_PREFIX: 'docmost:ai-search',
  VECTOR_KEY_PREFIX: 'vector',
  METADATA_KEY_PREFIX: 'metadata',

  // --- Retry policy ---
  MAX_RETRIES: 3,
  RETRY_DELAY_MS: 1000,

  // --- OpenAI request batching ---
  OPENAI_BATCH_SIZE: 100,
} as const;
/**
 * Known embedding models grouped by provider; each value is the vector
 * dimension paired with that model name.
 */
export const EMBEDDING_MODELS = {
  OPENAI: {
    'text-embedding-3-small': 1536,
    'text-embedding-3-large': 3072,
    'text-embedding-ada-002': 1536,
  },
} as const;
/**
 * Event names relevant to search indexing.
 * NOTE(review): the page listener in this module subscribes to
 * 'collab.page.updated' rather than PAGE_UPDATED — confirm which is intended.
 */
export const SEARCH_EVENTS = {
  PAGE_CREATED: 'page.created',
  PAGE_UPDATED: 'page.updated',
  PAGE_DELETED: 'page.deleted',
  BULK_REINDEX: 'ai-search.bulk-reindex',
} as const;

View File

@ -0,0 +1,103 @@
import {
IsNotEmpty,
IsString,
IsOptional,
IsNumber,
Min,
Max,
IsArray,
IsBoolean,
} from 'class-validator';
/**
 * Request body for semantic and hybrid search.
 *
 * `limit` and `offset` must be whole numbers: they are forwarded to a SQL
 * LIMIT clause and to the KNN count of the vector query, neither of which
 * accepts fractional values.
 */
export class SemanticSearchDto {
  /** Free-text search query; must be a non-empty string. */
  @IsNotEmpty()
  @IsString()
  query: string;

  /** Restrict the search to a single space. */
  @IsOptional()
  @IsString()
  spaceId?: string;

  /** Restrict the search to the pages exposed by a share. */
  @IsOptional()
  @IsString()
  shareId?: string;

  @IsOptional()
  @IsString()
  creatorId?: string;

  /** Maximum number of results, an integer in 1..100 (default 20). */
  @IsOptional()
  @IsNumber({ maxDecimalPlaces: 0 })
  @Min(1)
  @Max(100)
  limit?: number = 20;

  /** Number of leading results to skip, a non-negative integer (default 0). */
  @IsOptional()
  @IsNumber({ maxDecimalPlaces: 0 })
  @Min(0)
  offset?: number = 0;

  /** Similarity cut-off in 0..1 (default 0.7). */
  @IsOptional()
  @IsNumber()
  @Min(0)
  @Max(1)
  similarity_threshold?: number = 0.7;

  /** Whether a text excerpt should accompany each result (default true). */
  @IsOptional()
  @IsBoolean()
  include_highlights?: boolean = true;

  @IsOptional()
  @IsArray()
  @IsString({ each: true })
  filters?: string[];
}
/**
 * Request body for share-scoped semantic search: identical to
 * SemanticSearchDto except that `shareId` is mandatory.
 */
export class SemanticSearchShareDto extends SemanticSearchDto {
  /** Identifier of the share to search within (required). */
  @IsNotEmpty()
  @IsString()
  shareId: string;

  /** Re-declared from the base class with the same optional-string rules. */
  @IsOptional()
  @IsString()
  spaceId?: string;
}
/** One page hit returned by semantic search. */
export class SemanticSearchResponseDto {
  // Page identity and basic metadata.
  id: string;
  title: string;
  icon: string;
  parentPageId: string;
  creatorId: string;

  // Ranking information produced by the vector search.
  similarity_score: number;
  semantic_rank: number;

  // Text excerpt for display.
  highlight: string;

  createdAt: Date;
  updatedAt: Date;

  // Owning space; optional because it is not always populated.
  space?: {
    id: string;
    name: string;
    slug: string;
  };
}
/**
 * One page hit returned by hybrid search: a semantic hit extended with the
 * full-text rank, the blended score, and which search produced the hit.
 */
export class HybridSearchResponseDto extends SemanticSearchResponseDto {
  // Absent when the page was matched by semantic search only.
  text_rank?: number;

  combined_score: number;

  search_type: 'semantic' | 'text' | 'hybrid';
}
/** Parameters selecting which pages of a workspace should be re-indexed. */
export class ReindexDto {
  /** Optional: only pages of this space. */
  @IsOptional()
  @IsString()
  spaceId?: string;

  /** Optional: only these page ids. */
  @IsOptional()
  @IsArray()
  @IsString({ each: true })
  pageIds?: string[];

  /** Workspace whose pages are re-indexed (required). */
  @IsNotEmpty()
  @IsString()
  workspaceId: string;
}

View File

@ -0,0 +1,88 @@
import { Injectable, Logger } from '@nestjs/common';
import { OnEvent } from '@nestjs/event-emitter';
import { AiSearchService } from '../services/ai-search.service';
import { EmbeddingService } from '../services/embedding.service';
import { RedisVectorService } from '../services/redis-vector.service';
import { Page } from '@docmost/db/types/entity.types';
import { UpdatedPageEvent } from '../../../collaboration/listeners/history.listener';
/** Describes a page change together with the kind of operation performed. */
export interface PageUpdateEvent {
  // Identifiers locating the page.
  pageId: string;
  workspaceId: string;
  spaceId: string;

  // Optional page text carried with the event.
  title?: string;
  textContent?: string;

  // Which lifecycle operation happened.
  operation: 'create' | 'update' | 'delete';
}
/**
 * Keeps the Redis vector index in step with page lifecycle events.
 *
 * - 'page.created' and 'collab.page.updated' (re)index the page.
 * - 'page.deleted' removes the page's vector.
 *
 * Failures are logged and never re-thrown, so an indexing problem cannot
 * break the operation that emitted the event.
 */
@Injectable()
export class PageUpdateListener {
  private readonly logger = new Logger(PageUpdateListener.name);

  constructor(
    private readonly aiSearchService: AiSearchService,
    private readonly embeddingService: EmbeddingService,
    private readonly redisVectorService: RedisVectorService,
  ) {}

  /** Indexes a newly created page. */
  @OnEvent('page.created')
  async handlePageCreated(event: Page) {
    await this.indexPage(event);
  }

  /** Re-indexes a page after a collaborative edit was persisted. */
  @OnEvent('collab.page.updated')
  async handlePageUpdated(event: UpdatedPageEvent) {
    await this.indexPage(event.page);
  }

  /** Removes a deleted page from the vector index. */
  @OnEvent('page.deleted')
  async handlePageDeleted(event: Page) {
    try {
      await this.redisVectorService.deletePage(event.id, event.workspaceId);
      this.logger.debug(`Removed page ${event.id} from vector index`);
    } catch (error) {
      this.logger.error(
        `Failed to remove page ${event.id} from vector index:`,
        error,
      );
    }
  }

  /**
   * Embeds the page's title and text content and stores the vector.
   * Skips pages without content, skips when the embedding service has no API
   * key, and skips when no usable embedding comes back.
   */
  private async indexPage(event: Page) {
    try {
      const content = `${event.title || ''} ${event.textContent || ''}`.trim();
      if (!content) {
        this.logger.debug(
          `Skipping indexing for page ${event.id} - no content`,
        );
        return;
      }

      if (!this.embeddingService.isConfigured()) {
        this.logger.debug(
          'Embedding service not configured, skipping indexing',
        );
        return;
      }

      const embedding = await this.embeddingService.generateEmbedding(content);

      // Never write an empty vector: it would be stored in the hash but is
      // useless for similarity search.
      if (!embedding || embedding.length === 0) {
        this.logger.warn(
          `No embedding produced for page ${event.id}, skipping indexing`,
        );
        return;
      }

      await this.redisVectorService.indexPage({
        pageId: event.id,
        embedding,
        metadata: {
          title: event.title,
          workspaceId: event.workspaceId,
          spaceId: event.spaceId,
        },
      });

      this.logger.debug(`Indexed page ${event.id} for AI search`);
    } catch (error) {
      this.logger.error(`Failed to index page ${event.id}:`, error);
    }
  }
}

View File

@ -0,0 +1,438 @@
import { Injectable, Logger } from '@nestjs/common';
import { InjectKysely } from 'nestjs-kysely';
import { KyselyDB } from '@docmost/db/types/kysely.types';
import { sql } from 'kysely';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
import { SpaceMemberRepo } from '@docmost/db/repos/space/space-member.repo';
import { ShareRepo } from '@docmost/db/repos/share/share.repo';
import { VectorService } from './vector.service';
import { EmbeddingService } from './embedding.service';
import { RedisVectorService } from './redis-vector.service';
import {
SemanticSearchDto,
SemanticSearchResponseDto,
HybridSearchResponseDto,
ReindexDto,
} from '../dto/semantic-search.dto';
// eslint-disable-next-line @typescript-eslint/no-require-imports
const tsquery = require('pg-tsquery')();
@Injectable()
export class AiSearchService {
private readonly logger = new Logger(AiSearchService.name);
// Collaborators are injected by Nest: the Kysely database handle, the page,
// share and space-member repositories, and the vector/embedding services.
constructor(
  @InjectKysely() private readonly db: KyselyDB,
  private readonly pageRepo: PageRepo,
  private readonly shareRepo: ShareRepo,
  private readonly spaceMemberRepo: SpaceMemberRepo,
  private readonly vectorService: VectorService,
  private readonly embeddingService: EmbeddingService,
  private readonly redisVectorService: RedisVectorService,
) {}
/**
 * Finds pages whose stored embedding is similar to the query text.
 *
 * @param query - Raw search text. Blank input yields an empty result.
 * @param searchParams - Paging, threshold and scoping options.
 * @param opts - Caller identity; `userId` is absent for share searches.
 * @returns Matching pages ordered as returned by the vector search.
 * @throws Re-throws any failure from embedding generation, the vector
 *   store, or the database after logging it.
 */
async semanticSearch(
  query: string,
  searchParams: SemanticSearchDto,
  opts: {
    userId?: string;
    workspaceId: string;
  },
): Promise<SemanticSearchResponseDto[]> {
  if (!query || query.trim().length === 0) {
    return [];
  }

  try {
    // Resolve access first: if nothing is visible to the caller there is no
    // point in paying for an embedding request.
    const accessiblePageIds = await this.getAccessiblePageIds(
      searchParams,
      opts,
    );
    if (accessiblePageIds.length === 0) {
      return [];
    }

    const queryEmbedding = await this.embeddingService.generateEmbedding(query);
    if (!queryEmbedding || queryEmbedding.length === 0) {
      throw new Error('Failed to generate an embedding for the search query');
    }

    // `??` rather than `||`: 0 is a legitimate offset and threshold.
    // NOTE(review): the filter key is `page_ids` while indexed hashes store
    // `page_id` — confirm the index schema exposes this name.
    const vectorResults = await this.redisVectorService.searchSimilar(
      queryEmbedding,
      {
        limit: searchParams.limit ?? 20,
        offset: searchParams.offset ?? 0,
        threshold: searchParams.similarity_threshold ?? 0.7,
        filters: {
          workspace_id: opts.workspaceId,
          page_ids: accessiblePageIds,
        },
      },
    );
    if (vectorResults.length === 0) {
      return [];
    }

    // Hydrate the vector hits with page rows from the database.
    const pageIds = vectorResults.map((result) => result.pageId);
    const pages = await this.getPageDetails(pageIds, searchParams);

    return this.combineVectorResultsWithPages(
      vectorResults,
      pages,
      query,
      searchParams.include_highlights,
    );
  } catch (error) {
    this.logger.error(`Semantic search failed: ${error?.['message']}`, error);
    throw error;
  }
}
/**
 * Runs semantic and full-text search side by side and merges the results
 * into one ranked list.
 *
 * If only the semantic half fails (for example because the embedding
 * provider is unreachable) the text results are still returned. A failing
 * text search is treated as fatal.
 *
 * @param query - Raw search text. Empty input yields an empty result.
 * @param searchParams - Paging and scoping options; `limit` caps the
 *   merged list (default 20).
 * @param opts - Caller identity and workspace.
 * @throws Re-throws the text-search failure after logging it.
 */
async hybridSearch(
  query: string,
  searchParams: SemanticSearchDto,
  opts: {
    userId?: string;
    workspaceId: string;
  },
): Promise<HybridSearchResponseDto[]> {
  if (query.length < 1) {
    return [];
  }

  try {
    // allSettled keeps one failing branch from discarding the other's work.
    const [semanticOutcome, textOutcome] = await Promise.allSettled([
      this.semanticSearch(query, searchParams, opts),
      this.performTextSearch(query, searchParams, opts),
    ]);

    if (textOutcome.status === 'rejected') {
      throw textOutcome.reason;
    }

    let semanticResults: SemanticSearchResponseDto[] = [];
    if (semanticOutcome.status === 'fulfilled') {
      semanticResults = semanticOutcome.value;
    } else {
      this.logger.warn(
        `Semantic search unavailable, falling back to text search: ${semanticOutcome.reason?.['message']}`,
      );
    }

    const hybridResults = this.combineHybridResults(
      semanticResults,
      textOutcome.value,
      query,
    );

    // Each branch may return up to `limit` rows, so the merged list can hold
    // up to twice that; trim it back to what the caller asked for.
    return hybridResults.slice(0, searchParams.limit ?? 20);
  } catch (error) {
    this.logger.error(`Hybrid search failed: ${error?.['message']}`, error);
    throw error;
  }
}
/**
 * Regenerates embeddings for the non-deleted pages of a workspace and writes
 * them to the vector index.
 *
 * Pages are processed in small concurrent groups so a large workspace does
 * not fire one embedding request per page all at once. A failing page does
 * not stop the run; its message is collected in `errors`.
 *
 * @param params - Workspace id plus optional space / page-id filters.
 * @returns Number of pages indexed and, when any failed, their messages.
 * @throws Re-throws failures of the page query itself after logging.
 */
async reindexPages(
  params: ReindexDto,
): Promise<{ indexed: number; errors?: string[] }> {
  const maxConcurrentPages = 10;

  try {
    let query = this.db
      .selectFrom('pages')
      .select(['id', 'title', 'textContent', 'spaceId'])
      .where('workspaceId', '=', params.workspaceId)
      .where('deletedAt', 'is', null);

    if (params.spaceId) {
      query = query.where('spaceId', '=', params.spaceId);
    }
    if (params.pageIds && params.pageIds.length > 0) {
      query = query.where('id', 'in', params.pageIds);
    }

    const pages = await query.execute();

    let indexed = 0;
    const errors: string[] = [];

    for (let start = 0; start < pages.length; start += maxConcurrentPages) {
      const group = pages.slice(start, start + maxConcurrentPages);

      const outcomes = await Promise.allSettled(
        group.map(async (page) => {
          const content =
            `${page.title || ''} ${page.textContent || ''}`.trim();
          // Pages without any text are skipped, not counted as errors.
          if (!content) return null;

          const embedding =
            await this.embeddingService.generateEmbedding(content);
          if (!embedding || embedding.length === 0) {
            throw new Error(`No embedding generated for page ${page.id}`);
          }

          await this.redisVectorService.indexPage({
            pageId: page.id,
            embedding,
            metadata: {
              title: page.title,
              workspaceId: params.workspaceId,
              // Keep space_id populated, as event-driven indexing does.
              spaceId: page.spaceId,
            },
          });
          return page.id;
        }),
      );

      for (const outcome of outcomes) {
        if (outcome.status === 'fulfilled') {
          if (outcome.value) indexed += 1;
        } else {
          const reason: unknown = outcome.reason;
          errors.push(reason instanceof Error ? reason.message : String(reason));
        }
      }
    }

    this.logger.log(
      `Reindexed ${indexed} pages for workspace ${params.workspaceId}`,
    );
    return { indexed, errors: errors.length > 0 ? errors : undefined };
  } catch (error) {
    this.logger.error(`Reindexing failed: ${error?.['message']}`, error);
    throw error;
  }
}
/**
 * Resolves the ids of the pages a search may touch. Precedence:
 *  1. `shareId`  -> the shared page (plus descendants if the share says so);
 *  2. `spaceId`  -> every live page of that space in the workspace;
 *  3. `userId`   -> every live page in the spaces returned for that user;
 *  4. otherwise  -> nothing.
 */
private async getAccessiblePageIds(
  searchParams: SemanticSearchDto,
  opts: { userId?: string; workspaceId: string },
): Promise<string[]> {
  const { shareId, spaceId } = searchParams;
  const { userId, workspaceId } = opts;

  // 1. Share-scoped search.
  if (shareId) {
    const share = await this.shareRepo.findById(shareId);
    if (!share || share.workspaceId !== workspaceId) {
      return [];
    }

    if (!share.includeSubPages) {
      return [share.pageId];
    }

    const subtree = await this.pageRepo.getPageAndDescendants(share.pageId, {
      includeContent: false,
    });
    return subtree.map((page) => page.id);
  }

  // 2. Single-space search.
  if (spaceId) {
    const rows = await this.db
      .selectFrom('pages')
      .select('id')
      .where('spaceId', '=', spaceId)
      .where('workspaceId', '=', workspaceId)
      .where('deletedAt', 'is', null)
      .execute();
    return rows.map((row) => row.id);
  }

  // 3. Everything the user can reach.
  if (userId) {
    const userSpaceIds = await this.spaceMemberRepo.getUserSpaceIds(userId);
    if (userSpaceIds.length === 0) {
      return [];
    }

    const rows = await this.db
      .selectFrom('pages')
      .select('id')
      .where('spaceId', 'in', userSpaceIds)
      .where('workspaceId', '=', workspaceId)
      .where('deletedAt', 'is', null)
      .execute();
    return rows.map((row) => row.id);
  }

  // 4. No scope at all.
  return [];
}
/**
 * Loads the display columns for the given page ids, excluding deleted pages.
 * Space information is joined in unless the search is share-scoped.
 */
private async getPageDetails(
  pageIds: string[],
  searchParams: SemanticSearchDto,
) {
  let pageQuery = this.db
    .selectFrom('pages')
    .select([
      'id',
      'slugId',
      'title',
      'icon',
      'parentPageId',
      'creatorId',
      'createdAt',
      'updatedAt',
      'textContent',
    ]);

  const isShareSearch = Boolean(searchParams.shareId);
  if (!isShareSearch) {
    pageQuery = pageQuery.select((eb) => this.pageRepo.withSpace(eb));
  }

  return pageQuery
    .where('id', 'in', pageIds)
    .where('deletedAt', 'is', null)
    .execute();
}
/**
 * Joins vector hits with their page rows. Hits whose page row is missing
 * are dropped; `semantic_rank` is the hit's 1-based position in the
 * original vector result list.
 */
private combineVectorResultsWithPages(
  vectorResults: any[],
  pages: any[],
  query: string,
  includeHighlights: boolean = true,
): SemanticSearchResponseDto[] {
  const pagesById = new Map<any, any>();
  for (const page of pages) {
    pagesById.set(page.id, page);
  }

  const combined: SemanticSearchResponseDto[] = [];

  vectorResults.forEach((hit, position) => {
    const page = pagesById.get(hit.pageId);
    if (!page) {
      return;
    }

    const highlight =
      includeHighlights && page.textContent
        ? this.generateHighlight(page.textContent, query)
        : '';

    const space = page.space
      ? {
          id: page.space.id,
          name: page.space.name,
          slug: page.space.slug,
        }
      : undefined;

    combined.push({
      id: page.id,
      title: page.title,
      icon: page.icon,
      parentPageId: page.parentPageId,
      creatorId: page.creatorId,
      similarity_score: hit.score,
      semantic_rank: position + 1,
      highlight,
      createdAt: page.createdAt,
      updatedAt: page.updatedAt,
      space,
    });
  });

  return combined;
}
/**
 * Full-text search over the pages the caller may access, ranked by
 * `ts_rank` and tagged with `search_type: 'text'`.
 */
private async performTextSearch(
  query: string,
  searchParams: SemanticSearchDto,
  opts: { userId?: string; workspaceId: string },
) {
  // A trailing '*' is appended before the text is converted to a tsquery.
  const searchQuery = tsquery(query.trim() + '*');

  const allowedPageIds = await this.getAccessiblePageIds(searchParams, opts);
  if (allowedPageIds.length === 0) {
    return [];
  }

  const rows = await this.db
    .selectFrom('pages')
    .select([
      'id',
      'slugId',
      'title',
      'icon',
      'parentPageId',
      'creatorId',
      'createdAt',
      'updatedAt',
      sql<number>`ts_rank(tsv, to_tsquery(${searchQuery}))`.as('text_rank'),
      sql<string>`ts_headline('english', text_content, to_tsquery(${searchQuery}),'MinWords=9, MaxWords=10, MaxFragments=3')`.as(
        'highlight',
      ),
    ])
    .where('tsv', '@@', sql<string>`to_tsquery(${searchQuery})`)
    .where('id', 'in', allowedPageIds)
    .orderBy('text_rank', 'desc')
    .limit(searchParams.limit || 20)
    .execute();

  // `text_rank` is already part of each row; only the type tag is added.
  return rows.map((row) => ({
    ...row,
    search_type: 'text' as const,
  }));
}
/**
 * Merges semantic and text hits by page id. Scores are blended as
 * 0.7 * similarity + 0.3 * text rank; the 20 best-scoring entries are
 * returned in descending order.
 */
private combineHybridResults(
  semanticResults: SemanticSearchResponseDto[],
  textResults: any[],
  query: string,
): HybridSearchResponseDto[] {
  const merged = new Map<string, HybridSearchResponseDto>();

  // Seed with every semantic hit, scored by its weighted similarity alone.
  for (const semanticHit of semanticResults) {
    merged.set(semanticHit.id, {
      ...semanticHit,
      text_rank: undefined,
      combined_score: semanticHit.similarity_score * 0.7, // Weight semantic results
      search_type: 'semantic',
    });
  }

  // Fold in the text hits: upgrade pages seen by both, append the rest.
  for (const textHit of textResults) {
    const seen = merged.get(textHit.id);

    if (!seen) {
      merged.set(textHit.id, {
        id: textHit.id,
        title: textHit.title,
        icon: textHit.icon,
        parentPageId: textHit.parentPageId,
        creatorId: textHit.creatorId,
        similarity_score: 0,
        semantic_rank: 0,
        text_rank: textHit.text_rank,
        combined_score: textHit.text_rank * 0.3,
        highlight: textHit.highlight,
        createdAt: textHit.createdAt,
        updatedAt: textHit.updatedAt,
        search_type: 'text',
      });
      continue;
    }

    seen.combined_score =
      seen.similarity_score * 0.7 + textHit.text_rank * 0.3;
    seen.text_rank = textHit.text_rank;
    seen.search_type = 'hybrid';
  }

  const ranked = Array.from(merged.values());
  ranked.sort((left, right) => right.combined_score - left.combined_score);
  return ranked.slice(0, 20);
}
/**
 * Picks a short excerpt of `content` for display: the first sentence that
 * contains any query word (case-insensitive), otherwise the start of the
 * content. The excerpt is capped at 200 characters and suffixed with '...'.
 *
 * @param content - Plain text of the page.
 * @param query - Raw search text; split on whitespace into words.
 * @returns The excerpt, or '' when `content` is empty.
 */
private generateHighlight(content: string, query: string): string {
  if (!content) return '';

  // Splitting " foo" on whitespace yields an empty first token, and every
  // string "includes" the empty string, so blank tokens must be dropped or
  // the first sentence would always be chosen.
  const words = query
    .toLowerCase()
    .split(/\s+/)
    .filter((word) => word.length > 0);

  if (words.length > 0) {
    const sentences = content.split(/[.!?]+/);
    for (const sentence of sentences) {
      const lowerSentence = sentence.toLowerCase();
      if (words.some((word) => lowerSentence.includes(word))) {
        return sentence.trim().substring(0, 200) + '...';
      }
    }
  }

  return content.substring(0, 200) + '...';
}
}

View File

@ -0,0 +1,185 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import OpenAI from 'openai';
/** Settings used to talk to the embedding provider. */
export interface EmbeddingConfig {
  // Name of the embedding model to request.
  model: string;
  // Provider credential; optional because it may be unset.
  apiKey?: string;
  // Root URL of the provider API.
  baseUrl?: string;
  // Length of the vectors the model is asked to produce.
  dimensions: number;
}
/** Outcome of embedding a single text. */
export interface EmbeddingResult {
  // The vector itself.
  embedding: number[];
  // Token count reported for the request.
  tokens: number;
  // Model that produced the vector.
  model: string;
}
@Injectable()
export class EmbeddingService {
private readonly logger = new Logger(EmbeddingService.name);
private readonly config: EmbeddingConfig;
private readonly openai: OpenAI;
/**
 * Reads the embedding settings from configuration and builds the OpenAI
 * client.
 *
 * Settings: AI_EMBEDDING_MODEL (default 'text-embedding-3-small'),
 * OPENAI_API_KEY, AI_EMBEDDING_BASE_URL (default OpenAI's v1 API) and
 * AI_EMBEDDING_DIMENSIONS (default 1536).
 */
constructor(private readonly configService: ConfigService) {
  const defaultBaseUrl = 'https://api.openai.com/v1/';
  const configuredBaseUrl = this.configService
    .get<string>('AI_EMBEDDING_BASE_URL')
    ?.trim();

  this.config = {
    model: this.configService.get<string>(
      'AI_EMBEDDING_MODEL',
      'text-embedding-3-small',
    ),
    apiKey: this.configService.get<string>('OPENAI_API_KEY'),
    // The client appends '/embeddings' itself, so a value that already ends
    // with that path segment is reduced to its API root.
    baseUrl: (configuredBaseUrl || defaultBaseUrl).replace(
      /\/embeddings\/?$/,
      '/',
    ),
    dimensions: Number(
      this.configService.get<string>('AI_EMBEDDING_DIMENSIONS', '1536'),
    ),
  };

  if (!this.config.apiKey) {
    this.logger.warn(
      'OpenAI API key not configured. AI search will not work.',
    );
  }

  // A placeholder key lets the client be constructed even when no key is
  // configured; isConfigured() reports whether a real key is present.
  this.openai = new OpenAI({
    apiKey: this.config.apiKey || 'dummy-key',
    baseURL: this.config.baseUrl,
  });
}
/**
 * Generate the embedding vector for a single text.
 *
 * @param text - Text to embed; it is whitespace-normalised and truncated
 *   before being sent.
 * @returns The embedding vector.
 * @throws Error when `text` is empty or blank, and re-throws any provider
 *   failure after logging it, so callers never receive `undefined`.
 */
async generateEmbedding(text: string): Promise<number[]> {
  if (!text || text.trim().length === 0) {
    throw new Error('Text cannot be empty');
  }

  const cleanText = this.preprocessText(text);

  try {
    const result = await this.generateEmbeddingWithOpenAI(cleanText);
    return result.embedding;
  } catch (error) {
    this.logger.error(`Embedding generation failed:`, error);
    // Propagate: swallowing here would resolve the promise with undefined
    // and let callers index or search with a missing vector.
    throw error;
  }
}
/**
 * Embed many texts, sending them to the provider in fixed-size batches.
 * Returns one vector per input in input order; a failing batch is logged
 * and aborts the whole call.
 */
async generateEmbeddings(texts: string[]): Promise<number[][]> {
  if (!texts || texts.length === 0) {
    return [];
  }

  const prepared = texts.map((text) => this.preprocessText(text));
  const step = this.getBatchSize();
  const embeddings: number[][] = [];

  let i = 0;
  while (i < prepared.length) {
    const chunk = prepared.slice(i, i + step);

    try {
      const vectors = await this.generateBatchEmbeddings(chunk);
      for (const vector of vectors) {
        embeddings.push(vector);
      }
    } catch (error) {
      this.logger.error(
        `Batch embedding generation failed for batch ${i}:`,
        error,
      );
      throw error;
    }

    i += step;
  }

  return embeddings;
}
/**
 * Request one embedding from the OpenAI client and wrap it with the token
 * count and model name. Throws when the response carries no data.
 */
private async generateEmbeddingWithOpenAI(
  text: string,
): Promise<EmbeddingResult> {
  const { model, dimensions } = this.config;

  const response = await this.openai.embeddings.create({
    model,
    input: text,
    dimensions,
  });

  const hasData = response.data && response.data.length !== 0;
  if (!hasData) {
    throw new Error('Invalid response from OpenAI API');
  }

  const [first] = response.data;
  return {
    embedding: first.embedding,
    tokens: response.usage?.total_tokens || 0,
    model: this.config.model,
  };
}
/**
 * Request embeddings for several texts in a single API call and return the
 * vectors in the order the response lists them.
 */
private async generateBatchEmbeddings(texts: string[]): Promise<number[][]> {
  const { model, dimensions } = this.config;

  const response = await this.openai.embeddings.create({
    model,
    input: texts,
    dimensions,
  });

  const items = response.data;
  if (!items || !Array.isArray(items)) {
    throw new Error('Invalid response from OpenAI API');
  }

  const vectors: number[][] = [];
  for (const item of items) {
    vectors.push(item.embedding);
  }
  return vectors;
}
/**
 * Normalise text before embedding: collapse every whitespace run to one
 * space, trim the ends, and cap the result at 8000 characters.
 */
private preprocessText(text: string): string {
  if (!text) {
    return '';
  }

  // Conservative cap; models limit input by tokens, not characters.
  const maxLength = 8000;

  const collapsed = text.replace(/\s+/g, ' ').trim();
  return collapsed.length > maxLength
    ? collapsed.substring(0, maxLength)
    : collapsed;
}
/**
 * Number of texts sent per embedding request.
 */
private getBatchSize(): number {
  // OpenAI supports up to 2048 inputs
  const batchSize = 100;
  return batchSize;
}
/**
 * Resolve after `ms` milliseconds; a delay helper for retry loops.
 */
private sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * True when an API key is present in the configuration.
 */
isConfigured(): boolean {
  return Boolean(this.config.apiKey);
}
/**
 * Return a shallow copy of the embedding configuration so callers cannot
 * mutate the service's own object. The copy includes the API key.
 */
getConfig(): EmbeddingConfig {
  const snapshot: EmbeddingConfig = Object.assign({}, this.config);
  return snapshot;
}
}

View File

@ -0,0 +1,393 @@
import { Injectable, Logger, OnModuleDestroy } from '@nestjs/common';
import {
VectorSearchOptions,
VectorSearchResult,
VectorService,
} from './vector.service';
import {
createClient,
RedisClientType,
SCHEMA_FIELD_TYPE,
SCHEMA_VECTOR_FIELD_ALGORITHM,
} from 'redis';
import { EnvironmentService } from '../../../integrations/environment/environment.service';
/** Everything needed to write one page into the vector index. */
export interface IndexPageData {
  // Id of the page being indexed.
  pageId: string;
  // Embedding vector for the page content.
  embedding: number[];
  // Descriptive fields stored next to the vector; extra keys are allowed.
  metadata: {
    title?: string;
    workspaceId: string;
    spaceId?: string;
    [key: string]: any;
  };
}
/** Connection and index settings for the Redis vector store. */
export interface RedisVectorConfig {
  // Where to reach Redis.
  host: string;
  port: number;
  password?: string;
  db?: number;

  // Search index name and the vector length it is built for.
  indexName: string;
  vectorDimension: number;
}
@Injectable()
export class RedisVectorService implements OnModuleDestroy {
private readonly logger = new Logger(RedisVectorService.name);
private readonly redis: RedisClientType;
private readonly config: RedisVectorConfig;
private isIndexCreated = false;
// Builds the service: fixes the index settings, creates the Redis client from
// the application's Redis URL, registers an error logger on the client, and
// kicks off the connection.
constructor(
private readonly environmentService: EnvironmentService,
private readonly vectorService: VectorService,
) {
// Only indexName and vectorDimension are provided here; the remaining
// RedisVectorConfig fields (host, port, ...) are left unset, which is why the
// type error is suppressed on the assignment below.
// NOTE(review): vectorDimension is hard-coded to 1536 and does not follow the
// AI_EMBEDDING_DIMENSIONS setting — confirm this is intended.
//@ts-ignore
this.config = {
indexName: 'docmost_pages_index',
vectorDimension: 1536, //AI_EMBEDDING_DIMENSIONS
};
this.redis = createClient({
url: this.environmentService.getRedisUrl(),
});
// Client-level errors are logged instead of being left unhandled.
this.redis.on('error', (err) => {
this.logger.error('Redis Client Error:', err);
});
// Called without await (a constructor cannot await).
// NOTE(review): confirm initializeConnection handles its own rejections.
this.initializeConnection();
}
/**
 * Runs a KNN vector query against the search index.
 *
 * @param queryEmbedding - Query vector; sent as a Float32 binary blob.
 * @param options - `limit` (default 20), `offset` (default 0), `threshold`
 *   (default 0.7, handed to the result parser) and optional `filters`.
 *   An array-valued filter becomes a TAG set (`@field:{a|b}`), a scalar
 *   becomes `@field:value`; undefined/null values are ignored.
 * @returns Parsed results. An empty array filter matches nothing, so `[]`
 *   is returned without querying.
 * @throws Error wrapping the message of any underlying failure.
 */
async searchSimilar(
  queryEmbedding: number[],
  options: VectorSearchOptions,
): Promise<VectorSearchResult[]> {
  try {
    await this.ensureIndexExists();

    const { limit = 20, offset = 0, threshold = 0.7, filters } = options;

    // Punctuation and whitespace inside TAG values must be backslash-escaped
    // in the query, otherwise ids such as UUIDs are not read as literals.
    const tagSpecialChars = /[\s,.<>{}\[\]"':;!@#$%^&*()\-+=~|\\\/]/g;

    // NOTE(review): callers in this module pass a `page_ids` filter while
    // indexPage stores the hash field as `page_id`; confirm the index schema
    // exposes the queried field names.
    const filterClauses: string[] = [];
    for (const [key, value] of Object.entries(filters ?? {})) {
      if (value === undefined || value === null) {
        continue;
      }
      if (Array.isArray(value)) {
        // An empty allow-list means "nothing is permitted"; `@field:{}` would
        // be a syntax error and dropping the clause would widen the search.
        if (value.length === 0) {
          return [];
        }
        const tags = value.map((tag) =>
          String(tag).replace(tagSpecialChars, '\\$&'),
        );
        filterClauses.push(`@${key}:{${tags.join('|')}}`);
      } else {
        filterClauses.push(`@${key}:${value}`);
      }
    }

    // Fetch limit + offset neighbours so LIMIT can page inside the KNN set.
    const knnClause = `[KNN ${limit + offset} @embedding $vector AS score]`;
    const query =
      filterClauses.length > 0
        ? `(${filterClauses.join(' ')})=>${knnClause}`
        : `*=>${knnClause}`;

    // The template-literal and literal casts keep SORTBY assignable to the
    // client's search-options type.
    const searchOptions = {
      PARAMS: {
        vector: Buffer.from(new Float32Array(queryEmbedding).buffer),
      },
      SORTBY: {
        BY: '@score' as `@${string}`,
        DIRECTION: 'ASC' as const,
      },
      LIMIT: {
        from: offset,
        size: limit,
      },
      RETURN: ['page_id', 'workspace_id', 'space_id', 'title', 'score'],
      DIALECT: 2,
    };

    const searchResult = await this.redis.ft.search(
      this.config.indexName,
      query,
      searchOptions,
    );

    const results = this.parseSearchResults(searchResult, threshold);
    this.logger.debug(`Vector search found ${results.length} results`);
    return results;
  } catch (error) {
    this.logger.error('Vector search failed:', error);
    throw new Error(`Vector search failed: ${error instanceof Error ? error.message : String(error)}`);
  }
}
/**
 * Stores one page's embedding and metadata as a Redis hash and gives the key
 * a 30-day expiry.
 *
 * @param data Page id, embedding and metadata (workspace id, optional space
 *   id and title; missing optional values are stored as empty strings).
 * @throws Rethrows any error after logging it.
 */
async indexPage(data: IndexPageData): Promise<void> {
  const ttlSeconds = 86400 * 30; // 30 days

  try {
    await this.ensureIndexExists();

    const key = this.vectorService.createVectorKey(
      data.pageId,
      data.metadata.workspaceId,
    );
    const embeddingBytes = Buffer.from(new Float32Array(data.embedding).buffer);

    await this.redis.hSet(key, {
      page_id: data.pageId,
      workspace_id: data.metadata.workspaceId,
      space_id: data.metadata.spaceId || '',
      title: data.metadata.title || '',
      embedding: embeddingBytes,
      indexed_at: Date.now().toString(),
    });
    await this.redis.expire(key, ttlSeconds);

    this.logger.debug(
      `Indexed page ${data.pageId} in workspace ${data.metadata.workspaceId}`,
    );
  } catch (error) {
    const reason = error?.['message'];
    this.logger.error(`Failed to index page ${data.pageId}: ${reason}`, error);
    throw error;
  }
}
/**
 * Removes a page's hash from Redis.
 *
 * @param pageId Page whose entry should be deleted.
 * @param workspaceId Workspace the page belongs to (part of the key).
 * @throws Rethrows any error after logging it.
 */
async deletePage(pageId: string, workspaceId: string): Promise<void> {
  try {
    const vectorKey = this.vectorService.createVectorKey(pageId, workspaceId);
    await this.redis.del(vectorKey);
    this.logger.debug(`Deleted page ${pageId} from vector index`);
  } catch (error) {
    const reason = error?.['message'];
    this.logger.error(`Failed to delete page ${pageId}: ${reason}`, error);
    throw error;
  }
}
/**
 * Indexes many pages, 100 at a time, queuing each chunk on a Redis MULTI.
 *
 * @param pages Pages to index.
 * @returns `indexed` — number of pages counted as written (two successful
 *   replies per page: hSet + expire); `errors` — one message per page that
 *   failed while being queued.
 * @throws Rethrows any error raised outside the per-page queuing step.
 */
async batchIndexPages(
  pages: IndexPageData[],
): Promise<{ indexed: number; errors: string[] }> {
  const chunkSize = 100;
  const ttlSeconds = 86400 * 30;
  const errors: string[] = [];
  let indexed = 0;

  try {
    await this.ensureIndexExists();

    for (let start = 0; start < pages.length; start += chunkSize) {
      const chunk = pages.slice(start, start + chunkSize);
      const pipeline = this.redis.multi();

      for (const page of chunk) {
        try {
          const key = this.vectorService.createVectorKey(
            page.pageId,
            page.metadata.workspaceId,
          );
          pipeline.hSet(key, {
            page_id: page.pageId,
            workspace_id: page.metadata.workspaceId,
            space_id: page.metadata.spaceId || '',
            title: page.metadata.title || '',
            embedding: Buffer.from(new Float32Array(page.embedding).buffer),
            indexed_at: Date.now().toString(),
          });
          pipeline.expire(key, ttlSeconds);
        } catch (error) {
          errors.push(`Page ${page.pageId}: ${error?.['message']}`);
        }
      }

      const replies = await pipeline.exec();
      //@ts-ignore
      const okReplies = replies?.filter((reply) => !reply.error).length || 0;
      // Every page contributes two commands, so halve the reply count.
      indexed += Math.floor(okReplies / 2);
    }

    this.logger.log(
      `Batch indexed ${indexed} pages with ${errors.length} errors`,
    );
    return { indexed, errors };
  } catch (error) {
    this.logger.error(`Batch indexing failed: ${error?.['message']}`, error);
    throw error;
  }
}
/**
 * Connects the Redis client and makes sure the vector index exists.
 *
 * Failures are logged through the service logger and not rethrown, so the
 * returned promise never rejects; `isIndexCreated` simply stays false.
 */
private async initializeConnection(): Promise<void> {
  try {
    await this.redis.connect();
    await this.createIndex();
    this.isIndexCreated = true;
    this.logger.log('Redis vector database connected and index initialized');
  } catch (error) {
    this.logger.error(
      `Failed to initialize vector index: ${error?.['message']}`,
      error,
    );
  }
}
/**
 * Creates the vector index on first use.
 *
 * Does nothing when `isIndexCreated` is already set; otherwise calls
 * `createIndex()` and records success in the flag.
 */
private async ensureIndexExists(): Promise<void> {
  if (this.isIndexCreated) {
    return;
  }
  await this.createIndex();
  this.isIndexCreated = true;
}
/**
 * Creates the search index over hashes whose keys start with `vector:`,
 * unless it is already present.
 *
 * @throws Rethrows index-creation errors other than "Index already exists".
 */
private async createIndex(): Promise<void> {
  const indexName = this.config.indexName;

  // Probe first: any failure of the info call is taken to mean "no index yet".
  let alreadyPresent = false;
  try {
    await this.redis.ft.info(indexName);
    alreadyPresent = true;
  } catch {
    // Index doesn't exist, create it
  }
  if (alreadyPresent) {
    this.logger.debug(`Vector index ${indexName} already exists`);
    return;
  }

  try {
    await this.redis.ft.create(
      indexName,
      {
        page_id: { type: SCHEMA_FIELD_TYPE.TEXT, SORTABLE: true },
        workspace_id: { type: SCHEMA_FIELD_TYPE.TEXT, SORTABLE: true },
        space_id: { type: SCHEMA_FIELD_TYPE.TEXT },
        title: { type: SCHEMA_FIELD_TYPE.TEXT },
        embedding: {
          type: SCHEMA_FIELD_TYPE.VECTOR,
          ALGORITHM: SCHEMA_VECTOR_FIELD_ALGORITHM.HNSW,
          TYPE: 'FLOAT32',
          DIM: this.config.vectorDimension,
          DISTANCE_METRIC: 'COSINE',
        },
        indexed_at: { type: SCHEMA_FIELD_TYPE.NUMERIC, SORTABLE: true },
      },
      { ON: 'HASH', PREFIX: 'vector:' },
    );
    this.logger.log(`Created vector index ${indexName}`);
  } catch (error) {
    // A concurrent creator may have won the race; that is not a failure.
    const lostRace = error?.['message']?.includes('Index already exists');
    if (!lostRace) {
      throw error;
    }
    this.logger.debug('Vector index already exists');
  }
}
/**
 * Converts a raw search reply into `VectorSearchResult`s, keeping only hits
 * whose similarity (1 - distance) is at least `threshold`.
 *
 * @param results Raw reply; expected to carry a `documents` array whose items
 *   have an `id` (the Redis key) and a `value` map of returned fields.
 * @param threshold Minimum similarity a hit must reach to be returned.
 * @returns Matching results in reply order; empty when there are no documents.
 */
private parseSearchResults(
  results: any,
  threshold: number,
): VectorSearchResult[] {
  const documents = results?.documents;
  if (!documents || documents.length === 0) {
    return [];
  }

  const parsed: VectorSearchResult[] = [];
  for (const doc of documents) {
    const fields = doc.value;

    // searchSimilar aliases the KNN distance as `score` and only returns that
    // field; `distance` is kept as a fallback for replies that use that name.
    const rawDistance = fields?.score ?? fields?.distance ?? '1';
    const distance = parseFloat(String(rawDistance));
    if (Number.isNaN(distance)) {
      continue; // unparseable distance can never satisfy the threshold
    }

    const similarity = 1 - distance; // Convert distance to similarity
    if (similarity < threshold) {
      continue;
    }

    // Keys are `vector:<workspaceId>:<pageId>`, so the page id is the last
    // segment (index 1 would be the workspace id).
    const key = String(doc.id);
    const pageIdFromKey = key.slice(key.lastIndexOf(':') + 1);

    parsed.push({
      pageId: fields?.page_id || pageIdFromKey,
      score: similarity,
      metadata: {
        workspaceId: fields?.workspace_id,
        spaceId: fields?.space_id,
        title: fields?.title,
        distance,
      },
    });
  }
  return parsed;
}
/**
 * Reads document count and index size from the index info reply.
 *
 * Never throws: on failure the error is logged and all-zero stats are
 * returned.
 *
 * NOTE(review): the `numDocs` / `indexSize` property names are not known to
 * the client typings (hence the ts-ignore markers) — confirm they match the
 * reply shape of the installed client version.
 */
async getIndexStats(): Promise<{
  totalDocs: number;
  indexSize: string;
  vectorCount: number;
}> {
  try {
    const info = await this.redis.ft.info(this.config.indexName);
    //@ts-ignore
    const docCount = info.numDocs || 0;
    //@ts-ignore
    const size = info.indexSize || '0';
    return {
      totalDocs: docCount,
      indexSize: size,
      vectorCount: docCount,
    };
  } catch (error) {
    this.logger.error(`Failed to get index stats: ${error?.['message']}`);
    return { totalDocs: 0, indexSize: '0', vectorCount: 0 };
  }
}
/**
 * Drops the vector index and clears the "index created" flag so the next
 * operation recreates it.
 *
 * @throws Rethrows any error after logging it.
 */
async deleteIndex(): Promise<void> {
  const indexName = this.config.indexName;
  try {
    await this.redis.ft.dropIndex(indexName);
    this.isIndexCreated = false;
    this.logger.log(`Deleted vector index ${indexName}`);
  } catch (error) {
    const reason = error?.['message'];
    this.logger.error(`Failed to delete index: ${reason}`);
    throw error;
  }
}
/**
 * Closes the Redis connection. Failures are logged and swallowed, so this
 * never rejects.
 */
async disconnect(): Promise<void> {
  try {
    await this.redis.quit();
    this.logger.log('Redis vector database disconnected');
  } catch (error) {
    const reason = error?.['message'];
    this.logger.error(`Failed to disconnect from Redis: ${reason}`);
  }
}
/** Module teardown hook: closes the Redis connection via `disconnect()`. */
async onModuleDestroy(): Promise<void> {
  await this.disconnect();
}
}

View File

@ -0,0 +1,216 @@
import { Injectable, Logger } from '@nestjs/common';
/** One hit returned from a vector search. */
export interface VectorSearchResult {
  /** Identifier of the matched page. */
  pageId: string;
  /** Ranking score; `sortByScore` orders results by this value, highest first. */
  score: number;
  /** Optional free-form extra fields attached to the hit. */
  metadata?: Record<string, any>;
}
/** Optional knobs for a vector search; every field may be omitted. */
export interface VectorSearchOptions {
  /** Maximum number of results to return. */
  limit?: number;
  /** Number of leading results to skip. */
  offset?: number;
  /** Minimum score a result must reach to be kept. */
  threshold?: number;
  /** Field-name → value (or array of values) restrictions applied to the search. */
  filters?: Record<string, any>;
}
/**
 * Stateless helpers for embedding vectors: similarity metrics, validation,
 * string (de)serialisation, result post-processing and Redis key naming.
 */
@Injectable()
export class VectorService {
  private readonly logger = new Logger(VectorService.name);

  /**
   * Guards the pairwise metrics against mismatched inputs.
   *
   * @throws Error when the two vectors differ in length.
   */
  private assertSameLength(vectorA: number[], vectorB: number[]): void {
    if (vectorA.length !== vectorB.length) {
      throw new Error('Vectors must have the same length');
    }
  }

  /**
   * Calculate cosine similarity between two vectors.
   *
   * @returns dot(a, b) / (|a| * |b|), or 0 when either vector has zero magnitude.
   * @throws Error when the vectors differ in length.
   */
  cosineSimilarity(vectorA: number[], vectorB: number[]): number {
    this.assertSameLength(vectorA, vectorB);

    let dotProduct = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < vectorA.length; i++) {
      dotProduct += vectorA[i] * vectorB[i];
      normA += vectorA[i] * vectorA[i];
      normB += vectorB[i] * vectorB[i];
    }

    const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
    if (magnitude === 0) {
      return 0; // avoid dividing by zero for all-zero vectors
    }
    return dotProduct / magnitude;
  }

  /**
   * Calculate Euclidean distance between two vectors.
   *
   * @throws Error when the vectors differ in length.
   */
  euclideanDistance(vectorA: number[], vectorB: number[]): number {
    this.assertSameLength(vectorA, vectorB);

    let sum = 0;
    for (let i = 0; i < vectorA.length; i++) {
      const diff = vectorA[i] - vectorB[i];
      sum += diff * diff;
    }
    return Math.sqrt(sum);
  }

  /**
   * Calculate dot product similarity.
   *
   * @throws Error when the vectors differ in length.
   */
  dotProductSimilarity(vectorA: number[], vectorB: number[]): number {
    this.assertSameLength(vectorA, vectorB);

    let dotProduct = 0;
    for (let i = 0; i < vectorA.length; i++) {
      dotProduct += vectorA[i] * vectorB[i];
    }
    return dotProduct;
  }

  /**
   * Normalize a vector to unit length.
   *
   * @returns A new array; a zero-magnitude input is returned as an unchanged
   *   copy so the caller's array is never aliased.
   */
  normalizeVector(vector: number[]): number[] {
    const magnitude = Math.sqrt(
      vector.reduce((sum, val) => sum + val * val, 0),
    );
    if (magnitude === 0) {
      return [...vector];
    }
    return vector.map((val) => val / magnitude);
  }

  /**
   * Convert vector to a comma-separated string for Redis storage.
   */
  vectorToString(vector: number[]): string {
    return vector.join(',');
  }

  /**
   * Parse a vector from its comma-separated string form.
   *
   * An empty (or whitespace-only) string yields an empty vector, mirroring
   * `vectorToString([])`, instead of a single `NaN` element.
   */
  stringToVector(vectorString: string): number[] {
    if (vectorString.trim() === '') {
      return [];
    }
    return vectorString.split(',').map((val) => parseFloat(val));
  }

  /**
   * Validate vector format and dimensions.
   *
   * @param expectedDimensions When given (and non-zero), the vector must have
   *   exactly this many elements.
   * @returns true for a non-empty array of finite numbers of the expected size.
   */
  validateVector(vector: number[], expectedDimensions?: number): boolean {
    if (!Array.isArray(vector) || vector.length === 0) {
      return false;
    }
    if (expectedDimensions && vector.length !== expectedDimensions) {
      return false;
    }
    // Number.isFinite rejects non-numbers, NaN and +/-Infinity alike.
    return vector.every((val) => Number.isFinite(val));
  }

  /**
   * Calculate similarity score with a configurable method.
   *
   * @param method 'cosine' (default), 'euclidean' (distance mapped to
   *   1 / (1 + distance)), or 'dot'.
   * @throws Error for an unsupported method or mismatched vector lengths.
   */
  calculateSimilarity(
    vectorA: number[],
    vectorB: number[],
    method: 'cosine' | 'euclidean' | 'dot' = 'cosine',
  ): number {
    switch (method) {
      case 'cosine':
        return this.cosineSimilarity(vectorA, vectorB);
      case 'euclidean': {
        // Convert distance to similarity (0-1 scale)
        const distance = this.euclideanDistance(vectorA, vectorB);
        return 1 / (1 + distance);
      }
      case 'dot':
        return this.dotProductSimilarity(vectorA, vectorB);
      default:
        throw new Error(`Unsupported similarity method: ${method}`);
    }
  }

  /**
   * Keep only results whose score is at least `threshold`.
   */
  filterByThreshold(
    results: VectorSearchResult[],
    threshold: number,
  ): VectorSearchResult[] {
    return results.filter((result) => result.score >= threshold);
  }

  /**
   * Sort results by similarity score (descending).
   *
   * @returns A new sorted array; the input array is left untouched.
   */
  sortByScore(results: VectorSearchResult[]): VectorSearchResult[] {
    return [...results].sort((a, b) => b.score - a.score);
  }

  /**
   * Apply pagination to results.
   */
  paginateResults(
    results: VectorSearchResult[],
    offset: number = 0,
    limit: number = 20,
  ): VectorSearchResult[] {
    return results.slice(offset, offset + limit);
  }

  /**
   * Create the Redis key under which a page's vector is stored.
   */
  createVectorKey(pageId: string, workspaceId: string): string {
    return `vector:${workspaceId}:${pageId}`;
  }

  /**
   * Create the Redis key for a page's metadata.
   */
  createMetadataKey(pageId: string, workspaceId: string): string {
    return `metadata:${workspaceId}:${pageId}`;
  }

  /**
   * Run `processor` over `items` in consecutive chunks and concatenate the
   * outputs in order.
   *
   * @param batchSize Chunk size; must be greater than zero.
   * @throws RangeError when `batchSize` is not a positive number (a zero or
   *   negative size would never advance through the list).
   * @throws Rethrows the first processor failure after logging the item range.
   */
  async batchProcess<T, R>(
    items: T[],
    processor: (batch: T[]) => Promise<R[]>,
    batchSize: number = 100,
  ): Promise<R[]> {
    if (!(batchSize > 0)) {
      throw new RangeError(
        `batchSize must be a positive number, received ${batchSize}`,
      );
    }

    const results: R[] = [];
    for (let i = 0; i < items.length; i += batchSize) {
      const batch = items.slice(i, i + batchSize);
      try {
        const batchResults = await processor(batch);
        results.push(...batchResults);
      } catch (error) {
        this.logger.error(
          `Batch processing failed for items ${i}-${i + batch.length}:`,
          error,
        );
        throw error;
      }
    }
    return results;
  }
}

View File

@ -11,6 +11,7 @@ import { PageModule } from './page/page.module';
import { AttachmentModule } from './attachment/attachment.module';
import { CommentModule } from './comment/comment.module';
import { SearchModule } from './search/search.module';
import { AiSearchModule } from './ai-search/ai-search.module';
import { SpaceModule } from './space/space.module';
import { GroupModule } from './group/group.module';
import { CaslModule } from './casl/casl.module';
@ -26,6 +27,7 @@ import { ShareModule } from './share/share.module';
AttachmentModule,
CommentModule,
SearchModule,
AiSearchModule,
SpaceModule,
GroupModule,
CaslModule,

View File

@ -205,4 +205,12 @@ export class EnvironmentService {
.toLowerCase();
return disable === 'true';
}
/** Returns the `POSTHOG_HOST` configuration value. */
getPostHogHost(): string {
  const host = this.configService.get<string>('POSTHOG_HOST');
  return host;
}
/** Returns the `POSTHOG_KEY` configuration value. */
getPostHogKey(): string {
  const key = this.configService.get<string>('POSTHOG_KEY');
  return key;
}
}

View File

@ -47,6 +47,8 @@ export class StaticModule implements OnModuleInit {
BILLING_TRIAL_DAYS: this.environmentService.isCloud()
? this.environmentService.getBillingTrialDays()
: undefined,
POSTHOG_HOST: this.environmentService.getPostHogHost(),
POSTHOG_KEY: this.environmentService.getPostHogKey(),
};
const windowScriptContent = `<script>window.CONFIG=${JSON.stringify(configString)};</script>`;

124
pnpm-lock.yaml generated
View File

@ -296,6 +296,9 @@ importers:
mitt:
specifier: ^3.0.1
version: 3.0.1
posthog-js:
specifier: ^1.255.1
version: 1.255.1
react:
specifier: ^18.3.1
version: 18.3.1
@ -528,6 +531,9 @@ importers:
nodemailer:
specifier: ^7.0.3
version: 7.0.3
openai:
specifier: ^5.8.2
version: 5.8.2(ws@8.18.2)(zod@3.25.56)
openid-client:
specifier: ^5.7.1
version: 5.7.1
@ -549,6 +555,9 @@ importers:
react:
specifier: ^18.3.1
version: 18.3.1
redis:
specifier: ^5.5.6
version: 5.5.6
reflect-metadata:
specifier: ^0.2.2
version: 0.2.2
@ -3346,6 +3355,34 @@ packages:
peerDependencies:
react: ^18.0 || ^19.0 || ^19.0.0-rc
'@redis/bloom@5.5.6':
resolution: {integrity: sha512-bNR3mxkwtfuCxNOzfV8B3R5zA1LiN57EH6zK4jVBIgzMzliNuReZXBFGnXvsi80/SYohajn78YdpYI+XNpqL+A==}
engines: {node: '>= 18'}
peerDependencies:
'@redis/client': ^5.5.6
'@redis/client@5.5.6':
resolution: {integrity: sha512-M3Svdwt6oSfyfQdqEr0L2HOJH2vK7GgCFx1NfAQvpWAT4+ljoT1L5S5cKT3dA9NJrxrOPDkdoTPWJnIrGCOcmw==}
engines: {node: '>= 18'}
'@redis/json@5.5.6':
resolution: {integrity: sha512-AIsoe3SsGQagqAmSQHaqxEinm5oCWr7zxPWL90kKaEdLJ+zw8KBznf2i9oK0WUFP5pFssSQUXqnscQKe2amfDQ==}
engines: {node: '>= 18'}
peerDependencies:
'@redis/client': ^5.5.6
'@redis/search@5.5.6':
resolution: {integrity: sha512-JSqasYqO0mVcHL7oxvbySRBBZYRYhFl3W7f0Da7BW8M/r0Z9wCiVrdjnN4/mKBpWZkoJT/iuisLUdPGhpKxBew==}
engines: {node: '>= 18'}
peerDependencies:
'@redis/client': ^5.5.6
'@redis/time-series@5.5.6':
resolution: {integrity: sha512-jkpcgq3NOI3TX7xEAJ3JgesJTxAx7k0m6lNxNsYdEM8KOl+xj7GaB/0CbLkoricZDmFSEAz7ClA1iK9XkGHf+Q==}
engines: {node: '>= 18'}
peerDependencies:
'@redis/client': ^5.5.6
'@remirror/core-constants@3.0.0':
resolution: {integrity: sha512-42aWfPrimMfDKDi4YegyS7x+/0tlzaqwPQCULLanv3DMIlu96KTJR0fM5isWX2UViOqlGnX6YFgqWepcX+XMNg==}
@ -5213,6 +5250,9 @@ packages:
core-js-compat@3.35.0:
resolution: {integrity: sha512-5blwFAddknKeNgsjBzilkdQ0+YK8L1PfqPYq40NOYMYFSS38qj+hpTcLLWwpIwA2A5bje/x5jmVn2tzUMg9IVw==}
core-js@3.43.0:
resolution: {integrity: sha512-N6wEbTTZSYOY2rYAn85CuvWWkCK6QweMn7/4Nr3w+gDBeBhk/x4EJeY6FPo4QzDoJZxVTv8U7CMvgWk6pOHHqA==}
core-util-is@1.0.3:
resolution: {integrity: sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==}
@ -5988,6 +6028,9 @@ packages:
picomatch:
optional: true
fflate@0.4.8:
resolution: {integrity: sha512-FJqqoDBR00Mdj9ppamLa/Y7vxm+PRmNWA67N846RvsoYVMKB4q3y/de5PA7gUmRMYK/8CMz2GDZQmCRN1wBcWA==}
fflate@0.8.2:
resolution: {integrity: sha512-cPJU47OaAoCbg0pBvzsgpTPhmhqI5eJjh/JIu8tPj5q+T7iLvW/JAYUqmE7KOB4R1ZyEhzBaIQpQpardBF5z8A==}
@ -7595,6 +7638,18 @@ packages:
resolution: {integrity: sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==}
engines: {node: '>=12'}
openai@5.8.2:
resolution: {integrity: sha512-8C+nzoHYgyYOXhHGN6r0fcb4SznuEn1R7YZMvlqDbnCuE0FM2mm3T1HiYW6WIcMS/F1Of2up/cSPjLPaWt0X9Q==}
hasBin: true
peerDependencies:
ws: ^8.18.0
zod: ^3.23.8
peerDependenciesMeta:
ws:
optional: true
zod:
optional: true
openid-client@5.7.1:
resolution: {integrity: sha512-jDBPgSVfTnkIh71Hg9pRvtJc6wTwqjRkN88+gCFtYWrlP4Yx2Dsrow8uPi3qLr/aeymPF3o2+dS+wOpglK04ew==}
@ -7955,9 +8010,23 @@ packages:
postgres-range@1.1.4:
resolution: {integrity: sha512-i/hbxIE9803Alj/6ytL7UHQxRvZkI9O4Sy+J3HGc4F4oo/2eQAjTSNJ0bfxyse3bH0nuVesCk+3IRLaMtG3H6w==}
posthog-js@1.255.1:
resolution: {integrity: sha512-KMh0o9MhORhEZVjXpktXB5rJ8PfDk+poqBoTSoLzWgNjhJf6D8jcyB9jUMA6vVPfn4YeepVX5NuclDRqOwr5Mw==}
peerDependencies:
'@rrweb/types': 2.0.0-alpha.17
rrweb-snapshot: 2.0.0-alpha.17
peerDependenciesMeta:
'@rrweb/types':
optional: true
rrweb-snapshot:
optional: true
postmark@4.0.5:
resolution: {integrity: sha512-nerZdd3TwOH4CgGboZnlUM/q7oZk0EqpZgJL+Y3Nup8kHeaukxouQ6JcFF3EJEijc4QbuNv1TefGhboAKtf/SQ==}
preact@10.26.9:
resolution: {integrity: sha512-SSjF9vcnF27mJK1XyFMNJzFd5u3pQiATFqoaDy03XuN00u4ziveVVEGt5RKJrDR8MHE/wJo9Nnad56RLzS2RMA==}
prelude-ls@1.2.1:
resolution: {integrity: sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==}
engines: {node: '>= 0.8.0'}
@ -8303,6 +8372,10 @@ packages:
resolution: {integrity: sha512-DJnGAeenTdpMEH6uAJRK/uiyEIH9WVsUmoLwzudwGJUwZPp80PDBWPHXSAGNPwNvIXAbe7MSUB1zQFugFml66A==}
engines: {node: '>=4'}
redis@5.5.6:
resolution: {integrity: sha512-hbpqBfcuhWHOS9YLNcXcJ4akNr7HFX61Dq3JuFZ9S7uU7C7kvnzuH2PDIXOP62A3eevvACoG8UacuXP3N07xdg==}
engines: {node: '>= 18'}
redlock@4.2.0:
resolution: {integrity: sha512-j+oQlG+dOwcetUt2WJWttu4CZVeRzUrcVcISFmEmfyuwCVSJ93rDT7YSgg7H7rnxwoRyk/jU46kycVka5tW7jA==}
engines: {node: '>=8.0.0'}
@ -9297,6 +9370,9 @@ packages:
wcwidth@1.0.1:
resolution: {integrity: sha512-XHPEwS0q6TaxcvG85+8EYkbiCux2XtWG2mkc47Ng2A77BQu9+DqIOJldST4HgPkuea7dvKSj5VgX3P1d4rW8Tg==}
web-vitals@4.2.4:
resolution: {integrity: sha512-r4DIlprAGwJ7YM11VZp4R884m0Vmgr6EAKe3P+kO0PPj3Unqyvv59rczf6UiGcb9Z8QxZVcqKNwv/g0WNdWwsw==}
web-worker@1.5.0:
resolution: {integrity: sha512-RiMReJrTAiA+mBjGONMnjVDP2u3p9R1vkcGz6gDIrOMT3oGuYwX2WRMYI9ipkphSuE5XKEhydbhNEJh4NY9mlw==}
@ -13002,6 +13078,26 @@ snapshots:
dependencies:
react: 18.3.1
'@redis/bloom@5.5.6(@redis/client@5.5.6)':
dependencies:
'@redis/client': 5.5.6
'@redis/client@5.5.6':
dependencies:
cluster-key-slot: 1.1.2
'@redis/json@5.5.6(@redis/client@5.5.6)':
dependencies:
'@redis/client': 5.5.6
'@redis/search@5.5.6(@redis/client@5.5.6)':
dependencies:
'@redis/client': 5.5.6
'@redis/time-series@5.5.6(@redis/client@5.5.6)':
dependencies:
'@redis/client': 5.5.6
'@remirror/core-constants@3.0.0': {}
'@rollup/rollup-android-arm-eabi@4.40.0':
@ -15194,6 +15290,8 @@ snapshots:
dependencies:
browserslist: 4.24.2
core-js@3.43.0: {}
core-util-is@1.0.3: {}
cors@2.8.5:
@ -16181,6 +16279,8 @@ snapshots:
optionalDependencies:
picomatch: 4.0.2
fflate@0.4.8: {}
fflate@0.8.2: {}
figures@3.2.0:
@ -18110,6 +18210,11 @@ snapshots:
is-docker: 2.2.1
is-wsl: 2.2.0
openai@5.8.2(ws@8.18.2)(zod@3.25.56):
optionalDependencies:
ws: 8.18.2
zod: 3.25.56
openid-client@5.7.1:
dependencies:
jose: 4.15.9
@ -18482,12 +18587,21 @@ snapshots:
postgres-range@1.1.4: {}
posthog-js@1.255.1:
dependencies:
core-js: 3.43.0
fflate: 0.4.8
preact: 10.26.9
web-vitals: 4.2.4
postmark@4.0.5:
dependencies:
axios: 1.9.0
transitivePeerDependencies:
- debug
preact@10.26.9: {}
prelude-ls@1.2.1: {}
prettier@3.4.1: {}
@ -18883,6 +18997,14 @@ snapshots:
dependencies:
redis-errors: 1.2.0
redis@5.5.6:
dependencies:
'@redis/bloom': 5.5.6(@redis/client@5.5.6)
'@redis/client': 5.5.6
'@redis/json': 5.5.6(@redis/client@5.5.6)
'@redis/search': 5.5.6(@redis/client@5.5.6)
'@redis/time-series': 5.5.6(@redis/client@5.5.6)
redlock@4.2.0:
dependencies:
bluebird: 3.7.2
@ -19911,6 +20033,8 @@ snapshots:
dependencies:
defaults: 1.0.4
web-vitals@4.2.4: {}
web-worker@1.5.0: {}
webidl-conversions@3.0.1: {}