This commit is contained in:
Philipinho
2025-06-30 01:21:01 -07:00
parent 232cea8cc9
commit 0cf44914ad
17 changed files with 2613 additions and 0 deletions

View File

@ -70,6 +70,7 @@
"nanoid": "3.3.11",
"nestjs-kysely": "^1.2.0",
"nodemailer": "^7.0.3",
"openai": "^5.8.2",
"openid-client": "^5.7.1",
"passport-google-oauth20": "^2.0.0",
"passport-jwt": "^4.0.1",
@ -77,6 +78,7 @@
"pg-tsquery": "^8.4.2",
"postmark": "^4.0.5",
"react": "^18.3.1",
"redis": "^5.5.6",
"reflect-metadata": "^0.2.2",
"rxjs": "^7.8.2",
"sanitize-filename-ts": "^1.0.2",

View File

@ -156,6 +156,7 @@ export class PersistenceExtension implements Extension {
page: {
...page,
content: tiptapJson,
textContent: textContent,
lastUpdatedById: context.user.id,
},
});

View File

@ -0,0 +1,444 @@
# AI Search Integration Guide
This guide shows how to integrate the AI Search module with your existing page operations for automatic indexing.
## Event-Based Auto-Indexing
The AI Search module uses event listeners to automatically index pages when they are created, updated, or deleted.
### Emitting Events in Page Service
Update your existing `PageService` to emit events for AI search indexing:
```typescript
// In your page.service.ts
import { EventEmitter2 } from '@nestjs/event-emitter';
import { Injectable } from '@nestjs/common';
@Injectable()
export class PageService {
constructor(
private readonly eventEmitter: EventEmitter2,
// ... other dependencies
) {}
async createPage(createPageDto: CreatePageDto): Promise<Page> {
// Your existing page creation logic
const page = await this.pageRepo.create(createPageDto);
// Emit event for AI search indexing
this.eventEmitter.emit('page.created', {
pageId: page.id,
workspaceId: page.workspaceId,
spaceId: page.spaceId,
title: page.title,
textContent: page.textContent,
operation: 'create'
});
return page;
}
async updatePage(pageId: string, updatePageDto: UpdatePageDto): Promise<Page> {
// Your existing page update logic
const page = await this.pageRepo.update(pageId, updatePageDto);
// Emit event for AI search reindexing
this.eventEmitter.emit('page.updated', {
pageId: page.id,
workspaceId: page.workspaceId,
spaceId: page.spaceId,
title: page.title,
textContent: page.textContent,
operation: 'update'
});
return page;
}
async deletePage(pageId: string): Promise<void> {
// Get page info before deletion
const page = await this.pageRepo.findById(pageId);
// Your existing page deletion logic
await this.pageRepo.delete(pageId);
// Emit event for AI search cleanup
if (page) {
this.eventEmitter.emit('page.deleted', {
pageId: page.id,
workspaceId: page.workspaceId,
spaceId: page.spaceId,
operation: 'delete'
});
}
}
}
```
### Adding EventEmitter to Page Module
Make sure your `PageModule` imports the `EventEmitterModule`:
```typescript
// In your page.module.ts
import { Module } from '@nestjs/common';
import { EventEmitterModule } from '@nestjs/event-emitter';
import { PageService } from './services/page.service';
import { PageController } from './page.controller';
@Module({
imports: [
EventEmitterModule, // Add this if not already present
],
controllers: [PageController],
providers: [PageService],
exports: [PageService],
})
export class PageModule {}
```
### Bulk Operations
For bulk operations, you can emit multiple events or use a bulk reindex:
```typescript
async bulkUpdatePages(updates: BulkUpdateDto[]): Promise<Page[]> {
const updatedPages = await this.pageRepo.bulkUpdate(updates);
// Option 1: Emit individual events
for (const page of updatedPages) {
this.eventEmitter.emit('page.updated', {
pageId: page.id,
workspaceId: page.workspaceId,
spaceId: page.spaceId,
title: page.title,
textContent: page.textContent,
operation: 'update'
});
}
// Option 2: Use bulk reindex (more efficient for large batches; see the listener sketch below)
// const pageIds = updatedPages.map(p => p.id);
// this.eventEmitter.emit('ai-search.bulk-reindex', {
// pageIds,
// workspaceId: updatedPages[0]?.workspaceId
// });
return updatedPages;
}
```
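If you go with Option 2, note that nothing in the module handles the `ai-search.bulk-reindex` event out of the box. A minimal listener sketch, assuming the payload shown above and delegating to the existing `AiSearchService.reindexPages` method:
```typescript
// Hypothetical listener for the bulk-reindex event; it is not shipped with the
// module, so register it as a provider (e.g. in AiSearchModule) yourself.
import { Injectable } from '@nestjs/common';
import { OnEvent } from '@nestjs/event-emitter';
import { AiSearchService } from '../ai-search/services/ai-search.service';

@Injectable()
export class BulkReindexListener {
  constructor(private readonly aiSearchService: AiSearchService) {}

  @OnEvent('ai-search.bulk-reindex')
  async handleBulkReindex(event: { pageIds: string[]; workspaceId: string }) {
    // Delegates to the same code path used by the /ai-search/reindex endpoint
    await this.aiSearchService.reindexPages({
      pageIds: event.pageIds,
      workspaceId: event.workspaceId,
    });
  }
}
```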
## Manual Integration
If you prefer manual control over indexing, you can directly use the AI search services:
```typescript
// In your page.service.ts
import { AiSearchService } from '../ai-search/services/ai-search.service';
@Injectable()
export class PageService {
constructor(
private readonly aiSearchService: AiSearchService,
// ... other dependencies
) {}
async createPageWithSearch(createPageDto: CreatePageDto): Promise<Page> {
const page = await this.pageRepo.create(createPageDto);
// Manually trigger indexing
try {
await this.aiSearchService.reindexPages({
pageIds: [page.id],
workspaceId: page.workspaceId
});
} catch (error) {
// Log error but don't fail the page creation
console.error('Failed to index page for AI search:', error);
}
return page;
}
}
```
## Frontend Integration
### Adding AI Search to Client
Create AI search service on the client side:
```typescript
// apps/client/src/features/ai-search/services/ai-search-service.ts
import api from "@/lib/api-client";
export interface AiSearchParams {
query: string;
spaceId?: string;
limit?: number;
similarity_threshold?: number;
}
export interface AiSearchResult {
id: string;
title: string;
icon: string;
similarity_score: number;
highlight: string;
space?: {
id: string;
name: string;
slug: string;
};
}
export async function semanticSearch(params: AiSearchParams): Promise<AiSearchResult[]> {
const response = await api.post<AiSearchResult[]>("/ai-search/semantic", params);
return response.data;
}
export async function hybridSearch(params: AiSearchParams): Promise<AiSearchResult[]> {
const response = await api.post<AiSearchResult[]>("/ai-search/hybrid", params);
return response.data;
}
```
### React Query Integration
```typescript
// apps/client/src/features/ai-search/queries/ai-search-query.ts
import { useQuery } from "@tanstack/react-query";
import { semanticSearch, hybridSearch, AiSearchParams } from "../services/ai-search-service";
export function useAiSemanticSearchQuery(params: AiSearchParams) {
return useQuery({
queryKey: ["ai-search", "semantic", params],
queryFn: () => semanticSearch(params),
enabled: !!params.query && params.query.length > 0,
});
}
export function useAiHybridSearchQuery(params: AiSearchParams) {
return useQuery({
queryKey: ["ai-search", "hybrid", params],
queryFn: () => hybridSearch(params),
enabled: !!params.query && params.query.length > 0,
});
}
```
### AI Search Component
```typescript
// apps/client/src/features/ai-search/components/ai-search-spotlight.tsx
import React, { useState } from "react";
import { Spotlight } from "@mantine/spotlight";
import { IconBrain } from "@tabler/icons-react";
import { useDebouncedValue } from "@mantine/hooks";
import { useAiSemanticSearchQuery } from "../queries/ai-search-query";
export function AiSearchSpotlight() {
const [query, setQuery] = useState("");
const [debouncedQuery] = useDebouncedValue(query, 300);
const { data: results, isLoading } = useAiSemanticSearchQuery({
query: debouncedQuery,
limit: 10,
similarity_threshold: 0.7,
});
return (
<Spotlight.Root query={query} onQueryChange={setQuery}>
<Spotlight.Search
placeholder="AI-powered semantic search..."
leftSection={<IconBrain size={20} />}
/>
<Spotlight.ActionsList>
{isLoading && <Spotlight.Empty>Searching...</Spotlight.Empty>}
{!isLoading && (!results || results.length === 0) && (
<Spotlight.Empty>No results found</Spotlight.Empty>
)}
{results?.map((result) => (
<Spotlight.Action key={result.id}>
<div>
<div>{result.title}</div>
<div style={{ fontSize: '0.8em', opacity: 0.7 }}>
Similarity: {(result.similarity_score * 100).toFixed(1)}%
</div>
{result.highlight && (
<div
style={{ fontSize: '0.8em', opacity: 0.6 }}
dangerouslySetInnerHTML={{ __html: result.highlight }}
/>
)}
</div>
</Spotlight.Action>
))}
</Spotlight.ActionsList>
</Spotlight.Root>
);
}
```
## Search Mode Toggle
Create a component that allows users to choose between traditional and AI search:
```typescript
// apps/client/src/features/search/components/search-mode-toggle.tsx
import { SegmentedControl } from "@mantine/core";
import { IconSearch, IconBrain } from "@tabler/icons-react";
interface SearchModeToggleProps {
value: 'traditional' | 'ai' | 'hybrid';
onChange: (value: 'traditional' | 'ai' | 'hybrid') => void;
}
export function SearchModeToggle({ value, onChange }: SearchModeToggleProps) {
return (
<SegmentedControl
value={value}
// SegmentedControl reports a plain string, so narrow it back to the union type
onChange={(mode) => onChange(mode as 'traditional' | 'ai' | 'hybrid')}
// SegmentedControl items have no icon prop, so embed the icons in the labels
data={[
{
value: 'traditional',
label: (
<>
<IconSearch size={16} /> Traditional
</>
),
},
{
value: 'ai',
label: (
<>
<IconBrain size={16} /> AI Semantic
</>
),
},
{
value: 'hybrid',
label: (
<>
<IconBrain size={16} /> Hybrid
</>
),
},
]}
/>
);
}
```
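Below is one way you might wire the toggle to the React Query hooks from earlier. It is only a sketch: the `SearchPanel` component and the trick of disabling inactive hooks by passing an empty query are assumptions, not part of the existing client code.
```typescript
// apps/client/src/features/search/components/search-panel.tsx (hypothetical)
import { useState } from "react";
import { SearchModeToggle } from "./search-mode-toggle";
import {
  useAiSemanticSearchQuery,
  useAiHybridSearchQuery,
} from "@/features/ai-search/queries/ai-search-query";

export function SearchPanel({ query }: { query: string }) {
  const [mode, setMode] = useState<'traditional' | 'ai' | 'hybrid'>('ai');

  // Each AI hook receives an empty query unless its mode is active,
  // so its `enabled` flag keeps it from firing unnecessary requests.
  const semantic = useAiSemanticSearchQuery({ query: mode === 'ai' ? query : '' });
  const hybrid = useAiHybridSearchQuery({ query: mode === 'hybrid' ? query : '' });

  const results = mode === 'hybrid' ? hybrid.data : mode === 'ai' ? semantic.data : [];

  return (
    <div>
      <SearchModeToggle value={mode} onChange={setMode} />
      {/* Render the existing traditional search results here when mode === 'traditional' */}
      <ul>
        {results?.map((result) => (
          <li key={result.id}>{result.title}</li>
        ))}
      </ul>
    </div>
  );
}
```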
## Performance Considerations
### Async Indexing
For better performance, consider making indexing asynchronous:
```typescript
// Use a queue for heavy indexing operations
import { InjectQueue } from '@nestjs/bullmq';
import { Queue } from 'bullmq';
@Injectable()
export class PageService {
constructor(
@InjectQueue('ai-search') private aiSearchQueue: Queue,
) {}
async createPage(createPageDto: CreatePageDto): Promise<Page> {
const page = await this.pageRepo.create(createPageDto);
// Queue indexing job instead of doing it synchronously
await this.aiSearchQueue.add('index-page', {
pageId: page.id,
workspaceId: page.workspaceId,
spaceId: page.spaceId,
title: page.title,
textContent: page.textContent,
});
return page;
}
}
```
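The snippet above only enqueues jobs; something still has to consume them. A rough sketch of the consumer side with `@nestjs/bullmq`, assuming the job payload shown above and reusing `AiSearchService.reindexPages`. This processor is not part of the AI Search module, so you would need to register it (and `BullModule.registerQueue({ name: 'ai-search' })`) yourself:
```typescript
// Hypothetical worker for the 'ai-search' queue; not part of the shipped module.
import { Processor, WorkerHost } from '@nestjs/bullmq';
import { Job } from 'bullmq';
import { AiSearchService } from '../ai-search/services/ai-search.service';

@Processor('ai-search')
export class AiSearchProcessor extends WorkerHost {
  constructor(private readonly aiSearchService: AiSearchService) {
    super();
  }

  async process(job: Job<{ pageId: string; workspaceId: string }>): Promise<void> {
    if (job.name === 'index-page') {
      // Re-embed and re-index the single page referenced by the job
      await this.aiSearchService.reindexPages({
        pageIds: [job.data.pageId],
        workspaceId: job.data.workspaceId,
      });
    }
  }
}
```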
### Conditional Indexing
Only index pages when AI search is configured:
```typescript
async createPage(createPageDto: CreatePageDto): Promise<Page> {
const page = await this.pageRepo.create(createPageDto);
// Check if AI search is enabled before emitting events
if (this.embeddingService.isConfigured()) {
this.eventEmitter.emit('page.created', {
pageId: page.id,
workspaceId: page.workspaceId,
spaceId: page.spaceId,
title: page.title,
textContent: page.textContent,
operation: 'create'
});
}
return page;
}
```
## Testing Integration
### Unit Tests
```typescript
// page.service.spec.ts
import { EventEmitter2 } from '@nestjs/event-emitter';
describe('PageService', () => {
let service: PageService;
let eventEmitter: EventEmitter2;
beforeEach(async () => {
const module = await Test.createTestingModule({
providers: [
PageService,
{
provide: EventEmitter2,
useValue: {
emit: jest.fn(),
},
},
],
}).compile();
service = module.get<PageService>(PageService);
eventEmitter = module.get<EventEmitter2>(EventEmitter2);
});
it('should emit page.created event when creating page', async () => {
const createPageDto = { title: 'Test Page', content: 'Test content' };
await service.createPage(createPageDto);
expect(eventEmitter.emit).toHaveBeenCalledWith('page.created',
expect.objectContaining({
operation: 'create',
title: 'Test Page',
})
);
});
});
```
## Monitoring and Analytics
### Track Search Usage
```typescript
// Add search analytics
this.eventEmitter.emit('ai-search.query', {
query: searchParams.query,
userId: opts.userId,
workspaceId: opts.workspaceId,
searchType: 'semantic',
resultCount: results.length,
executionTime: Date.now() - startTime,
});
```
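Nothing in the module consumes this event yet. A hypothetical listener that simply logs the metrics (swap in whatever analytics sink you use):
```typescript
// Hypothetical listener for the analytics event above; where the data ends up
// (logger, metrics backend, analytics table) is up to you.
import { Injectable, Logger } from '@nestjs/common';
import { OnEvent } from '@nestjs/event-emitter';

@Injectable()
export class AiSearchAnalyticsListener {
  private readonly logger = new Logger(AiSearchAnalyticsListener.name);

  @OnEvent('ai-search.query')
  handleSearchQuery(event: {
    query: string;
    userId?: string;
    workspaceId: string;
    searchType: string;
    resultCount: number;
    executionTime: number;
  }) {
    this.logger.log(
      `AI search (${event.searchType}) returned ${event.resultCount} results in ${event.executionTime}ms`,
    );
  }
}
```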
This integration approach ensures that your AI search stays in sync with your content while maintaining good performance and error handling.

View File

@ -0,0 +1,201 @@
# AI Search Module
An AI-powered semantic search module for Docmost. It stores page embeddings in a Redis vector database via the official **node-redis** client and follows the Redis vector search specifications to provide intelligent search capabilities.
## Features
- **Semantic Search**: Find content based on meaning rather than exact keywords using vector embeddings
- **Hybrid Search**: Combines both semantic and traditional full-text search with configurable weights
- **Redis Vector Database**: Uses Redis with RediSearch module for efficient vector operations via node-redis client
- **HNSW Indexing**: Hierarchical Navigable Small World algorithm for fast approximate nearest neighbor search
- **Auto-indexing**: Automatically indexes pages when they are created or updated
- **OpenAI-Compatible**: Supports OpenAI and OpenAI-compatible embedding providers
- **Batch Operations**: Efficient batch processing for large-scale indexing
- **Permission-aware**: Respects user permissions and workspace access
- **COSINE Distance**: Uses cosine distance metric for semantic similarity
## Architecture
```
ai-search/
├── ai-search.controller.ts # REST API endpoints
├── ai-search.module.ts # Module configuration
├── dto/
│ └── semantic-search.dto.ts # Request/response DTOs
├── services/
│ ├── ai-search.service.ts # Main search logic
│ ├── embedding.service.ts # Text embedding generation
│ ├── redis-vector.service.ts # Redis vector operations (node-redis)
│ └── vector.service.ts # Vector math utilities
├── listeners/
│ └── page-update.listener.ts # Auto-indexing on page changes
├── constants.ts # Configuration constants
├── README.md # This file
├── SETUP.md # Setup guide
└── INTEGRATION.md # Integration examples
```
## Configuration
Add these environment variables to your `.env` file:
```env
# Redis Vector Database (using node-redis client)
REDIS_VECTOR_HOST=localhost
REDIS_VECTOR_PORT=6379
REDIS_VECTOR_PASSWORD=your_redis_password
REDIS_VECTOR_DB=0
REDIS_VECTOR_INDEX=docmost_pages
# AI Embedding Configuration (OpenAI-compatible)
AI_EMBEDDING_MODEL=text-embedding-3-small
AI_EMBEDDING_DIMENSIONS=1536
AI_EMBEDDING_BASE_URL=https://api.openai.com/v1/embeddings # Optional: for custom providers
# OpenAI API Key (or compatible provider key)
OPENAI_API_KEY=your_openai_api_key
```
## Redis Vector Search Implementation
This implementation follows the official [Redis Vector Search specifications](https://redis.io/docs/latest/develop/interact/search-and-query/query/vector-search/) and uses the [node-redis client](https://redis.io/docs/latest/develop/clients/nodejs/vecsearch/) for proper integration.
### Key Features:
- **HNSW Algorithm**: Uses Hierarchical Navigable Small World for fast vector indexing
- **COSINE Distance**: Semantic similarity using cosine distance metric
- **KNN Queries**: K-nearest neighbors search with `*=>[KNN k @embedding $vector AS distance]` (see the example below)
- **Hash Storage**: Vectors stored as Redis hash documents with binary embedding data
- **node-redis Client**: Official Redis client with full vector search support
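For reference, a minimal node-redis sketch of that KNN query, assuming the index created by `RedisVectorService` and the hash fields described below:
```typescript
// Standalone sketch of the KNN query; the index name matches the one
// RedisVectorService creates, and the fields follow the schema below.
import { createClient } from 'redis';

async function knnSearch(queryEmbedding: number[], k = 10) {
  const redis = createClient({ url: 'redis://localhost:6379' });
  await redis.connect();

  const result = await redis.ft.search(
    'docmost_pages_index',
    `*=>[KNN ${k} @embedding $vector AS distance]`,
    {
      PARAMS: { vector: Buffer.from(new Float32Array(queryEmbedding).buffer) },
      RETURN: ['page_id', 'title', 'distance'],
      DIALECT: 2, // required for parameterized vector queries
    },
  );

  await redis.quit();

  // Lower cosine distance means a closer semantic match
  return result.documents.map((doc) => ({
    pageId: String(doc.value.page_id),
    distance: Number(doc.value.distance),
  }));
}
```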
### Vector Index Schema:
```typescript
{
page_id: SchemaFieldTypes.TEXT, // Sortable page identifier
workspace_id: SchemaFieldTypes.TEXT, // Sortable workspace filter
space_id: SchemaFieldTypes.TEXT, // Space filter
title: SchemaFieldTypes.TEXT, // Page title
embedding: { // Vector field
type: SchemaFieldTypes.VECTOR,
ALGORITHM: VectorAlgorithms.HNSW, // HNSW indexing
TYPE: 'FLOAT32', // 32-bit floats
DIM: 1536, // Embedding dimensions
DISTANCE_METRIC: 'COSINE', // Cosine similarity
},
indexed_at: SchemaFieldTypes.NUMERIC // Indexing timestamp
}
```
## API Endpoints
### Semantic Search
```http
POST /ai-search/semantic
Content-Type: application/json
{
"query": "machine learning algorithms",
"spaceId": "optional-space-id",
"limit": 20,
"similarity_threshold": 0.7
}
```
### Hybrid Search
```http
POST /ai-search/hybrid
Content-Type: application/json
{
"query": "neural networks",
"spaceId": "optional-space-id",
"limit": 20
}
```
### Reindex Pages
```http
POST /ai-search/reindex
Content-Type: application/json
{
"spaceId": "optional-space-id",
"pageIds": ["page-id-1", "page-id-2"]
}
```
## Usage Examples
### Basic Semantic Search
```typescript
import { AiSearchService } from './ai-search.service';
// Search for pages semantically using vector similarity
const results = await aiSearchService.semanticSearch(
'artificial intelligence concepts',
{ limit: 10, similarity_threshold: 0.8 },
{ userId: 'user-id', workspaceId: 'workspace-id' }
);
```
### Hybrid Search with Weighted Scoring
```typescript
// Combine semantic (70%) and text search (30%)
const results = await aiSearchService.hybridSearch(
'machine learning tutorial',
{ spaceId: 'space-id', limit: 15 },
{ userId: 'user-id', workspaceId: 'workspace-id' }
);
```
## Dependencies
The module uses the official **node-redis** package for Redis integration:
```json
{
"redis": "^4.7.0"
}
```
Install with pnpm:
```bash
pnpm install
```
## Performance Optimizations
### Vector Search Performance
- **HNSW Algorithm**: Provides O(log n) search complexity
- **COSINE Distance**: Efficient for normalized embeddings
- **Batch Operations**: Multi-command execution for bulk indexing
- **Connection Pooling**: Persistent Redis connections
### Memory Efficiency
- **Float32 Vectors**: Reduced memory usage vs Float64
- **TTL Expiration**: Automatic cleanup of old vectors (30 days)
- **Prefix-based Storage**: Organized key structure
## Vector Storage Format
Vectors are stored as Redis hash documents:
```
Key: vector:{workspaceId}:{pageId}
Fields:
page_id: "page-uuid"
workspace_id: "workspace-uuid"
space_id: "space-uuid"
title: "Page Title"
embedding: Buffer<Float32Array> // Binary vector data
indexed_at: "1234567890"
```
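The `embedding` field holds the raw Float32 bytes of the vector. A small sketch of the conversion in both directions:
```typescript
// number[] -> binary Float32 layout stored in the hash's "embedding" field
function toVectorBuffer(embedding: number[]): Buffer {
  return Buffer.from(new Float32Array(embedding).buffer);
}

// binary Float32 layout -> number[] (e.g. when reading the hash back)
function fromVectorBuffer(buffer: Buffer): number[] {
  return Array.from(
    new Float32Array(buffer.buffer, buffer.byteOffset, buffer.byteLength / 4),
  );
}
```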
## Error Handling
The module includes comprehensive error handling:
- **Connection Resilience**: Automatic reconnection on Redis failures
- **Embedding Retries**: Exponential backoff for API failures (sketched below)
- **Vector Validation**: Dimension and format checking
- **Graceful Degradation**: Fallback to text search on vector errors
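The retry behaviour is a pattern to apply around your embedding calls rather than a ready-made helper; a minimal sketch using `MAX_RETRIES` and `RETRY_DELAY_MS` from `constants.ts`:
```typescript
// Exponential-backoff retry sketch; the wrapped function is whatever
// embedding or API request you want to make resilient.
import { AI_SEARCH_CONFIG } from './constants';

async function withRetries<T>(fn: () => Promise<T>): Promise<T> {
  let lastError: unknown;
  for (let attempt = 0; attempt < AI_SEARCH_CONFIG.MAX_RETRIES; attempt++) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;
      // 1s, 2s, 4s, ... based on RETRY_DELAY_MS
      const delay = AI_SEARCH_CONFIG.RETRY_DELAY_MS * 2 ** attempt;
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
  throw lastError;
}
```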
This implementation provides production-ready vector search capabilities that scale with your content while maintaining excellent search quality and performance.

View File

@ -0,0 +1,224 @@
# AI Search Setup Guide
This guide will help you set up the AI Search module with a Redis vector database for Docmost.
## Prerequisites
1. **Redis with RediSearch**: You need Redis with the RediSearch module for vector operations
2. **OpenAI API Key**: For embedding generation (or alternative provider)
3. **Node.js Dependencies**: The required packages are already added to package.json
## Step 1: Install Redis with RediSearch
### Option A: Using Docker (Recommended)
```bash
# Using Redis Stack (includes RediSearch and vector capabilities)
docker run -d --name redis-stack \
-p 6379:6379 \
-v redis-data:/data \
redis/redis-stack-server:latest
# Or using the standalone RediSearch module image
docker run -d --name redis-vector \
-p 6379:6379 \
-v redis-data:/data \
redislabs/redisearch:latest
```
### Option B: Manual Installation
1. Install Redis from source with RediSearch module
2. Or use Redis Cloud with RediSearch enabled
## Step 2: Configure Environment Variables
Add these variables to your `.env` file:
```env
# ===== Redis Vector Database Configuration =====
REDIS_VECTOR_HOST=localhost
REDIS_VECTOR_PORT=6379
REDIS_VECTOR_PASSWORD=your_redis_password_here
REDIS_VECTOR_DB=0
REDIS_VECTOR_INDEX=docmost_pages
# ===== AI Embedding Configuration (OpenAI-compatible) =====
AI_EMBEDDING_MODEL=text-embedding-3-small
AI_EMBEDDING_DIMENSIONS=1536
AI_EMBEDDING_BASE_URL=https://api.openai.com/v1/embeddings # Optional: for custom providers
# ===== OpenAI API Key (or compatible provider key) =====
OPENAI_API_KEY=your_openai_api_key_here
```
## Step 3: Custom OpenAI-Compatible Providers
You can use any provider that follows the OpenAI embeddings API specification by setting the `AI_EMBEDDING_BASE_URL`:
### Examples:
**Azure OpenAI:**
```env
AI_EMBEDDING_BASE_URL=https://your-resource.openai.azure.com/openai/deployments/your-deployment/embeddings?api-version=2023-05-15
OPENAI_API_KEY=your_azure_openai_key
```
**Ollama (local):**
```env
AI_EMBEDDING_BASE_URL=http://localhost:11434/v1/embeddings
AI_EMBEDDING_MODEL=nomic-embed-text
AI_EMBEDDING_DIMENSIONS=768
```
**Other compatible providers:**
- Together AI
- Anyscale
- OpenRouter
- Any provider implementing OpenAI's embeddings API
## Step 4: Install Dependencies
The required dependencies are already in package.json. Run:
```bash
pnpm install
```
## Step 5: Initialize the Vector Index
The vector index will be created automatically when the service starts. You can also manually trigger reindexing:
```bash
# Using the API endpoint
curl -X POST http://localhost:3000/ai-search/reindex \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_JWT_TOKEN" \
-d '{"workspaceId": "your-workspace-id"}'
```
## Step 6: Test the Setup
### Test Semantic Search
```bash
curl -X POST http://localhost:3000/ai-search/semantic \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_JWT_TOKEN" \
-d '{
"query": "machine learning algorithms",
"limit": 10,
"similarity_threshold": 0.7
}'
```
### Test Hybrid Search
```bash
curl -X POST http://localhost:3000/ai-search/hybrid \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_JWT_TOKEN" \
-d '{
"query": "neural networks",
"limit": 10
}'
```
## Step 7: Monitor the Setup
### Check Redis Connection
```bash
redis-cli ping
# Should return PONG
```
### Check RediSearch Module
```bash
redis-cli MODULE LIST
# Should show RediSearch in the list
```
### Check Index Status
```bash
redis-cli FT.INFO docmost_pages
# Should show index information
```
## Troubleshooting
### Common Issues
1. **Redis Connection Error**
- Check if Redis is running: `docker ps` or `redis-cli ping`
- Verify connection details in .env file
- Check firewall/network settings
2. **RediSearch Module Not Found**
- Ensure you're using Redis Stack or Redis with RediSearch
- Check module is loaded: `redis-cli MODULE LIST`
3. **OpenAI API Errors**
- Verify API key is correct and has sufficient credits
- Check API usage limits and quotas
- Ensure model name is correct
4. **Embedding Generation Fails**
- Check text length (max 8000 characters by default)
- Verify network connectivity to embedding provider
- Check API rate limits
5. **Search Returns No Results**
- Ensure pages are indexed: check logs for indexing errors
- Verify similarity threshold (try lowering it)
- Check user permissions for searched content
### Debug Logging
Enable debug logging by setting:
```env
LOG_LEVEL=debug
```
### Performance Tuning
1. **Batch Size**: Adjust based on your API rate limits
```env
AI_SEARCH_BATCH_SIZE=50 # Lower for rate-limited APIs
```
2. **Similarity Threshold**: Balance precision vs recall
```env
AI_SEARCH_SIMILARITY_THRESHOLD=0.6 # Lower = more results
```
3. **Redis Memory**: Monitor memory usage as index grows
```bash
redis-cli INFO memory
```
## Production Deployment
### Redis Configuration
- Use Redis Cluster for high availability
- Set up proper backup and persistence
- Monitor memory usage and performance
- Configure appropriate TTL for vectors
### Security
- Use strong Redis passwords
- Enable TLS for Redis connections
- Secure API keys in environment variables
- Implement proper rate limiting
### Monitoring
- Set up alerts for Redis health
- Monitor embedding API usage and costs
- Track search performance metrics
- Log search queries for analysis
## Next Steps
1. **Auto-indexing**: Pages are automatically indexed on create/update
2. **Client Integration**: Add AI search to your frontend
3. **Custom Scoring**: Implement custom ranking algorithms
4. **Analytics**: Track search usage and effectiveness
For more detailed information, see the main README.md file.

View File

@ -0,0 +1,38 @@
import { Test, TestingModule } from '@nestjs/testing';
import { AiSearchController } from './ai-search.controller';
import { AiSearchService } from './services/ai-search.service';
import SpaceAbilityFactory from '../casl/abilities/space-ability.factory';
describe('AiSearchController', () => {
let controller: AiSearchController;
let service: AiSearchService;
beforeEach(async () => {
const module: TestingModule = await Test.createTestingModule({
controllers: [AiSearchController],
providers: [
{
provide: AiSearchService,
useValue: {
semanticSearch: jest.fn(),
hybridSearch: jest.fn(),
reindexPages: jest.fn(),
},
},
{
provide: SpaceAbilityFactory,
useValue: {
createForUser: jest.fn(),
},
},
],
}).compile();
controller = module.get<AiSearchController>(AiSearchController);
service = module.get<AiSearchService>(AiSearchService);
});
it('should be defined', () => {
expect(controller).toBeDefined();
});
});

View File

@ -0,0 +1,123 @@
import {
Controller,
Post,
Body,
UseGuards,
HttpCode,
HttpStatus,
BadRequestException,
ForbiddenException,
} from '@nestjs/common';
import { User } from '@docmost/db/types/entity.types';
import { Workspace } from '@docmost/db/types/entity.types';
import { AiSearchService } from './services/ai-search.service';
import { SemanticSearchDto, SemanticSearchShareDto } from './dto/semantic-search.dto';
import { JwtAuthGuard } from '../../common/guards/jwt-auth.guard';
import SpaceAbilityFactory from '../casl/abilities/space-ability.factory';
import { AuthUser } from '../../common/decorators/auth-user.decorator';
import { AuthWorkspace } from '../../common/decorators/auth-workspace.decorator';
import { SpaceCaslAction, SpaceCaslSubject } from '../casl/interfaces/space-ability.type';
import { Public } from '../../common/decorators/public.decorator';
@UseGuards(JwtAuthGuard)
@Controller('ai-search')
export class AiSearchController {
constructor(
private readonly aiSearchService: AiSearchService,
private readonly spaceAbility: SpaceAbilityFactory,
) {}
@HttpCode(HttpStatus.OK)
@Post('semantic')
async semanticSearch(
@Body() searchDto: SemanticSearchDto,
@AuthUser() user: User,
@AuthWorkspace() workspace: Workspace,
) {
delete searchDto.shareId;
if (searchDto.spaceId) {
const ability = await this.spaceAbility.createForUser(
user,
searchDto.spaceId,
);
if (ability.cannot(SpaceCaslAction.Read, SpaceCaslSubject.Page)) {
throw new ForbiddenException();
}
}
return this.aiSearchService.semanticSearch(searchDto.query, searchDto, {
userId: user.id,
workspaceId: workspace.id,
});
}
@HttpCode(HttpStatus.OK)
@Post('hybrid')
async hybridSearch(
@Body() searchDto: SemanticSearchDto,
@AuthUser() user: User,
@AuthWorkspace() workspace: Workspace,
) {
delete searchDto.shareId;
if (searchDto.spaceId) {
const ability = await this.spaceAbility.createForUser(
user,
searchDto.spaceId,
);
if (ability.cannot(SpaceCaslAction.Read, SpaceCaslSubject.Page)) {
throw new ForbiddenException();
}
}
return this.aiSearchService.hybridSearch(searchDto.query, searchDto, {
userId: user.id,
workspaceId: workspace.id,
});
}
@Public()
@HttpCode(HttpStatus.OK)
@Post('semantic-share')
async semanticSearchShare(
@Body() searchDto: SemanticSearchShareDto,
@AuthWorkspace() workspace: Workspace,
) {
delete searchDto.spaceId;
if (!searchDto.shareId) {
throw new BadRequestException('shareId is required');
}
return this.aiSearchService.semanticSearch(searchDto.query, searchDto, {
workspaceId: workspace.id,
});
}
@HttpCode(HttpStatus.OK)
@Post('reindex')
async reindexPages(
@Body() body: { spaceId?: string; pageIds?: string[] },
@AuthUser() user: User,
@AuthWorkspace() workspace: Workspace,
) {
if (body.spaceId) {
const ability = await this.spaceAbility.createForUser(
user,
body.spaceId,
);
if (ability.cannot(SpaceCaslAction.Manage, SpaceCaslSubject.Page)) {
throw new ForbiddenException();
}
}
return this.aiSearchService.reindexPages({
workspaceId: workspace.id,
spaceId: body.spaceId,
pageIds: body.pageIds,
});
}
}

View File

@ -0,0 +1,22 @@
import { Module } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { AiSearchController } from './ai-search.controller';
import { AiSearchService } from './services/ai-search.service';
import { VectorService } from './services/vector.service';
import { EmbeddingService } from './services/embedding.service';
import { RedisVectorService } from './services/redis-vector.service';
import { PageUpdateListener } from './listeners/page-update.listener';
@Module({
imports: [ConfigModule],
controllers: [AiSearchController],
providers: [
AiSearchService,
VectorService,
EmbeddingService,
RedisVectorService,
PageUpdateListener,
],
exports: [AiSearchService, VectorService, EmbeddingService, RedisVectorService],
})
export class AiSearchModule {}

View File

@ -0,0 +1,50 @@
export const AI_SEARCH_CONFIG = {
// Default similarity thresholds
DEFAULT_SIMILARITY_THRESHOLD: 0.7,
HIGH_SIMILARITY_THRESHOLD: 0.85,
LOW_SIMILARITY_THRESHOLD: 0.6,
// Search limits
MAX_SEARCH_LIMIT: 100,
DEFAULT_SEARCH_LIMIT: 20,
MIN_SEARCH_LIMIT: 1,
// Embedding configuration
DEFAULT_EMBEDDING_DIMENSIONS: 1536,
MAX_TEXT_LENGTH: 8000,
// Indexing configuration
DEFAULT_BATCH_SIZE: 100,
INDEX_TTL_DAYS: 30,
// Hybrid search weights
SEMANTIC_WEIGHT: 0.7,
TEXT_WEIGHT: 0.3,
// Redis configuration
REDIS_KEY_PREFIX: 'docmost:ai-search',
VECTOR_KEY_PREFIX: 'vector',
METADATA_KEY_PREFIX: 'metadata',
// Retry configuration
MAX_RETRIES: 3,
RETRY_DELAY_MS: 1000,
// OpenAI configuration
OPENAI_BATCH_SIZE: 100,
} as const;
export const EMBEDDING_MODELS = {
OPENAI: {
'text-embedding-3-small': 1536,
'text-embedding-3-large': 3072,
'text-embedding-ada-002': 1536,
},
} as const;
export const SEARCH_EVENTS = {
PAGE_CREATED: 'page.created',
PAGE_UPDATED: 'page.updated',
PAGE_DELETED: 'page.deleted',
BULK_REINDEX: 'ai-search.bulk-reindex',
} as const;

View File

@ -0,0 +1,103 @@
import {
IsNotEmpty,
IsString,
IsOptional,
IsNumber,
Min,
Max,
IsArray,
IsBoolean,
} from 'class-validator';
export class SemanticSearchDto {
@IsNotEmpty()
@IsString()
query: string;
@IsOptional()
@IsString()
spaceId?: string;
@IsOptional()
@IsString()
shareId?: string;
@IsOptional()
@IsString()
creatorId?: string;
@IsOptional()
@IsNumber()
@Min(1)
@Max(100)
limit?: number = 20;
@IsOptional()
@IsNumber()
@Min(0)
offset?: number = 0;
@IsOptional()
@IsNumber()
@Min(0)
@Max(1)
similarity_threshold?: number = 0.7;
@IsOptional()
@IsBoolean()
include_highlights?: boolean = true;
@IsOptional()
@IsArray()
@IsString({ each: true })
filters?: string[];
}
export class SemanticSearchShareDto extends SemanticSearchDto {
@IsNotEmpty()
@IsString()
shareId: string;
@IsOptional()
@IsString()
spaceId?: string;
}
export class SemanticSearchResponseDto {
id: string;
title: string;
icon: string;
parentPageId: string;
creatorId: string;
similarity_score: number;
semantic_rank: number;
highlight: string;
createdAt: Date;
updatedAt: Date;
space?: {
id: string;
name: string;
slug: string;
};
}
export class HybridSearchResponseDto extends SemanticSearchResponseDto {
text_rank?: number;
combined_score: number;
search_type: 'semantic' | 'text' | 'hybrid';
}
export class ReindexDto {
@IsOptional()
@IsString()
spaceId?: string;
@IsOptional()
@IsArray()
@IsString({ each: true })
pageIds?: string[];
@IsNotEmpty()
@IsString()
workspaceId: string;
}

View File

@ -0,0 +1,88 @@
import { Injectable, Logger } from '@nestjs/common';
import { OnEvent } from '@nestjs/event-emitter';
import { AiSearchService } from '../services/ai-search.service';
import { EmbeddingService } from '../services/embedding.service';
import { RedisVectorService } from '../services/redis-vector.service';
import { Page } from '@docmost/db/types/entity.types';
import { UpdatedPageEvent } from '../../../collaboration/listeners/history.listener';
export interface PageUpdateEvent {
pageId: string;
workspaceId: string;
spaceId: string;
title?: string;
textContent?: string;
operation: 'create' | 'update' | 'delete';
}
@Injectable()
export class PageUpdateListener {
private readonly logger = new Logger(PageUpdateListener.name);
constructor(
private readonly aiSearchService: AiSearchService,
private readonly embeddingService: EmbeddingService,
private readonly redisVectorService: RedisVectorService,
) {}
@OnEvent('page.created')
async handlePageCreated(event: Page) {
await this.indexPage(event);
}
@OnEvent('collab.page.updated')
async handlePageUpdated(event: UpdatedPageEvent) {
await this.indexPage(event.page);
}
@OnEvent('page.deleted')
async handlePageDeleted(event: Page) {
try {
await this.redisVectorService.deletePage(event.id, event.workspaceId);
this.logger.debug(`Removed page ${event.id} from vector index`);
} catch (error) {
this.logger.error(
`Failed to remove page ${event.id} from vector index:`,
error,
);
}
}
private async indexPage(event: Page) {
try {
const content = `${event.title || ''} ${event.textContent || ''}`.trim();
if (!content) {
this.logger.debug(
`Skipping indexing for page ${event.id} - no content`,
);
return;
}
if (!this.embeddingService.isConfigured()) {
this.logger.debug(
'Embedding service not configured, skipping indexing',
);
return;
}
const embedding = await this.embeddingService.generateEmbedding(content);
await this.redisVectorService.indexPage({
pageId: event.id,
embedding,
metadata: {
title: event.title,
workspaceId: event.workspaceId,
spaceId: event.spaceId,
},
});
this.logger.debug(`Indexed page ${event.id} for AI search`);
} catch (error) {
this.logger.error(`Failed to index page ${event.id}:`, error);
}
}
}

View File

@ -0,0 +1,438 @@
import { Injectable, Logger } from '@nestjs/common';
import { InjectKysely } from 'nestjs-kysely';
import { KyselyDB } from '@docmost/db/types/kysely.types';
import { sql } from 'kysely';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
import { SpaceMemberRepo } from '@docmost/db/repos/space/space-member.repo';
import { ShareRepo } from '@docmost/db/repos/share/share.repo';
import { VectorService } from './vector.service';
import { EmbeddingService } from './embedding.service';
import { RedisVectorService } from './redis-vector.service';
import {
SemanticSearchDto,
SemanticSearchResponseDto,
HybridSearchResponseDto,
ReindexDto,
} from '../dto/semantic-search.dto';
// eslint-disable-next-line @typescript-eslint/no-require-imports
const tsquery = require('pg-tsquery')();
@Injectable()
export class AiSearchService {
private readonly logger = new Logger(AiSearchService.name);
constructor(
@InjectKysely() private readonly db: KyselyDB,
private readonly pageRepo: PageRepo,
private readonly shareRepo: ShareRepo,
private readonly spaceMemberRepo: SpaceMemberRepo,
private readonly vectorService: VectorService,
private readonly embeddingService: EmbeddingService,
private readonly redisVectorService: RedisVectorService,
) {}
async semanticSearch(
query: string,
searchParams: SemanticSearchDto,
opts: {
userId?: string;
workspaceId: string;
},
): Promise<SemanticSearchResponseDto[]> {
if (query.length < 1) {
return [];
}
try {
// Generate embedding for the query
const queryEmbedding =
await this.embeddingService.generateEmbedding(query);
// Get page IDs that user has access to
const accessiblePageIds = await this.getAccessiblePageIds(
searchParams,
opts,
);
if (accessiblePageIds.length === 0) {
return [];
}
// Perform vector search
const vectorResults = await this.redisVectorService.searchSimilar(
queryEmbedding,
{
limit: searchParams.limit || 20,
offset: searchParams.offset || 0,
threshold: searchParams.similarity_threshold || 0.7,
filters: {
workspace_id: opts.workspaceId,
page_ids: accessiblePageIds,
},
},
);
if (vectorResults.length === 0) {
return [];
}
// Get page details from database
const pageIds = vectorResults.map((result) => result.pageId);
const pages = await this.getPageDetails(pageIds, searchParams);
// Combine vector results with page details
const results = this.combineVectorResultsWithPages(
vectorResults,
pages,
query,
searchParams.include_highlights,
);
return results;
} catch (error) {
this.logger.error(`Semantic search failed: ${error?.['message']}`, error);
throw error;
}
}
async hybridSearch(
query: string,
searchParams: SemanticSearchDto,
opts: {
userId?: string;
workspaceId: string;
},
): Promise<HybridSearchResponseDto[]> {
if (query.length < 1) {
return [];
}
try {
// Run both semantic and text search in parallel
const [semanticResults, textResults] = await Promise.all([
this.semanticSearch(query, searchParams, opts),
this.performTextSearch(query, searchParams, opts),
]);
// Combine and rank results
const hybridResults = this.combineHybridResults(
semanticResults,
textResults,
query,
);
return hybridResults;
} catch (error) {
this.logger.error(`Hybrid search failed: ${error?.['message']}`, error);
throw error;
}
}
async reindexPages(
params: ReindexDto,
): Promise<{ indexed: number; errors?: string[] }> {
try {
let query = this.db
.selectFrom('pages')
.select(['id', 'title', 'textContent'])
.where('workspaceId', '=', params.workspaceId)
.where('deletedAt', 'is', null);
if (params.spaceId) {
query = query.where('spaceId', '=', params.spaceId);
}
if (params.pageIds && params.pageIds.length > 0) {
query = query.where('id', 'in', params.pageIds);
}
const pages = await query.execute();
const results = await Promise.allSettled(
pages.map(async (page) => {
const content =
`${page.title || ''} ${page.textContent || ''}`.trim();
if (!content) return null;
const embedding =
await this.embeddingService.generateEmbedding(content);
await this.redisVectorService.indexPage({
pageId: page.id,
embedding,
metadata: {
title: page.title,
workspaceId: params.workspaceId,
},
});
return page.id;
}),
);
const indexed = results.filter(
(r) => r.status === 'fulfilled' && r.value,
).length;
const errors = results
.filter((r) => r.status === 'rejected')
.map((r) => r.reason.message);
this.logger.log(
`Reindexed ${indexed} pages for workspace ${params.workspaceId}`,
);
return { indexed, errors: errors.length > 0 ? errors : undefined };
} catch (error) {
this.logger.error(`Reindexing failed: ${error?.['message']}`, error);
throw error;
}
}
private async getAccessiblePageIds(
searchParams: SemanticSearchDto,
opts: { userId?: string; workspaceId: string },
): Promise<string[]> {
if (searchParams.shareId) {
// Handle shared pages
const share = await this.shareRepo.findById(searchParams.shareId);
if (!share || share.workspaceId !== opts.workspaceId) {
return [];
}
const pageIdsToSearch = [];
if (share.includeSubPages) {
const pageList = await this.pageRepo.getPageAndDescendants(
share.pageId,
{ includeContent: false },
);
pageIdsToSearch.push(...pageList.map((page) => page.id));
} else {
pageIdsToSearch.push(share.pageId);
}
return pageIdsToSearch;
}
if (searchParams.spaceId) {
// Get pages from specific space
const pages = await this.db
.selectFrom('pages')
.select('id')
.where('spaceId', '=', searchParams.spaceId)
.where('workspaceId', '=', opts.workspaceId)
.where('deletedAt', 'is', null)
.execute();
return pages.map((p) => p.id);
}
if (opts.userId) {
// Get pages from user's accessible spaces
const userSpaceIds = await this.spaceMemberRepo.getUserSpaceIds(
opts.userId,
);
if (userSpaceIds.length === 0) {
return [];
}
const pages = await this.db
.selectFrom('pages')
.select('id')
.where('spaceId', 'in', userSpaceIds)
.where('workspaceId', '=', opts.workspaceId)
.where('deletedAt', 'is', null)
.execute();
return pages.map((p) => p.id);
}
return [];
}
private async getPageDetails(
pageIds: string[],
searchParams: SemanticSearchDto,
) {
let query = this.db
.selectFrom('pages')
.select([
'id',
'slugId',
'title',
'icon',
'parentPageId',
'creatorId',
'createdAt',
'updatedAt',
'textContent',
]);
if (!searchParams.shareId) {
query = query.select((eb) => this.pageRepo.withSpace(eb));
}
const pages = await query
.where('id', 'in', pageIds)
.where('deletedAt', 'is', null)
.execute();
return pages;
}
private combineVectorResultsWithPages(
vectorResults: any[],
pages: any[],
query: string,
includeHighlights: boolean = true,
): SemanticSearchResponseDto[] {
const pageMap = new Map(pages.map((p) => [p.id, p]));
return vectorResults
.map((result, index) => {
const page = pageMap.get(result.pageId);
if (!page) return null;
let highlight = '';
if (includeHighlights && page.textContent) {
highlight = this.generateHighlight(page.textContent, query);
}
return {
id: page.id,
title: page.title,
icon: page.icon,
parentPageId: page.parentPageId,
creatorId: page.creatorId,
similarity_score: result.score,
semantic_rank: index + 1,
highlight,
createdAt: page.createdAt,
updatedAt: page.updatedAt,
space: page.space
? {
id: page.space.id,
name: page.space.name,
slug: page.space.slug,
}
: undefined,
};
})
.filter(Boolean);
}
private async performTextSearch(
query: string,
searchParams: SemanticSearchDto,
opts: { userId?: string; workspaceId: string },
) {
const searchQuery = tsquery(query.trim() + '*');
const accessiblePageIds = await this.getAccessiblePageIds(
searchParams,
opts,
);
if (accessiblePageIds.length === 0) {
return [];
}
const results = await this.db
.selectFrom('pages')
.select([
'id',
'slugId',
'title',
'icon',
'parentPageId',
'creatorId',
'createdAt',
'updatedAt',
sql<number>`ts_rank(tsv, to_tsquery(${searchQuery}))`.as('text_rank'),
sql<string>`ts_headline('english', text_content, to_tsquery(${searchQuery}),'MinWords=9, MaxWords=10, MaxFragments=3')`.as(
'highlight',
),
])
.where('tsv', '@@', sql<string>`to_tsquery(${searchQuery})`)
.where('id', 'in', accessiblePageIds)
.orderBy('text_rank', 'desc')
.limit(searchParams.limit || 20)
.execute();
return results.map((result) => ({
...result,
text_rank: result.text_rank,
search_type: 'text' as const,
}));
}
private combineHybridResults(
semanticResults: SemanticSearchResponseDto[],
textResults: any[],
query: string,
): HybridSearchResponseDto[] {
const combinedMap = new Map<string, HybridSearchResponseDto>();
// Add semantic results
semanticResults.forEach((result, index) => {
combinedMap.set(result.id, {
...result,
text_rank: undefined,
combined_score: result.similarity_score * 0.7, // Weight semantic results
search_type: 'semantic',
});
});
// Add text results or combine with existing
textResults.forEach((result, index) => {
const existing = combinedMap.get(result.id);
if (existing) {
// Combine scores
existing.combined_score =
existing.similarity_score * 0.7 + result.text_rank * 0.3;
existing.text_rank = result.text_rank;
existing.search_type = 'hybrid';
} else {
combinedMap.set(result.id, {
id: result.id,
title: result.title,
icon: result.icon,
parentPageId: result.parentPageId,
creatorId: result.creatorId,
similarity_score: 0,
semantic_rank: 0,
text_rank: result.text_rank,
combined_score: result.text_rank * 0.3,
highlight: result.highlight,
createdAt: result.createdAt,
updatedAt: result.updatedAt,
search_type: 'text',
});
}
});
// Sort by combined score
return Array.from(combinedMap.values())
.sort((a, b) => b.combined_score - a.combined_score)
.slice(0, 20);
}
private generateHighlight(content: string, query: string): string {
if (!content) return '';
const words = query.toLowerCase().split(/\s+/);
const sentences = content.split(/[.!?]+/);
for (const sentence of sentences) {
const lowerSentence = sentence.toLowerCase();
if (words.some((word) => lowerSentence.includes(word))) {
return sentence.trim().substring(0, 200) + '...';
}
}
return content.substring(0, 200) + '...';
}
}

View File

@ -0,0 +1,185 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import OpenAI from 'openai';
export interface EmbeddingConfig {
model: string;
apiKey?: string;
baseUrl?: string;
dimensions: number;
}
export interface EmbeddingResult {
embedding: number[];
tokens: number;
model: string;
}
@Injectable()
export class EmbeddingService {
private readonly logger = new Logger(EmbeddingService.name);
private readonly config: EmbeddingConfig;
private readonly openai: OpenAI;
constructor(private readonly configService: ConfigService) {
this.config = {
model: this.configService.get<string>(
'AI_EMBEDDING_MODEL',
'text-embedding-3-small',
),
apiKey: this.configService.get<string>('OPENAI_API_KEY'),
baseUrl: this.configService.get<string>('AI_EMBEDDING_BASE_URL'),
dimensions: Number(
this.configService.get<string>('AI_EMBEDDING_DIMENSIONS', '1536'),
),
};
if (!this.config.apiKey) {
this.logger.warn(
'OpenAI API key not configured. AI search will not work.',
);
}
// Initialize OpenAI client with optional custom base URL
this.openai = new OpenAI({
apiKey: this.config.apiKey || 'dummy-key',
baseURL: this.config.baseUrl,
});
}
/**
* Generate embedding for a single text
*/
async generateEmbedding(text: string): Promise<number[]> {
if (!text || text.trim().length === 0) {
throw new Error('Text cannot be empty');
}
const cleanText = this.preprocessText(text);
try {
const result = await this.generateEmbeddingWithOpenAI(cleanText);
return result.embedding;
} catch (error) {
this.logger.error(`Embedding generation failed:`, error);
// Re-throw so callers never receive an undefined embedding
throw error;
}
}
/**
* Generate embeddings for multiple texts in batch
*/
async generateEmbeddings(texts: string[]): Promise<number[][]> {
if (!texts || texts.length === 0) {
return [];
}
const cleanTexts = texts.map((text) => this.preprocessText(text));
const batchSize = this.getBatchSize();
const results: number[][] = [];
for (let i = 0; i < cleanTexts.length; i += batchSize) {
const batch = cleanTexts.slice(i, i + batchSize);
try {
const batchResults = await this.generateBatchEmbeddings(batch);
results.push(...batchResults);
} catch (error) {
this.logger.error(
`Batch embedding generation failed for batch ${i}:`,
error,
);
throw error;
}
}
return results;
}
/**
* Generate embedding using OpenAI API
*/
private async generateEmbeddingWithOpenAI(
text: string,
): Promise<EmbeddingResult> {
const response = await this.openai.embeddings.create({
model: this.config.model,
input: text,
dimensions: this.config.dimensions,
});
if (!response.data || response.data.length === 0) {
throw new Error('Invalid response from OpenAI API');
}
return {
embedding: response.data[0].embedding,
tokens: response.usage?.total_tokens || 0,
model: this.config.model,
};
}
/**
* Generate embeddings for multiple texts
*/
private async generateBatchEmbeddings(texts: string[]): Promise<number[][]> {
const response = await this.openai.embeddings.create({
model: this.config.model,
input: texts,
dimensions: this.config.dimensions,
});
if (!response.data || !Array.isArray(response.data)) {
throw new Error('Invalid response from OpenAI API');
}
return response.data.map((item) => item.embedding);
}
/**
* Preprocess text before embedding generation
*/
private preprocessText(text: string): string {
if (!text) return '';
// Remove excessive whitespace
let processed = text.replace(/\s+/g, ' ').trim();
// Truncate if too long (most models have token limits)
const maxLength = 8000; // Conservative limit
if (processed.length > maxLength) {
processed = processed.substring(0, maxLength);
}
return processed;
}
/**
* Get batch size for OpenAI API
*/
private getBatchSize(): number {
return 100; // OpenAI supports up to 2048 inputs
}
/**
* Sleep utility for retries
*/
private sleep(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms));
}
/**
* Check if embedding service is configured
*/
isConfigured(): boolean {
return !!this.config.apiKey;
}
/**
* Get embedding configuration
*/
getConfig(): EmbeddingConfig {
return { ...this.config };
}
}

View File

@ -0,0 +1,393 @@
import { Injectable, Logger, OnModuleDestroy } from '@nestjs/common';
import {
VectorSearchOptions,
VectorSearchResult,
VectorService,
} from './vector.service';
import {
createClient,
RedisClientType,
SCHEMA_FIELD_TYPE,
SCHEMA_VECTOR_FIELD_ALGORITHM,
} from 'redis';
import { EnvironmentService } from '../../../integrations/environment/environment.service';
export interface IndexPageData {
pageId: string;
embedding: number[];
metadata: {
title?: string;
workspaceId: string;
spaceId?: string;
[key: string]: any;
};
}
export interface RedisVectorConfig {
host: string;
port: number;
password?: string;
db?: number;
indexName: string;
vectorDimension: number;
}
@Injectable()
export class RedisVectorService implements OnModuleDestroy {
private readonly logger = new Logger(RedisVectorService.name);
private readonly redis: RedisClientType;
private readonly config: RedisVectorConfig;
private isIndexCreated = false;
constructor(
private readonly environmentService: EnvironmentService,
private readonly vectorService: VectorService,
) {
//@ts-ignore
this.config = {
indexName: 'docmost_pages_index',
vectorDimension: 1536, //AI_EMBEDDING_DIMENSIONS
};
this.redis = createClient({
url: this.environmentService.getRedisUrl(),
});
this.redis.on('error', (err) => {
this.logger.error('Redis Client Error:', err);
});
this.initializeConnection();
}
async searchSimilar(
queryEmbedding: number[],
options: VectorSearchOptions,
): Promise<VectorSearchResult[]> {
try {
await this.ensureIndexExists();
const { limit = 20, offset = 0, threshold = 0.7, filters } = options;
// Build query following Redis specs
let query = `*=>[KNN ${limit + offset} @embedding $vector AS score]`;
// Apply filters if provided
if (filters && Object.keys(filters).length > 0) {
const filterClauses = Object.entries(filters).map(([key, value]) => {
if (Array.isArray(value)) {
return `@${key}:{${value.join('|')}}`;
}
return `@${key}:${value}`;
});
query = `(${filterClauses.join(' ')})=>[KNN ${limit + offset} @embedding $vector AS score]`;
}
// Execute search using proper node-redis syntax
const searchOptions = {
PARAMS: {
vector: Buffer.from(new Float32Array(queryEmbedding).buffer),
},
SORTBY: {
BY: '@score' as `@${string}`,
DIRECTION: 'ASC' as 'ASC',
},
LIMIT: {
from: offset,
size: limit,
},
RETURN: ['page_id', 'workspace_id', 'space_id', 'title', 'score'],
DIALECT: 2,
};
const searchResult = await this.redis.ft.search(
this.config.indexName,
query,
searchOptions,
);
const results = this.parseSearchResults(searchResult, threshold);
this.logger.debug(`Vector search found ${results.length} results`);
return results;
} catch (error) {
this.logger.error('Vector search failed:', error);
throw new Error(`Vector search failed: ${error instanceof Error ? error.message : String(error)}`);
}
}
async indexPage(data: IndexPageData): Promise<void> {
try {
await this.ensureIndexExists();
const key = this.vectorService.createVectorKey(
data.pageId,
data.metadata.workspaceId,
);
// Store vector and metadata using proper node-redis hash operations
await this.redis.hSet(key, {
page_id: data.pageId,
workspace_id: data.metadata.workspaceId,
space_id: data.metadata.spaceId || '',
title: data.metadata.title || '',
embedding: Buffer.from(new Float32Array(data.embedding).buffer),
indexed_at: Date.now().toString(),
});
// Set TTL for the key
await this.redis.expire(key, 86400 * 30); // 30 days TTL
this.logger.debug(
`Indexed page ${data.pageId} in workspace ${data.metadata.workspaceId}`,
);
} catch (error) {
this.logger.error(
`Failed to index page ${data.pageId}: ${error?.['message']}`,
error,
);
throw error;
}
}
async deletePage(pageId: string, workspaceId: string): Promise<void> {
try {
const key = this.vectorService.createVectorKey(pageId, workspaceId);
await this.redis.del(key);
this.logger.debug(`Deleted page ${pageId} from vector index`);
} catch (error) {
this.logger.error(
`Failed to delete page ${pageId}: ${error?.['message']}`,
error,
);
throw error;
}
}
async batchIndexPages(
pages: IndexPageData[],
): Promise<{ indexed: number; errors: string[] }> {
const errors: string[] = [];
let indexed = 0;
try {
await this.ensureIndexExists();
// Process in batches to avoid memory issues
const batchSize = 100;
for (let i = 0; i < pages.length; i += batchSize) {
const batch = pages.slice(i, i + batchSize);
// Use node-redis multi for batch operations
const multi = this.redis.multi();
for (const page of batch) {
try {
const key = this.vectorService.createVectorKey(
page.pageId,
page.metadata.workspaceId,
);
multi.hSet(key, {
page_id: page.pageId,
workspace_id: page.metadata.workspaceId,
space_id: page.metadata.spaceId || '',
title: page.metadata.title || '',
embedding: Buffer.from(new Float32Array(page.embedding).buffer),
indexed_at: Date.now().toString(),
});
multi.expire(key, 86400 * 30);
} catch (error) {
errors.push(`Page ${page.pageId}: ${error?.['message']}`);
}
}
const results = await multi.exec();
// Count successful operations
const batchIndexed =
//@ts-ignore
results?.filter((result) => !result.error).length || 0;
indexed += Math.floor(batchIndexed / 2); // Each page has 2 operations (hSet + expire)
}
this.logger.log(
`Batch indexed ${indexed} pages with ${errors.length} errors`,
);
return { indexed, errors };
} catch (error) {
this.logger.error(`Batch indexing failed: ${error?.['message']}`, error);
throw error;
}
}
private async initializeConnection(): Promise<void> {
try {
await this.redis.connect();
await this.createIndex();
this.isIndexCreated = true;
this.logger.log('Redis vector database connected and index initialized');
} catch (error) {
this.logger.error(
`Failed to initialize vector index: ${error?.['message']}`,
error,
);
}
}
private async ensureIndexExists(): Promise<void> {
if (!this.isIndexCreated) {
await this.createIndex();
this.isIndexCreated = true;
}
}
private async createIndex(): Promise<void> {
try {
// Check if index already exists using proper node-redis syntax
await this.redis.ft.info(this.config.indexName);
this.logger.debug(`Vector index ${this.config.indexName} already exists`);
return;
} catch (error) {
// Index doesn't exist, create it
}
try {
// Create index using proper node-redis schema definition
await this.redis.ft.create(
this.config.indexName,
{
page_id: {
type: SCHEMA_FIELD_TYPE.TEXT,
SORTABLE: true,
},
workspace_id: {
type: SCHEMA_FIELD_TYPE.TEXT,
SORTABLE: true,
},
space_id: {
type: SCHEMA_FIELD_TYPE.TEXT,
},
title: {
type: SCHEMA_FIELD_TYPE.TEXT,
},
embedding: {
type: SCHEMA_FIELD_TYPE.VECTOR,
ALGORITHM: SCHEMA_VECTOR_FIELD_ALGORITHM.HNSW,
TYPE: 'FLOAT32',
DIM: this.config.vectorDimension,
DISTANCE_METRIC: 'COSINE',
},
indexed_at: {
type: SCHEMA_FIELD_TYPE.NUMERIC,
SORTABLE: true,
},
},
{
ON: 'HASH',
PREFIX: 'vector:',
},
);
this.logger.log(`Created vector index ${this.config.indexName}`);
} catch (error) {
if (error?.['message']?.includes('Index already exists')) {
this.logger.debug('Vector index already exists');
} else {
throw error;
}
}
}
private parseSearchResults(
results: any,
threshold: number,
): VectorSearchResult[] {
if (!results?.documents || results.documents.length === 0) {
return [];
}
const parsed: VectorSearchResult[] = [];
for (const doc of results.documents) {
// The KNN clause aliases the vector distance as "score" (see searchSimilar)
const distance = parseFloat(doc.value?.score || '1');
const similarity = 1 - distance; // Convert distance to similarity
if (similarity >= threshold) {
parsed.push({
pageId: doc.value?.page_id || doc.id.split(':')[1],
score: similarity,
metadata: {
workspaceId: doc.value?.workspace_id,
spaceId: doc.value?.space_id,
title: doc.value?.title,
distance,
},
});
}
}
return parsed;
}
async getIndexStats(): Promise<{
totalDocs: number;
indexSize: string;
vectorCount: number;
}> {
try {
const info = await this.redis.ft.info(this.config.indexName);
return {
//@ts-ignore
totalDocs: info.numDocs || 0,
//@ts-ignore
indexSize: info.indexSize || '0',
//@ts-ignore
vectorCount: info.numDocs || 0,
};
} catch (error) {
this.logger.error(`Failed to get index stats: ${error?.['message']}`);
return { totalDocs: 0, indexSize: '0', vectorCount: 0 };
}
}
async deleteIndex(): Promise<void> {
try {
await this.redis.ft.dropIndex(this.config.indexName);
this.isIndexCreated = false;
this.logger.log(`Deleted vector index ${this.config.indexName}`);
} catch (error) {
this.logger.error(`Failed to delete index: ${error?.['message']}`);
throw error;
}
}
async disconnect(): Promise<void> {
try {
await this.redis.quit();
this.logger.log('Redis vector database disconnected');
} catch (error) {
this.logger.error(
`Failed to disconnect from Redis: ${error?.['message']}`,
);
}
}
async onModuleDestroy() {
await this.disconnect();
}
}

View File

@ -0,0 +1,216 @@
import { Injectable, Logger } from '@nestjs/common';
export interface VectorSearchResult {
pageId: string;
score: number;
metadata?: Record<string, any>;
}
export interface VectorSearchOptions {
limit?: number;
offset?: number;
threshold?: number;
filters?: Record<string, any>;
}
@Injectable()
export class VectorService {
private readonly logger = new Logger(VectorService.name);
/**
* Calculate cosine similarity between two vectors
*/
cosineSimilarity(vectorA: number[], vectorB: number[]): number {
if (vectorA.length !== vectorB.length) {
throw new Error('Vectors must have the same length');
}
let dotProduct = 0;
let normA = 0;
let normB = 0;
for (let i = 0; i < vectorA.length; i++) {
dotProduct += vectorA[i] * vectorB[i];
normA += vectorA[i] * vectorA[i];
normB += vectorB[i] * vectorB[i];
}
const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
if (magnitude === 0) {
return 0;
}
return dotProduct / magnitude;
}
/**
* Calculate Euclidean distance between two vectors
*/
euclideanDistance(vectorA: number[], vectorB: number[]): number {
if (vectorA.length !== vectorB.length) {
throw new Error('Vectors must have the same length');
}
let sum = 0;
for (let i = 0; i < vectorA.length; i++) {
const diff = vectorA[i] - vectorB[i];
sum += diff * diff;
}
return Math.sqrt(sum);
}
/**
* Calculate dot product similarity
*/
dotProductSimilarity(vectorA: number[], vectorB: number[]): number {
if (vectorA.length !== vectorB.length) {
throw new Error('Vectors must have the same length');
}
let dotProduct = 0;
for (let i = 0; i < vectorA.length; i++) {
dotProduct += vectorA[i] * vectorB[i];
}
return dotProduct;
}
/**
* Normalize a vector to unit length
*/
normalizeVector(vector: number[]): number[] {
const magnitude = Math.sqrt(
vector.reduce((sum, val) => sum + val * val, 0),
);
if (magnitude === 0) {
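// A zero vector has no direction; return it unchanged instead of dividing by zero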
return vector;
}
return vector.map((val) => val / magnitude);
}
/**
* Convert vector to string format for Redis storage
*/
vectorToString(vector: number[]): string {
return vector.join(',');
}
/**
* Parse vector from string format
*/
stringToVector(vectorString: string): number[] {
return vectorString.split(',').map((val) => parseFloat(val));
}
/**
* Validate vector format and dimensions
*/
validateVector(vector: number[], expectedDimensions?: number): boolean {
if (!Array.isArray(vector)) {
return false;
}
if (vector.length === 0) {
return false;
}
if (expectedDimensions && vector.length !== expectedDimensions) {
return false;
}
return vector.every((val) => typeof val === 'number' && !isNaN(val));
}
/**
* Calculate similarity score with configurable method
*/
calculateSimilarity(
vectorA: number[],
vectorB: number[],
method: 'cosine' | 'euclidean' | 'dot' = 'cosine',
): number {
switch (method) {
case 'cosine':
return this.cosineSimilarity(vectorA, vectorB);
case 'euclidean': // Convert distance to similarity (0-1 scale)
{
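// 1 / (1 + distance) maps [0, Infinity) onto (0, 1]; identical vectors score exactly 1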
const distance = this.euclideanDistance(vectorA, vectorB);
return 1 / (1 + distance);
}
case 'dot':
return this.dotProductSimilarity(vectorA, vectorB);
default:
throw new Error(`Unsupported similarity method: ${method}`);
}
}
/**
* Filter results by similarity threshold
*/
filterByThreshold(
results: VectorSearchResult[],
threshold: number,
): VectorSearchResult[] {
return results.filter((result) => result.score >= threshold);
}
/**
* Sort results by similarity score (descending)
*/
sortByScore(results: VectorSearchResult[]): VectorSearchResult[] {
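// Note: Array.prototype.sort mutates the input; pass a copy if the original order matters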
return results.sort((a, b) => b.score - a.score);
}
/**
* Apply pagination to results
*/
paginateResults(
results: VectorSearchResult[],
offset: number = 0,
limit: number = 20,
): VectorSearchResult[] {
return results.slice(offset, offset + limit);
}
/**
* Create vector index key for Redis
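* The key shares the 'vector:' prefix declared on the search index, so hashes written under it are indexed automatically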
*/
createVectorKey(pageId: string, workspaceId: string): string {
return `vector:${workspaceId}:${pageId}`;
}
/**
* Create metadata key for Redis
*/
createMetadataKey(pageId: string, workspaceId: string): string {
return `metadata:${workspaceId}:${pageId}`;
}
/**
* Batch process vectors with chunking
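* Batches are awaited sequentially, which keeps memory use and downstream rate limits bounded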
*/
async batchProcess<T, R>(
items: T[],
processor: (batch: T[]) => Promise<R[]>,
batchSize: number = 100,
): Promise<R[]> {
const results: R[] = [];
for (let i = 0; i < items.length; i += batchSize) {
const batch = items.slice(i, i + batchSize);
try {
const batchResults = await processor(batch);
results.push(...batchResults);
} catch (error) {
this.logger.error(
`Batch processing failed for items ${i}-${i + batch.length - 1}:`,
error,
);
throw error;
}
}
return results;
}
}

View File

@ -11,6 +11,7 @@ import { PageModule } from './page/page.module';
import { AttachmentModule } from './attachment/attachment.module';
import { CommentModule } from './comment/comment.module';
import { SearchModule } from './search/search.module';
import { AiSearchModule } from './ai-search/ai-search.module';
import { SpaceModule } from './space/space.module';
import { GroupModule } from './group/group.module';
import { CaslModule } from './casl/casl.module';
@ -26,6 +27,7 @@ import { ShareModule } from './share/share.module';
AttachmentModule,
CommentModule,
SearchModule,
AiSearchModule,
SpaceModule,
GroupModule,
CaslModule,

pnpm-lock.yaml generated
View File

@ -531,6 +531,9 @@ importers:
nodemailer:
specifier: ^7.0.3
version: 7.0.3
openai:
specifier: ^5.8.2
version: 5.8.2(ws@8.18.2)(zod@3.25.56)
openid-client:
specifier: ^5.7.1
version: 5.7.1
@ -552,6 +555,9 @@ importers:
react:
specifier: ^18.3.1
version: 18.3.1
redis:
specifier: ^5.5.6
version: 5.5.6
reflect-metadata:
specifier: ^0.2.2
version: 0.2.2
@ -3349,6 +3355,34 @@ packages:
peerDependencies:
react: ^18.0 || ^19.0 || ^19.0.0-rc
'@redis/bloom@5.5.6':
resolution: {integrity: sha512-bNR3mxkwtfuCxNOzfV8B3R5zA1LiN57EH6zK4jVBIgzMzliNuReZXBFGnXvsi80/SYohajn78YdpYI+XNpqL+A==}
engines: {node: '>= 18'}
peerDependencies:
'@redis/client': ^5.5.6
'@redis/client@5.5.6':
resolution: {integrity: sha512-M3Svdwt6oSfyfQdqEr0L2HOJH2vK7GgCFx1NfAQvpWAT4+ljoT1L5S5cKT3dA9NJrxrOPDkdoTPWJnIrGCOcmw==}
engines: {node: '>= 18'}
'@redis/json@5.5.6':
resolution: {integrity: sha512-AIsoe3SsGQagqAmSQHaqxEinm5oCWr7zxPWL90kKaEdLJ+zw8KBznf2i9oK0WUFP5pFssSQUXqnscQKe2amfDQ==}
engines: {node: '>= 18'}
peerDependencies:
'@redis/client': ^5.5.6
'@redis/search@5.5.6':
resolution: {integrity: sha512-JSqasYqO0mVcHL7oxvbySRBBZYRYhFl3W7f0Da7BW8M/r0Z9wCiVrdjnN4/mKBpWZkoJT/iuisLUdPGhpKxBew==}
engines: {node: '>= 18'}
peerDependencies:
'@redis/client': ^5.5.6
'@redis/time-series@5.5.6':
resolution: {integrity: sha512-jkpcgq3NOI3TX7xEAJ3JgesJTxAx7k0m6lNxNsYdEM8KOl+xj7GaB/0CbLkoricZDmFSEAz7ClA1iK9XkGHf+Q==}
engines: {node: '>= 18'}
peerDependencies:
'@redis/client': ^5.5.6
'@remirror/core-constants@3.0.0':
resolution: {integrity: sha512-42aWfPrimMfDKDi4YegyS7x+/0tlzaqwPQCULLanv3DMIlu96KTJR0fM5isWX2UViOqlGnX6YFgqWepcX+XMNg==}
@ -7604,6 +7638,18 @@ packages:
resolution: {integrity: sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==}
engines: {node: '>=12'}
openai@5.8.2:
resolution: {integrity: sha512-8C+nzoHYgyYOXhHGN6r0fcb4SznuEn1R7YZMvlqDbnCuE0FM2mm3T1HiYW6WIcMS/F1Of2up/cSPjLPaWt0X9Q==}
hasBin: true
peerDependencies:
ws: ^8.18.0
zod: ^3.23.8
peerDependenciesMeta:
ws:
optional: true
zod:
optional: true
openid-client@5.7.1:
resolution: {integrity: sha512-jDBPgSVfTnkIh71Hg9pRvtJc6wTwqjRkN88+gCFtYWrlP4Yx2Dsrow8uPi3qLr/aeymPF3o2+dS+wOpglK04ew==}
@ -8326,6 +8372,10 @@ packages:
resolution: {integrity: sha512-DJnGAeenTdpMEH6uAJRK/uiyEIH9WVsUmoLwzudwGJUwZPp80PDBWPHXSAGNPwNvIXAbe7MSUB1zQFugFml66A==}
engines: {node: '>=4'}
redis@5.5.6:
resolution: {integrity: sha512-hbpqBfcuhWHOS9YLNcXcJ4akNr7HFX61Dq3JuFZ9S7uU7C7kvnzuH2PDIXOP62A3eevvACoG8UacuXP3N07xdg==}
engines: {node: '>= 18'}
redlock@4.2.0:
resolution: {integrity: sha512-j+oQlG+dOwcetUt2WJWttu4CZVeRzUrcVcISFmEmfyuwCVSJ93rDT7YSgg7H7rnxwoRyk/jU46kycVka5tW7jA==}
engines: {node: '>=8.0.0'}
@ -13028,6 +13078,26 @@ snapshots:
dependencies:
react: 18.3.1
'@redis/bloom@5.5.6(@redis/client@5.5.6)':
dependencies:
'@redis/client': 5.5.6
'@redis/client@5.5.6':
dependencies:
cluster-key-slot: 1.1.2
'@redis/json@5.5.6(@redis/client@5.5.6)':
dependencies:
'@redis/client': 5.5.6
'@redis/search@5.5.6(@redis/client@5.5.6)':
dependencies:
'@redis/client': 5.5.6
'@redis/time-series@5.5.6(@redis/client@5.5.6)':
dependencies:
'@redis/client': 5.5.6
'@remirror/core-constants@3.0.0': {}
'@rollup/rollup-android-arm-eabi@4.40.0':
@ -18140,6 +18210,11 @@ snapshots:
is-docker: 2.2.1
is-wsl: 2.2.0
openai@5.8.2(ws@8.18.2)(zod@3.25.56):
optionalDependencies:
ws: 8.18.2
zod: 3.25.56
openid-client@5.7.1:
dependencies:
jose: 4.15.9
@ -18922,6 +18997,14 @@ snapshots:
dependencies:
redis-errors: 1.2.0
redis@5.5.6:
dependencies:
'@redis/bloom': 5.5.6(@redis/client@5.5.6)
'@redis/client': 5.5.6
'@redis/json': 5.5.6(@redis/client@5.5.6)
'@redis/search': 5.5.6(@redis/client@5.5.6)
'@redis/time-series': 5.5.6(@redis/client@5.5.6)
redlock@4.2.0:
dependencies:
bluebird: 3.7.2