// src/search/knn_search.ts

// --- START OF FILE knn_search_partitioned.ts ---

import * as distanceMetrics from "../utils/distance_metrics";
import { createTimer } from "../utils/profiling";

import { PartitionedVectorDB } from "../vector/partitioned_vector_db"; // Import Partitioned DB
import { Vector, SearchResult, SearchOptions, DistanceMetric, KNNOptionsPartitioned, KNNStatsPartitioned } from "../types";
import { LRUCache } from "lru-cache"; // Still using cache for results

/**
 * K-nearest-neighbors (KNN) search front end for a {@link PartitionedVectorDB}.
 *
 * The engine does not scan vectors itself: every search is delegated to
 * `PartitionedVectorDB.findNearest`. This class adds a configured distance
 * metric, an LRU cache of result lists and basic timing statistics on top.
 *
 * Note (carried over from the original header): the search is only performed
 * on partitions that the database currently has loaded in memory.
 *
 * Features:
 * - Works with partitioned vector databases for scalable search operations
 * - Caches search results to improve performance for repeated queries
 * - Maintains statistics for performance monitoring
 * - Supports various distance metrics (euclidean by default)
 *
 * Caching caveats:
 * - Query components are rounded to 4 decimals when building the cache key,
 *   so queries that differ only beyond that precision share one cache entry.
 * - Filters are identified by function identity. Reuse the same filter
 *   function object across calls to benefit from the cache; a freshly created
 *   closure is always treated as a new filter.
 * - Entries leave the cache only through LRU eviction (1000 entries max) or
 *   `clearCache()` / `close()`. Nothing in this class observes database
 *   changes, so callers that mutate the DB should clear the cache themselves.
 *
 * @example
 * ```typescript
 * const db = new PartitionedVectorDB(...);
 * const knnSearch = new KNNEngineSearch(db, { metric: 'cosine' });
 *
 * // Perform a search
 * const results = await knnSearch.findNearest(queryVector, 10, { filter: myFilter });
 *
 * // Get performance stats
 * const stats = knnSearch.getStats();
 * ```
 */
export class KNNEngineSearch {
  /** Label under which every search is measured on the timer. */
  private static readonly TIMER_LABEL = "knn_partitioned_search";

  /**
   * Identity tokens handed out to filter callbacks. A WeakMap is used so that
   * remembering a filter never keeps it (or anything it closes over) alive.
   */
  private static readonly filterIds = new WeakMap<object, number>();
  private static nextFilterId = 0;

  private db: PartitionedVectorDB;
  private options: Required<KNNOptionsPartitioned>;
  // Not called by this class (the DB computes distances); still resolved in the
  // constructor and kept for reference.
  private distanceFunc: (a: Vector, b: Vector) => number;
  private timer: ReturnType<typeof createTimer>;
  private resultCache: LRUCache<string, SearchResult[]>;
  private stats: {
    calls: number;
    totalTime: number;
    lastSearchTime: number;
    cacheHits: number;
    cacheMisses: number;
  };

  /**
   * @param db Partitioned database that executes the actual searches. Its
   *   lifecycle stays with the caller; this class never closes it.
   * @param options Engine options. Missing or `undefined` entries fall back to
   *   the defaults: `metric: "euclidean"`, `cacheResults: true`.
   */
  constructor(db: PartitionedVectorDB, options: KNNOptionsPartitioned = {}) {
    this.db = db;

    const defaults = {
      metric: "euclidean" as DistanceMetric,
      cacheResults: true,
    };

    // Drop explicit `undefined` values so they cannot overwrite a default.
    const provided = Object.fromEntries(
      Object.entries(options).filter(([, value]) => value !== undefined)
    );
    this.options = {
      ...defaults,
      ...provided,
    } as Required<KNNOptionsPartitioned>;

    this.distanceFunc = distanceMetrics.getDistanceFunction(
      this.options.metric
    );

    this.timer = createTimer();

    // Bounded result cache; the size can be adjusted here.
    this.resultCache = new LRUCache<string, SearchResult[]>({
      max: 1000,
    });

    this.stats = {
      calls: 0,
      totalTime: 0,
      lastSearchTime: 0,
      cacheHits: 0,
      cacheMisses: 0,
    };
  }

  /**
   * Find the k nearest neighbors of `query`.
   *
   * The query is converted to a `Float32Array` if it is not one already. When
   * result caching is enabled, a previously stored result list for the same
   * (rounded query, k, metric, filter) combination is returned as a shallow
   * copy; otherwise the search is delegated to the database and the outcome is
   * stored for later calls.
   *
   * @param query Query vector.
   * @param k Number of neighbors requested (default 10).
   * @param options Search options. Only `filter` is forwarded to the database;
   *   the distance metric always comes from the engine options.
   * @returns The result list produced by the database, or a copy of the cached
   *   list.
   * @throws Re-throws any error raised by the database search after logging it.
   *   Failed searches count towards `calls` but add no time to the statistics.
   */
  async findNearest(
    query: Vector,
    k: number = 10,
    options: SearchOptions = {}
  ): Promise<SearchResult[]> {
    const label = KNNEngineSearch.TIMER_LABEL;
    // NOTE(review): all calls share one timer and one label, so overlapping
    // async searches may disturb each other's measurements - confirm against
    // the createTimer implementation.
    this.timer.start(label);
    this.stats.calls++;

    const typedQuery =
      query instanceof Float32Array ? query : new Float32Array(query);

    // Build the key once; it formats every vector component, which is not free.
    const cacheKey = this.options.cacheResults
      ? this._getCacheKey(typedQuery, k, options)
      : undefined;

    if (cacheKey !== undefined) {
      const cachedResults = this.resultCache.get(cacheKey);
      if (cachedResults) {
        this.stats.cacheHits++;
        this._recordSearchTime();
        // Copy so callers cannot reorder or truncate the cached array.
        return [...cachedResults];
      }
      this.stats.cacheMisses++;
    }

    // The database searches its loaded partitions, applies the filter and the
    // metric, and aggregates the results.
    let results: SearchResult[];
    try {
      results = await this.db.findNearest(typedQuery, k, {
        filter: options.filter,
        distanceMetric: this.options.metric,
      });
    } catch (error) {
      console.error("Error during PartitionedDB findNearest:", error);
      this.timer.stop(label);
      throw error;
    }

    if (cacheKey !== undefined) {
      // Store a copy so later mutation of the returned array cannot leak in.
      this.resultCache.set(cacheKey, [...results]);
    }

    this._recordSearchTime();
    return results;
  }

  /**
   * Fold the elapsed time of the current search into the statistics and stop
   * the timer. Elapsed time is read before stopping, as in the original flow.
   * @private
   */
  private _recordSearchTime(): void {
    const label = KNNEngineSearch.TIMER_LABEL;
    const searchTime = this.timer.getElapsed(label);
    this.stats.lastSearchTime = searchTime;
    this.stats.totalTime += searchTime;
    this.timer.stop(label);
  }

  /**
   * Build the result-cache key for a search.
   *
   * The key combines the query (each component rounded to 4 decimals), `k`,
   * the engine metric and a token identifying the filter.
   * @private
   */
  private _getCacheKey(
    query: Vector,
    k: number,
    options: SearchOptions
  ): string {
    const queryHash = Array.from(query)
      .map((component) => component.toFixed(4))
      .join(",");
    const filterInfo = KNNEngineSearch._filterToken(options.filter);
    return `${queryHash}_k${k}_${this.options.metric}_${filterInfo}`;
  }

  /**
   * Produce a cache-key token for a filter.
   *
   * Filters are distinguished by identity rather than by their source text:
   * the source (let alone its length) cannot tell apart closures that capture
   * different values, and mixing those up would serve results computed for a
   * different filter.
   * @private
   */
  private static _filterToken(filter: unknown): string {
    if (!filter) {
      return "noFilter";
    }
    if (typeof filter !== "function" && typeof filter !== "object") {
      // Primitives cannot be WeakMap keys; fall back to their string form.
      return `filterValue:${String(filter)}`;
    }
    const key = filter as object;
    let id = KNNEngineSearch.filterIds.get(key);
    if (id === undefined) {
      id = KNNEngineSearch.nextFilterId++;
      KNNEngineSearch.filterIds.set(key, id);
    }
    return `filterId:${id}`;
  }

  /**
   * Get statistics about KNN search.
   *
   * `avgTime` is `totalTime / calls` (0 before the first call);
   * `cachedResultsCount` is the current number of cached result lists and
   * `options` is a copy of the effective engine options.
   */
  getStats(): KNNStatsPartitioned {
    return {
      calls: this.stats.calls,
      totalTime: this.stats.totalTime,
      avgTime:
        this.stats.calls > 0 ? this.stats.totalTime / this.stats.calls : 0,
      lastSearchTime: this.stats.lastSearchTime,
      cacheHits: this.stats.cacheHits,
      cacheMisses: this.stats.cacheMisses,
      cachedResultsCount: this.resultCache.size,
      options: { ...this.options },
    };
  }

  /**
   * Clear the result cache. Statistics counters are left untouched.
   */
  clearCache(): void {
    this.resultCache.clear();
    console.log("KNN result cache cleared.");
  }

  /**
   * Release resources held by the engine (currently only the result cache).
   * The database is managed externally and is not closed here.
   */
  close(): void {
    this.clearCache();
    console.log("KNNPartitioned closed (caches cleared).");
  }
}