feat: vectorchord (#18042)

* wip

auto-detect available extensions

auto-recovery, fix reindexing check

use original image for ml

* set probes

* update image for sql checker

update images for gha

* cascade

* fix new instance

* accurate dummy vector

* simplify dummy

* preexisting pg docs

* handle different db name

* maybe fix sql generation

* revert refreshfaces sql change

* redundant switch

* outdated message

* update docker compose files

* Update docs/docs/administration/postgres-standalone.md

Co-authored-by: Daniel Dietzler <36593685+danieldietzler@users.noreply.github.com>

* tighten range

* avoid always printing "vector reindexing complete"

* remove nesting

* use new images

* add vchord to unit tests

* debug e2e image

* mention 1.107.2 in startup error

* support new vchord versions

---------

Co-authored-by: Daniel Dietzler <36593685+danieldietzler@users.noreply.github.com>
This commit is contained in:
Mert 2025-05-20 09:36:43 -04:00 committed by GitHub
parent fe71894308
commit 0d773af6c3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
35 changed files with 572 additions and 444 deletions

View file

@@ -384,14 +384,28 @@ export function searchAssetBuilder(kysely: Kysely<DB>, options: AssetSearchBuild
.$if(!options.withDeleted, (qb) => qb.where('assets.deletedAt', 'is', null));
}
/** Arguments naming the vector extension, table and index that a vector index statement is generated for. */
type VectorIndexOptions = { vectorExtension: VectorExtension; table: string; indexName: string };
/**
 * Identifies a vector index by name, with an optional `lists` value.
 *
 * When passed through to `vectorIndexQuery`, `lists` is interpolated into the
 * vchordrq `lists = [...]` build option and falls back to `1` when omitted.
 */
export type ReindexVectorIndexOptions = { indexName: string; lists?: number };
export function vectorIndexQuery({ vectorExtension, table, indexName }: VectorIndexOptions): string {
/**
 * Argument shape accepted by `vectorIndexQuery`: the target table and the
 * vector extension to generate SQL for, combined with the index name and
 * optional `lists` carried by `ReindexVectorIndexOptions`.
 */
type VectorIndexQueryOptions = { table: string; vectorExtension: VectorExtension } & ReindexVectorIndexOptions;
export function vectorIndexQuery({ vectorExtension, table, indexName, lists }: VectorIndexQueryOptions): string {
switch (vectorExtension) {
case DatabaseExtension.VECTORCHORD: {
return `
CREATE INDEX IF NOT EXISTS ${indexName} ON ${table} USING vchordrq (embedding vector_cosine_ops) WITH (options = $$
residual_quantization = false
[build.internal]
lists = [${lists ?? 1}]
spherical_centroids = true
build_threads = 4
sampling_factor = 1024
$$)`;
}
case DatabaseExtension.VECTORS: {
return `
CREATE INDEX IF NOT EXISTS ${indexName} ON ${table}
USING vectors (embedding vector_cos_ops) WITH (options = $$
optimizing.optimizing_threads = 4
[indexing.hnsw]
m = 16
ef_construction = 300