rewritten migration

This commit is contained in:
lehel 2025-10-08 21:58:29 +02:00
parent c63890b104
commit edc9d3d667
No known key found for this signature in database
GPG Key ID: 9C4F9D6111EE5CFA
7 changed files with 26 additions and 41 deletions

View File

@ -1,16 +1,16 @@
-- +goose Up
CREATE EXTENSION IF NOT EXISTS vector;
-- Create sentence_embeddings table using standard Postgres types (no vector extension)
CREATE TABLE sentence_embeddings (
id SERIAL PRIMARY KEY,
visit_id INTEGER NOT NULL,
sentence TEXT NOT NULL,
translated TEXT,
embeddings VECTOR(1536) NOT NULL,
embeddings FLOAT[] NOT NULL, -- Using standard float array instead of vector
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Create unique index for efficient lookups and preventing duplicates
CREATE UNIQUE INDEX idx_sentence_embeddings_visit_sentence ON sentence_embeddings (visit_id, sentence);
-- +goose Down
DROP TABLE IF EXISTS sentence_embeddings;
DROP EXTENSION IF EXISTS vector;

View File

@ -1,8 +1,8 @@
-- +goose Up
-- Altering visit_id type, keeping compatibility with standard Postgres types
ALTER TABLE sentence_embeddings
ALTER COLUMN visit_id TYPE TEXT USING visit_id::text;
ALTER COLUMN visit_id TYPE TEXT;
-- +goose Down
ALTER TABLE sentence_embeddings
ALTER COLUMN visit_id TYPE INTEGER USING visit_id::integer;
ALTER COLUMN visit_id TYPE INTEGER USING (visit_id::integer);

View File

@ -1,6 +1,6 @@
-- +goose Up
CREATE UNIQUE INDEX IF NOT EXISTS ux_sentence_embeddings_visit_sentence
ON sentence_embeddings(visit_id, sentence);
-- The unique index was already created in migration 0003 when we switched to standard Postgres types
-- This migration is kept for consistency in migration sequence but doesn't perform any action
-- +goose Down
DROP INDEX IF EXISTS ux_sentence_embeddings_visit_sentence;
-- No action needed for rollback

View File

@ -1,14 +1,6 @@
-- +goose Up
-- WARNING: This alters the embeddings vector dimension from 1536 to 384.
-- Ensure you are switching to a 384-dim embedding model (e.g., all-minilm).
-- If existing rows have 1536-d vectors this command will fail; you may need to
-- TRUNCATE TABLE sentence_embeddings first (after backing up) before applying.
ALTER TABLE sentence_embeddings
ALTER COLUMN embeddings TYPE vector(384);
-- No-op: 384-dimensional embeddings require no schema change with standard Postgres types.
-- The column type does not need to be modified because FLOAT[] is a flexible array with no fixed dimension.
-- +goose Down
-- Revert to 1536 dimensions (for models like OpenAI text-embedding-3-large).
-- Will fail if existing rows are 384-d.
ALTER TABLE sentence_embeddings
ALTER COLUMN embeddings TYPE vector(1536);
-- No action needed for rollback since we're using a flexible array type

View File

@ -1,13 +1,10 @@
-- +goose Up
-- Add separate columns for different embedding dimensions.
-- Existing 'embeddings' column (if present) is left untouched for backward compatibility.
-- Application code will now populate embedding_384 or embedding_1536 instead.
-- Add separate columns for different embedding dimensions using standard Postgres FLOAT[] arrays
ALTER TABLE sentence_embeddings
ADD COLUMN IF NOT EXISTS embedding_384 vector(384),
ADD COLUMN IF NOT EXISTS embedding_1536 vector(1536);
ADD COLUMN IF NOT EXISTS embedding_384 FLOAT[],
ADD COLUMN IF NOT EXISTS embedding_1536 FLOAT[];
-- +goose Down
ALTER TABLE sentence_embeddings
DROP COLUMN IF EXISTS embedding_384,
DROP COLUMN IF EXISTS embedding_1536;

View File

@ -1,11 +1,7 @@
-- +goose Up
-- Drop the legacy single-dimension embeddings column (was NOT NULL) to allow inserts
-- that now use embedding_384 / embedding_1536. All new data goes into those columns.
-- Drop legacy embeddings column as it's been replaced by embedding_384 and embedding_1536
ALTER TABLE sentence_embeddings
DROP COLUMN IF EXISTS embeddings;
-- +goose Down
-- Re-create the legacy embeddings column (empty) as vector(1536) NULLABLE for rollback.
ALTER TABLE sentence_embeddings
ADD COLUMN IF NOT EXISTS embeddings vector(1536);
-- No restoration action needed as embedding_384 and embedding_1536 are preserved

View File

@ -277,18 +277,18 @@ func (r *PGChatRepository) InsertSentenceEmbedding(ctx context.Context, visitID,
logrus.WithError(err).Warn("skipping sentence embedding insert")
return err
}
// Build vector literal
// Build array literal
var b strings.Builder
b.Grow(len(embedding)*8 + 2)
b.WriteByte('[')
b.WriteByte('{')
for i, v := range embedding {
if i > 0 {
b.WriteByte(',')
}
b.WriteString(strconv.FormatFloat(v, 'f', -1, 64))
}
b.WriteByte(']')
vecLiteral := b.String()
b.WriteByte('}')
arrayLiteral := b.String()
ctx, cancel := context.WithTimeout(ctx, 6*time.Second)
defer cancel()
@ -296,18 +296,18 @@ func (r *PGChatRepository) InsertSentenceEmbedding(ctx context.Context, visitID,
var sqlStmt string
if l == 384 {
sqlStmt = `INSERT INTO sentence_embeddings (visit_id, sentence, translated, embedding_384)
VALUES ($1,$2,$3,$4::vector)
VALUES ($1,$2,$3,$4::float[])
ON CONFLICT (visit_id, sentence) DO UPDATE
SET embedding_384 = EXCLUDED.embedding_384,
translated = COALESCE(sentence_embeddings.translated, EXCLUDED.translated)`
} else { // 1536
sqlStmt = `INSERT INTO sentence_embeddings (visit_id, sentence, translated, embedding_1536)
VALUES ($1,$2,$3,$4::vector)
VALUES ($1,$2,$3,$4::float[])
ON CONFLICT (visit_id, sentence) DO UPDATE
SET embedding_1536 = EXCLUDED.embedding_1536,
translated = COALESCE(sentence_embeddings.translated, EXCLUDED.translated)`
}
_, err := r.pool.Exec(ctx, sqlStmt, visitID, sentence, translated, vecLiteral)
_, err := r.pool.Exec(ctx, sqlStmt, visitID, sentence, translated, arrayLiteral)
if err != nil {
logrus.WithError(err).Warn("failed to upsert sentence embedding (dual columns)")
}