rewritten migration
This commit is contained in:
parent
c63890b104
commit
edc9d3d667
|
|
@ -1,16 +1,16 @@
|
|||
-- +goose Up
|
||||
CREATE EXTENSION IF NOT EXISTS vector;
|
||||
|
||||
-- Create sentence_embeddings table using standard Postgres types (no vector extension)
|
||||
CREATE TABLE sentence_embeddings (
|
||||
id SERIAL PRIMARY KEY,
|
||||
visit_id INTEGER NOT NULL,
|
||||
sentence TEXT NOT NULL,
|
||||
translated TEXT,
|
||||
embeddings VECTOR(1536) NOT NULL,
|
||||
embeddings FLOAT[] NOT NULL, -- Using standard float array instead of vector
|
||||
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
-- Create unique index for efficient lookups and preventing duplicates
|
||||
CREATE UNIQUE INDEX idx_sentence_embeddings_visit_sentence ON sentence_embeddings (visit_id, sentence);
|
||||
|
||||
-- +goose Down
|
||||
DROP TABLE IF EXISTS sentence_embeddings;
|
||||
DROP EXTENSION IF EXISTS vector;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
-- +goose Up
|
||||
-- Altering visit_id type, keeping compatibility with standard Postgres types
|
||||
ALTER TABLE sentence_embeddings
|
||||
ALTER COLUMN visit_id TYPE TEXT USING visit_id::text;
|
||||
ALTER COLUMN visit_id TYPE TEXT;
|
||||
|
||||
-- +goose Down
|
||||
ALTER TABLE sentence_embeddings
|
||||
ALTER COLUMN visit_id TYPE INTEGER USING visit_id::integer;
|
||||
|
||||
ALTER COLUMN visit_id TYPE INTEGER USING (visit_id::integer);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
-- +goose Up
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS ux_sentence_embeddings_visit_sentence
|
||||
ON sentence_embeddings(visit_id, sentence);
|
||||
-- The unique index was already created in migration 0003 when we switched to standard Postgres types
|
||||
-- This migration is kept for consistency in migration sequence but doesn't perform any action
|
||||
|
||||
-- +goose Down
|
||||
DROP INDEX IF EXISTS ux_sentence_embeddings_visit_sentence;
|
||||
-- No action needed for rollback
|
||||
|
|
|
|||
|
|
@ -1,14 +1,6 @@
|
|||
-- +goose Up
|
||||
-- WARNING: This alters the embeddings vector dimension from 1536 to 384.
|
||||
-- Ensure you are switching to a 384-dim embedding model (e.g., all-minilm).
|
||||
-- If existing rows have 1536-d vectors this command will fail; you may need to
|
||||
-- TRUNCATE TABLE sentence_embeddings first (after backing up) before applying.
|
||||
ALTER TABLE sentence_embeddings
|
||||
ALTER COLUMN embeddings TYPE vector(384);
|
||||
-- Update schema to support 384-dimensional embeddings using standard Postgres types
|
||||
-- No need to modify column type as we're now using a flexible FLOAT[] array
|
||||
|
||||
-- +goose Down
|
||||
-- Revert to 1536 dimensions (for models like OpenAI text-embedding-3-large).
|
||||
-- Will fail if existing rows are 384-d.
|
||||
ALTER TABLE sentence_embeddings
|
||||
ALTER COLUMN embeddings TYPE vector(1536);
|
||||
|
||||
-- No action needed for rollback since we're using a flexible array type
|
||||
|
|
|
|||
|
|
@ -1,13 +1,10 @@
|
|||
-- +goose Up
|
||||
-- Add separate columns for different embedding dimensions.
|
||||
-- Existing 'embeddings' column (if present) is left untouched for backward compatibility.
|
||||
-- Application code will now populate embedding_384 or embedding_1536 instead.
|
||||
-- Add separate columns for different embedding dimensions using standard Postgres FLOAT[] arrays
|
||||
ALTER TABLE sentence_embeddings
|
||||
ADD COLUMN IF NOT EXISTS embedding_384 vector(384),
|
||||
ADD COLUMN IF NOT EXISTS embedding_1536 vector(1536);
|
||||
ADD COLUMN IF NOT EXISTS embedding_384 FLOAT[],
|
||||
ADD COLUMN IF NOT EXISTS embedding_1536 FLOAT[];
|
||||
|
||||
-- +goose Down
|
||||
ALTER TABLE sentence_embeddings
|
||||
DROP COLUMN IF EXISTS embedding_384,
|
||||
DROP COLUMN IF EXISTS embedding_1536;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,7 @@
|
|||
-- +goose Up
|
||||
-- Drop the legacy single-dimension embeddings column (was NOT NULL) to allow inserts
|
||||
-- that now use embedding_384 / embedding_1536. All new data goes into those columns.
|
||||
-- Drop legacy embeddings column as it's been replaced by embedding_384 and embedding_1536
|
||||
ALTER TABLE sentence_embeddings
|
||||
DROP COLUMN IF EXISTS embeddings;
|
||||
|
||||
-- +goose Down
|
||||
-- Re-create the legacy embeddings column (empty) as vector(1536) NULLABLE for rollback.
|
||||
ALTER TABLE sentence_embeddings
|
||||
ADD COLUMN IF NOT EXISTS embeddings vector(1536);
|
||||
|
||||
-- No restoration action needed as embedding_384 and embedding_1536 are preserved
|
||||
|
|
|
|||
|
|
@ -277,18 +277,18 @@ func (r *PGChatRepository) InsertSentenceEmbedding(ctx context.Context, visitID,
|
|||
logrus.WithError(err).Warn("skipping sentence embedding insert")
|
||||
return err
|
||||
}
|
||||
// Build vector literal
|
||||
// Build array literal
|
||||
var b strings.Builder
|
||||
b.Grow(len(embedding)*8 + 2)
|
||||
b.WriteByte('[')
|
||||
b.WriteByte('{')
|
||||
for i, v := range embedding {
|
||||
if i > 0 {
|
||||
b.WriteByte(',')
|
||||
}
|
||||
b.WriteString(strconv.FormatFloat(v, 'f', -1, 64))
|
||||
}
|
||||
b.WriteByte(']')
|
||||
vecLiteral := b.String()
|
||||
b.WriteByte('}')
|
||||
arrayLiteral := b.String()
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, 6*time.Second)
|
||||
defer cancel()
|
||||
|
|
@ -296,18 +296,18 @@ func (r *PGChatRepository) InsertSentenceEmbedding(ctx context.Context, visitID,
|
|||
var sqlStmt string
|
||||
if l == 384 {
|
||||
sqlStmt = `INSERT INTO sentence_embeddings (visit_id, sentence, translated, embedding_384)
|
||||
VALUES ($1,$2,$3,$4::vector)
|
||||
VALUES ($1,$2,$3,$4::float[])
|
||||
ON CONFLICT (visit_id, sentence) DO UPDATE
|
||||
SET embedding_384 = EXCLUDED.embedding_384,
|
||||
translated = COALESCE(sentence_embeddings.translated, EXCLUDED.translated)`
|
||||
} else { // 1536
|
||||
sqlStmt = `INSERT INTO sentence_embeddings (visit_id, sentence, translated, embedding_1536)
|
||||
VALUES ($1,$2,$3,$4::vector)
|
||||
VALUES ($1,$2,$3,$4::float[])
|
||||
ON CONFLICT (visit_id, sentence) DO UPDATE
|
||||
SET embedding_1536 = EXCLUDED.embedding_1536,
|
||||
translated = COALESCE(sentence_embeddings.translated, EXCLUDED.translated)`
|
||||
}
|
||||
_, err := r.pool.Exec(ctx, sqlStmt, visitID, sentence, translated, vecLiteral)
|
||||
_, err := r.pool.Exec(ctx, sqlStmt, visitID, sentence, translated, arrayLiteral)
|
||||
if err != nil {
|
||||
logrus.WithError(err).Warn("failed to upsert sentence embedding (dual columns)")
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue