rewritten migration
This commit is contained in:
parent
c63890b104
commit
edc9d3d667
|
|
@ -1,16 +1,16 @@
|
||||||
-- +goose Up
|
-- +goose Up
|
||||||
CREATE EXTENSION IF NOT EXISTS vector;
|
-- Create sentence_embeddings table using standard Postgres types (no vector extension)
|
||||||
|
|
||||||
CREATE TABLE sentence_embeddings (
|
CREATE TABLE sentence_embeddings (
|
||||||
id SERIAL PRIMARY KEY,
|
id SERIAL PRIMARY KEY,
|
||||||
visit_id INTEGER NOT NULL,
|
visit_id INTEGER NOT NULL,
|
||||||
sentence TEXT NOT NULL,
|
sentence TEXT NOT NULL,
|
||||||
translated TEXT,
|
translated TEXT,
|
||||||
embeddings VECTOR(1536) NOT NULL,
|
embeddings FLOAT[] NOT NULL, -- Using standard float array instead of vector
|
||||||
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
|
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
|
||||||
);
|
);
|
||||||
|
|
||||||
|
-- Create unique index for efficient lookups and preventing duplicates
|
||||||
|
CREATE UNIQUE INDEX idx_sentence_embeddings_visit_sentence ON sentence_embeddings (visit_id, sentence);
|
||||||
|
|
||||||
-- +goose Down
|
-- +goose Down
|
||||||
DROP TABLE IF EXISTS sentence_embeddings;
|
DROP TABLE IF EXISTS sentence_embeddings;
|
||||||
DROP EXTENSION IF EXISTS vector;
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
-- +goose Up
|
-- +goose Up
|
||||||
|
-- Altering visit_id type, keeping compatibility with standard Postgres types
|
||||||
ALTER TABLE sentence_embeddings
|
ALTER TABLE sentence_embeddings
|
||||||
ALTER COLUMN visit_id TYPE TEXT USING visit_id::text;
|
ALTER COLUMN visit_id TYPE TEXT;
|
||||||
|
|
||||||
-- +goose Down
|
-- +goose Down
|
||||||
ALTER TABLE sentence_embeddings
|
ALTER TABLE sentence_embeddings
|
||||||
ALTER COLUMN visit_id TYPE INTEGER USING visit_id::integer;
|
ALTER COLUMN visit_id TYPE INTEGER USING (visit_id::integer);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
-- +goose Up
|
-- +goose Up
|
||||||
CREATE UNIQUE INDEX IF NOT EXISTS ux_sentence_embeddings_visit_sentence
|
-- The unique index was already created in migration 0003 when we switched to standard Postgres types
|
||||||
ON sentence_embeddings(visit_id, sentence);
|
-- This migration is kept for consistency in migration sequence but doesn't perform any action
|
||||||
|
|
||||||
-- +goose Down
|
-- +goose Down
|
||||||
DROP INDEX IF EXISTS ux_sentence_embeddings_visit_sentence;
|
-- No action needed for rollback
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,6 @@
|
||||||
-- +goose Up
|
-- +goose Up
|
||||||
-- WARNING: This alters the embeddings vector dimension from 1536 to 384.
|
-- Update schema to support 384-dimensional embeddings using standard Postgres types
|
||||||
-- Ensure you are switching to a 384-dim embedding model (e.g., all-minilm).
|
-- No need to modify column type as we're now using a flexible FLOAT[] array
|
||||||
-- If existing rows have 1536-d vectors this command will fail; you may need to
|
|
||||||
-- TRUNCATE TABLE sentence_embeddings first (after backing up) before applying.
|
|
||||||
ALTER TABLE sentence_embeddings
|
|
||||||
ALTER COLUMN embeddings TYPE vector(384);
|
|
||||||
|
|
||||||
-- +goose Down
|
-- +goose Down
|
||||||
-- Revert to 1536 dimensions (for models like OpenAI text-embedding-3-large).
|
-- No action needed for rollback since we're using a flexible array type
|
||||||
-- Will fail if existing rows are 384-d.
|
|
||||||
ALTER TABLE sentence_embeddings
|
|
||||||
ALTER COLUMN embeddings TYPE vector(1536);
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,10 @@
|
||||||
-- +goose Up
|
-- +goose Up
|
||||||
-- Add separate columns for different embedding dimensions.
|
-- Add separate columns for different embedding dimensions using standard Postgres FLOAT[] arrays
|
||||||
-- Existing 'embeddings' column (if present) is left untouched for backward compatibility.
|
|
||||||
-- Application code will now populate embedding_384 or embedding_1536 instead.
|
|
||||||
ALTER TABLE sentence_embeddings
|
ALTER TABLE sentence_embeddings
|
||||||
ADD COLUMN IF NOT EXISTS embedding_384 vector(384),
|
ADD COLUMN IF NOT EXISTS embedding_384 FLOAT[],
|
||||||
ADD COLUMN IF NOT EXISTS embedding_1536 vector(1536);
|
ADD COLUMN IF NOT EXISTS embedding_1536 FLOAT[];
|
||||||
|
|
||||||
-- +goose Down
|
-- +goose Down
|
||||||
ALTER TABLE sentence_embeddings
|
ALTER TABLE sentence_embeddings
|
||||||
DROP COLUMN IF EXISTS embedding_384,
|
DROP COLUMN IF EXISTS embedding_384,
|
||||||
DROP COLUMN IF EXISTS embedding_1536;
|
DROP COLUMN IF EXISTS embedding_1536;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,7 @@
|
||||||
-- +goose Up
|
-- +goose Up
|
||||||
-- Drop the legacy single-dimension embeddings column (was NOT NULL) to allow inserts
|
-- Drop legacy embeddings column as it's been replaced by embedding_384 and embedding_1536
|
||||||
-- that now use embedding_384 / embedding_1536. All new data goes into those columns.
|
|
||||||
ALTER TABLE sentence_embeddings
|
ALTER TABLE sentence_embeddings
|
||||||
DROP COLUMN IF EXISTS embeddings;
|
DROP COLUMN IF EXISTS embeddings;
|
||||||
|
|
||||||
-- +goose Down
|
-- +goose Down
|
||||||
-- Re-create the legacy embeddings column (empty) as vector(1536) NULLABLE for rollback.
|
-- No restoration action needed as embedding_384 and embedding_1536 are preserved
|
||||||
ALTER TABLE sentence_embeddings
|
|
||||||
ADD COLUMN IF NOT EXISTS embeddings vector(1536);
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -277,18 +277,18 @@ func (r *PGChatRepository) InsertSentenceEmbedding(ctx context.Context, visitID,
|
||||||
logrus.WithError(err).Warn("skipping sentence embedding insert")
|
logrus.WithError(err).Warn("skipping sentence embedding insert")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// Build vector literal
|
// Build array literal
|
||||||
var b strings.Builder
|
var b strings.Builder
|
||||||
b.Grow(len(embedding)*8 + 2)
|
b.Grow(len(embedding)*8 + 2)
|
||||||
b.WriteByte('[')
|
b.WriteByte('{')
|
||||||
for i, v := range embedding {
|
for i, v := range embedding {
|
||||||
if i > 0 {
|
if i > 0 {
|
||||||
b.WriteByte(',')
|
b.WriteByte(',')
|
||||||
}
|
}
|
||||||
b.WriteString(strconv.FormatFloat(v, 'f', -1, 64))
|
b.WriteString(strconv.FormatFloat(v, 'f', -1, 64))
|
||||||
}
|
}
|
||||||
b.WriteByte(']')
|
b.WriteByte('}')
|
||||||
vecLiteral := b.String()
|
arrayLiteral := b.String()
|
||||||
|
|
||||||
ctx, cancel := context.WithTimeout(ctx, 6*time.Second)
|
ctx, cancel := context.WithTimeout(ctx, 6*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
@ -296,18 +296,18 @@ func (r *PGChatRepository) InsertSentenceEmbedding(ctx context.Context, visitID,
|
||||||
var sqlStmt string
|
var sqlStmt string
|
||||||
if l == 384 {
|
if l == 384 {
|
||||||
sqlStmt = `INSERT INTO sentence_embeddings (visit_id, sentence, translated, embedding_384)
|
sqlStmt = `INSERT INTO sentence_embeddings (visit_id, sentence, translated, embedding_384)
|
||||||
VALUES ($1,$2,$3,$4::vector)
|
VALUES ($1,$2,$3,$4::float[])
|
||||||
ON CONFLICT (visit_id, sentence) DO UPDATE
|
ON CONFLICT (visit_id, sentence) DO UPDATE
|
||||||
SET embedding_384 = EXCLUDED.embedding_384,
|
SET embedding_384 = EXCLUDED.embedding_384,
|
||||||
translated = COALESCE(sentence_embeddings.translated, EXCLUDED.translated)`
|
translated = COALESCE(sentence_embeddings.translated, EXCLUDED.translated)`
|
||||||
} else { // 1536
|
} else { // 1536
|
||||||
sqlStmt = `INSERT INTO sentence_embeddings (visit_id, sentence, translated, embedding_1536)
|
sqlStmt = `INSERT INTO sentence_embeddings (visit_id, sentence, translated, embedding_1536)
|
||||||
VALUES ($1,$2,$3,$4::vector)
|
VALUES ($1,$2,$3,$4::float[])
|
||||||
ON CONFLICT (visit_id, sentence) DO UPDATE
|
ON CONFLICT (visit_id, sentence) DO UPDATE
|
||||||
SET embedding_1536 = EXCLUDED.embedding_1536,
|
SET embedding_1536 = EXCLUDED.embedding_1536,
|
||||||
translated = COALESCE(sentence_embeddings.translated, EXCLUDED.translated)`
|
translated = COALESCE(sentence_embeddings.translated, EXCLUDED.translated)`
|
||||||
}
|
}
|
||||||
_, err := r.pool.Exec(ctx, sqlStmt, visitID, sentence, translated, vecLiteral)
|
_, err := r.pool.Exec(ctx, sqlStmt, visitID, sentence, translated, arrayLiteral)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.WithError(err).Warn("failed to upsert sentence embedding (dual columns)")
|
logrus.WithError(err).Warn("failed to upsert sentence embedding (dual columns)")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue