From c63890b1041b8da08a92cb5229341659341ba68b Mon Sep 17 00:00:00 2001 From: lehel Date: Wed, 8 Oct 2025 21:04:33 +0200 Subject: [PATCH] embedding db --- Makefile | 39 ++++- config/config.yaml | 2 +- llm.go | 9 +- main.go | 3 + ...r_sentence_embeddings_visit_id_type.up.sql | 8 + ..._add_unique_idx_sentence_embeddings.up.sql | 6 + ...6_alter_sentence_embeddings_dim_384.up.sql | 14 ++ .../0007_add_dual_embedding_columns.up.sql | 13 ++ .../0008_drop_legacy_embeddings_column.up.sql | 11 ++ ollama_client.go | 163 ++++++++++++++---- repository.go | 66 +++++++ sentence_embeddings.go | 154 +++++++++++++++++ 12 files changed, 448 insertions(+), 40 deletions(-) create mode 100644 migrations/0004_alter_sentence_embeddings_visit_id_type.up.sql create mode 100644 migrations/0005_add_unique_idx_sentence_embeddings.up.sql create mode 100644 migrations/0006_alter_sentence_embeddings_dim_384.up.sql create mode 100644 migrations/0007_add_dual_embedding_columns.up.sql create mode 100644 migrations/0008_drop_legacy_embeddings_column.up.sql create mode 100644 sentence_embeddings.go diff --git a/Makefile b/Makefile index 5d754d0..2ef8332 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ # Makefile for running the Vet Clinic Chat Assistant locally with Ollama -.PHONY: run ollama-start ollama-stop ollama-pull ollama-status +.PHONY: run ollama-start ollama-stop ollama-pull ollama-status curl-embed curl-translate curl-chat # Start Ollama server (if not already running) ollama-start: @@ -20,6 +20,15 @@ ollama-pull: ollama-status: ollama list +# Ollama host & models (override as needed) +OLLAMA_HOST ?= http://localhost:11434 +# Primary chat / reasoning model (already using OPENAI_MODEL var for compatibility) +OPENAI_MODEL ?= qwen3:latest +# Optional separate embedding model +OLLAMA_EMBED_MODEL ?= all-minilm +# Translation prompt (mirrors config.yaml translate_prompt). Can override: make curl-translate PROMPT="..." TRANSLATE_PROMPT="..." 
+TRANSLATE_PROMPT ?= Translate the following veterinary-related sentence to English. Input: '$(PROMPT)'. Return ONLY the English translation, no extra text, no markdown, no quotes. If already English, return as is. + # Database configuration (override via: make run DB_PASSWORD=secret DB_NAME=other) DB_HOST ?= localhost DB_PORT ?= 5432 @@ -70,3 +79,31 @@ test-coverage: # Run tests with HTML coverage report test-coverage-html: test-coverage go tool cover -html=coverage.out + +# --- Utility curl targets --- +# Example: make curl-embed PROMPT="warm up" +curl-embed: + @test -n "$(PROMPT)" || { echo "Usage: make curl-embed PROMPT='text' [OLLAMA_EMBED_MODEL=model]"; exit 1; } + @echo "[curl-embed] model=$(OLLAMA_EMBED_MODEL) prompt='$(PROMPT)'" + @curl -sS -X POST "$(OLLAMA_HOST)/api/embeddings" \ + -H 'Content-Type: application/json' \ + -d '{"model":"$(OLLAMA_EMBED_MODEL)","prompt":"$(PROMPT)"}' | jq . || true + +# Example: make curl-translate PROMPT="A kutyám nem eszik" +curl-translate: + @test -n "$(PROMPT)" || { echo "Usage: make curl-translate PROMPT='sentence to translate'"; exit 1; } + @echo "[curl-translate] model=$(OPENAI_MODEL)"; \ + PROMPT_JSON=$$(printf '%s' "$(TRANSLATE_PROMPT)" | jq -Rs .); \ + curl -sS -X POST "$(OLLAMA_HOST)/api/chat" \ + -H 'Content-Type: application/json' \ + -d '{"model":"$(OPENAI_MODEL)","messages":[{"role":"user","content":'$$PROMPT_JSON'}],"stream":false}' | jq -r '.message.content' || true + +# Generic chat invocation (raw user PROMPT) +# Example: make curl-chat PROMPT="List 3 dog breeds" +curl-chat: + @test -n "$(PROMPT)" || { echo "Usage: make curl-chat PROMPT='your message'"; exit 1; } + @echo "[curl-chat] model=$(OPENAI_MODEL)"; \ + PROMPT_JSON=$$(printf '%s' "$(PROMPT)" | jq -Rs .); \ + curl -sS -X POST "$(OLLAMA_HOST)/api/chat" \ + -H 'Content-Type: application/json' \ + -d '{"model":"$(OPENAI_MODEL)","messages":[{"role":"user","content":'$$PROMPT_JSON'}],"stream":false}' | jq -r '.message.content' || true diff --git 
a/config/config.yaml b/config/config.yaml index 5d42153..6bc1e61 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,4 +1,4 @@ llm: extract_keywords_prompt: "You will extract structured data from the user input. Input text: {{.Message}}. Return ONLY valid minified JSON object with keys: translate (English translation of input), keyword (array of 3-5 concise English veterinary-related keywords derived strictly from the input), animal (animal mentioned or 'unknown'). Example: {\"translate\":\"dog has diarrhea\",\"keyword\":[\"diarrhea\",\"digestive\"],\"animal\":\"dog\"}. Do not add extra text, markdown, or quotes outside JSON." disambiguate_prompt: "Given candidate visit entries (JSON array): {{.Entries}} and user message: {{.Message}} choose the best matching visit's ID. Return ONLY JSON: {\"visitReason\":\"\"}. No other text." - translate_prompt: "Translate the following veterinary-related sentence to English. Input: '{{.Message}}'. Return ONLY the English translation, no extra text, no markdown, no quotes. If already English, return as is." + translate_prompt: "Translate the following veterinary-related sentence to English. Input: '{{.Message}}'. Return ONLY the English translation as one concise sentence. IMPORTANT: Do NOT output any tags, reasoning, analysis, or explanations. No markdown, no quotes. If already English, return it unchanged." 
diff --git a/llm.go b/llm.go index 7311786..7cacc51 100644 --- a/llm.go +++ b/llm.go @@ -55,7 +55,14 @@ func NewLLMClientFromEnv(repo ChatRepositoryAPI) LLMClientAPI { case "openai", "openrouter": return NewOpenAIClient(apiKey, baseURL, model, repo) case "ollama", "": - return NewOllamaClient(apiKey, baseURL, model, repo) + oc := NewOllamaClient(apiKey, baseURL, model, repo) + em := os.Getenv("OLLAMA_EMBED_MODEL") + if strings.TrimSpace(em) == "" { + em = "all-minilm" + logrus.Infof("No OLLAMA_EMBED_MODEL specified; defaulting embedding model to %s", em) + } + oc.EmbeddingModel = em + return oc default: logrus.Warnf("Unknown LLM_PROVIDER %q, defaulting to Ollama", provider) return NewOllamaClient(apiKey, baseURL, model, repo) diff --git a/main.go b/main.go index cfa03e4..8cea037 100644 --- a/main.go +++ b/main.go @@ -72,6 +72,9 @@ func main() { // Initialize LLM client llm := NewLLMClientFromEnv(repo) + // Launch background backfill of sentence embeddings (non-blocking) + startSentenceEmbeddingBackfill(repo, llm, &visitDB) + // Wrap templates for controller uiTmpl := &TemplateWrapper{Tmpl: uiTemplate} uiDBEditTmpl := &TemplateWrapper{Tmpl: uiDBEditTemplate} diff --git a/migrations/0004_alter_sentence_embeddings_visit_id_type.up.sql b/migrations/0004_alter_sentence_embeddings_visit_id_type.up.sql new file mode 100644 index 0000000..2d58220 --- /dev/null +++ b/migrations/0004_alter_sentence_embeddings_visit_id_type.up.sql @@ -0,0 +1,8 @@ +-- +goose Up +ALTER TABLE sentence_embeddings + ALTER COLUMN visit_id TYPE TEXT USING visit_id::text; + +-- +goose Down +ALTER TABLE sentence_embeddings + ALTER COLUMN visit_id TYPE INTEGER USING visit_id::integer; + diff --git a/migrations/0005_add_unique_idx_sentence_embeddings.up.sql b/migrations/0005_add_unique_idx_sentence_embeddings.up.sql new file mode 100644 index 0000000..23db44e --- /dev/null +++ b/migrations/0005_add_unique_idx_sentence_embeddings.up.sql @@ -0,0 +1,6 @@ +-- +goose Up +CREATE UNIQUE INDEX IF NOT EXISTS 
ux_sentence_embeddings_visit_sentence + ON sentence_embeddings(visit_id, sentence); + +-- +goose Down +DROP INDEX IF EXISTS ux_sentence_embeddings_visit_sentence; diff --git a/migrations/0006_alter_sentence_embeddings_dim_384.up.sql b/migrations/0006_alter_sentence_embeddings_dim_384.up.sql new file mode 100644 index 0000000..415c4d6 --- /dev/null +++ b/migrations/0006_alter_sentence_embeddings_dim_384.up.sql @@ -0,0 +1,14 @@ +-- +goose Up +-- WARNING: This alters the embeddings vector dimension from 1536 to 384. +-- Ensure you are switching to a 384-dim embedding model (e.g., all-minilm). +-- If existing rows have 1536-d vectors this command will fail; you may need to +-- TRUNCATE TABLE sentence_embeddings first (after backing up) before applying. +ALTER TABLE sentence_embeddings + ALTER COLUMN embeddings TYPE vector(384); + +-- +goose Down +-- Revert to 1536 dimensions (for models like OpenAI text-embedding-3-large). +-- Will fail if existing rows are 384-d. +ALTER TABLE sentence_embeddings + ALTER COLUMN embeddings TYPE vector(1536); + diff --git a/migrations/0007_add_dual_embedding_columns.up.sql b/migrations/0007_add_dual_embedding_columns.up.sql new file mode 100644 index 0000000..c708bbd --- /dev/null +++ b/migrations/0007_add_dual_embedding_columns.up.sql @@ -0,0 +1,13 @@ +-- +goose Up +-- Add separate columns for different embedding dimensions. +-- Existing 'embeddings' column (if present) is left untouched for backward compatibility. +-- Application code will now populate embedding_384 or embedding_1536 instead. 
+ALTER TABLE sentence_embeddings + ADD COLUMN IF NOT EXISTS embedding_384 vector(384), + ADD COLUMN IF NOT EXISTS embedding_1536 vector(1536); + +-- +goose Down +ALTER TABLE sentence_embeddings + DROP COLUMN IF EXISTS embedding_384, + DROP COLUMN IF EXISTS embedding_1536; + diff --git a/migrations/0008_drop_legacy_embeddings_column.up.sql b/migrations/0008_drop_legacy_embeddings_column.up.sql new file mode 100644 index 0000000..f02028e --- /dev/null +++ b/migrations/0008_drop_legacy_embeddings_column.up.sql @@ -0,0 +1,11 @@ +-- +goose Up +-- Drop the legacy single-dimension embeddings column (was NOT NULL) to allow inserts +-- that now use embedding_384 / embedding_1536. All new data goes into those columns. +ALTER TABLE sentence_embeddings + DROP COLUMN IF EXISTS embeddings; + +-- +goose Down +-- Re-create the legacy embeddings column (empty) as vector(1536) NULLABLE for rollback. +ALTER TABLE sentence_embeddings + ADD COLUMN IF NOT EXISTS embeddings vector(1536); + diff --git a/ollama_client.go b/ollama_client.go index 0b4d786..1261427 100644 --- a/ollama_client.go +++ b/ollama_client.go @@ -7,7 +7,10 @@ import ( "fmt" "io" "net/http" + "os" + "strconv" "strings" + "time" "github.com/sirupsen/logrus" ) @@ -15,10 +18,11 @@ import ( // --- OllamaClient implementation --- type OllamaClient struct { - APIKey string - BaseURL string - Model string - Repo ChatRepositoryAPI + APIKey string + BaseURL string + Model string + EmbeddingModel string + Repo ChatRepositoryAPI } func NewOllamaClient(apiKey, baseURL, model string, repo ChatRepositoryAPI) *OllamaClient { @@ -90,12 +94,28 @@ func (llm *OllamaClient) ollamaCompletion(ctx context.Context, prompt string, fo if apiURL == "" { apiURL = "http://localhost:11434/api/chat" } + + messages := []map[string]string{{"role": "user", "content": prompt}} + //if os.Getenv("DISABLE_THINK") == "1" { + // System message to suppress chain-of-thought style outputs. 
+ messages = append([]map[string]string{{ + "role": "system", + "content": "You are a concise assistant. Output ONLY the final answer requested by the user. Do not include reasoning, analysis, or <think> tags.", + }}, messages...) + //} + body := map[string]interface{}{ "model": llm.Model, - "messages": []map[string]string{{"role": "user", "content": prompt}}, + "messages": messages, "stream": false, "format": format, } + + // Optional: Add a stop sequence to prevent <think> tags if they appear + if os.Getenv("DISABLE_THINK") == "1" { + body["options"] = map[string]interface{}{"stop": []string{"<think>"}} + } + jsonBody, _ := json.Marshal(body) req, _ := http.NewRequestWithContext(ctx, http.MethodPost, apiURL, bytes.NewBuffer(jsonBody)) if llm.APIKey != "" { @@ -128,43 +148,112 @@ func (llm *OllamaClient) ollamaCompletion(ctx context.Context, prompt string, fo return "", fmt.Errorf("unrecognized LLM response format: %.200s", string(raw)) } +func normalizeOllamaHost(raw string) string { + if raw == "" { + return "http://localhost:11434" + } + // strip trailing /api/* paths if user provided full endpoint + lower := strings.ToLower(raw) + for _, seg := range []string{"/api/chat", "/api/embeddings", "/api/generate"} { + if strings.HasSuffix(lower, seg) { + return raw[:len(raw)-len(seg)] + } + } + return raw +} + func (llm *OllamaClient) GetEmbeddings(ctx context.Context, input string) ([]float64, error) { - apiURL := llm.BaseURL - if apiURL == "" { - apiURL = "http://localhost:11434/api/embeddings" + host := normalizeOllamaHost(llm.BaseURL) + apiURL := host + "/api/embeddings" + modelName := llm.Model + if llm.EmbeddingModel != "" { + modelName = llm.EmbeddingModel } - body := map[string]interface{}{ - "model": llm.Model, - "prompt": input, + // retry parameters (env override OLLAMA_EMBED_ATTEMPTS) + maxAttempts := 5 + if v := os.Getenv("OLLAMA_EMBED_ATTEMPTS"); v != "" { + if n, err := strconv.Atoi(v); err == nil && n > 0 && n < 20 { + maxAttempts = n + } } - jsonBody, _ := 
json.Marshal(body) - req, _ := http.NewRequestWithContext(ctx, http.MethodPost, apiURL, bytes.NewBuffer(jsonBody)) - if llm.APIKey != "" { - req.Header.Set("Authorization", "Bearer "+llm.APIKey) + baseBackoff := 300 * time.Millisecond + var lastErr error + for attempt := 0; attempt < maxAttempts; attempt++ { + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + body := map[string]interface{}{ + "model": modelName, + "prompt": input, + } + jsonBody, _ := json.Marshal(body) + req, _ := http.NewRequestWithContext(ctx, http.MethodPost, apiURL, bytes.NewBuffer(jsonBody)) + if llm.APIKey != "" { + req.Header.Set("Authorization", "Bearer "+llm.APIKey) + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + resp, err := (&http.Client{}).Do(req) + if err != nil { + lastErr = err + logrus.WithError(err).Warnf("[Ollama] embeddings request attempt=%d failed", attempt+1) + } else { + raw, rerr := io.ReadAll(resp.Body) + resp.Body.Close() + if rerr != nil { + lastErr = rerr + } else { + var generic map[string]json.RawMessage + if jerr := json.Unmarshal(raw, &generic); jerr != nil { + lastErr = fmt.Errorf("unrecognized response (parse): %w", jerr) + } else if embRaw, ok := generic["embedding"]; ok && len(embRaw) > 0 { + var emb []float64 + if jerr := json.Unmarshal(embRaw, &emb); jerr != nil { + lastErr = fmt.Errorf("failed to decode embedding: %w", jerr) + } else if len(emb) == 0 { + lastErr = fmt.Errorf("empty embedding returned") + } else { + return emb, nil + } + } else if drRaw, ok := generic["done_reason"]; ok { + var reason string + _ = json.Unmarshal(drRaw, &reason) + if reason == "load" { // transient model loading state + lastErr = fmt.Errorf("model loading") + } else { + lastErr = fmt.Errorf("unexpected done_reason=%s", reason) + } + } else if errRaw, ok := generic["error"]; ok { + var errMsg string + _ = json.Unmarshal(errRaw, &errMsg) + if errMsg != "" { + lastErr = fmt.Errorf("embedding error: %s", 
errMsg) + } else { + lastErr = fmt.Errorf("embedding error (empty message)") + } + } else { + lastErr = fmt.Errorf("unrecognized embedding response: %.200s", string(raw)) + } + } + } + if lastErr == nil { + break + } + // backoff if not last attempt + if attempt < maxAttempts-1 { + delay := baseBackoff << attempt + if strings.Contains(strings.ToLower(lastErr.Error()), "model loading") { + delay += 1 * time.Second + } + time.Sleep(delay) + } } - req.Header.Set("Content-Type", "application/json") - req.Header.Set("Accept", "application/json") - client := &http.Client{} - resp, err := client.Do(req) - if err != nil { - return nil, err + if lastErr == nil { + lastErr = fmt.Errorf("embedding retrieval failed with no error info") } - defer resp.Body.Close() - raw, err := io.ReadAll(resp.Body) - if err != nil { - return nil, err - } - var ollama struct { - Embedding []float64 `json:"embedding"` - Error string `json:"error"` - } - if err := json.Unmarshal(raw, &ollama); err == nil && len(ollama.Embedding) > 0 { - return ollama.Embedding, nil - } - if ollama.Error != "" { - return nil, fmt.Errorf("embedding error: %s", ollama.Error) - } - return nil, fmt.Errorf("unrecognized embedding response: %.200s", string(raw)) + return nil, lastErr } func (llm *OllamaClient) TranslateToEnglish(ctx context.Context, message string) (string, error) { diff --git a/repository.go b/repository.go index aa3f79e..d51870b 100644 --- a/repository.go +++ b/repository.go @@ -2,7 +2,10 @@ package main import ( "context" + "fmt" "os" + "strconv" + "strings" "time" "github.com/jackc/pgx/v5/pgconn" @@ -263,6 +266,69 @@ func (r *PGChatRepository) CreateUser(ctx context.Context, username, passwordHas return err } +// InsertSentenceEmbedding inserts a sentence embedding if not already present (unique index on visit_id,sentence) +func (r *PGChatRepository) InsertSentenceEmbedding(ctx context.Context, visitID, sentence, translated string, embedding []float64) error { + if r == nil || r.pool == nil { + 
return nil + } + l := len(embedding) + if l != 384 && l != 1536 { + err := fmt.Errorf("unsupported embedding length %d (expected 384 or 1536)", l) + logrus.WithError(err).Warn("skipping sentence embedding insert") + return err + } + // Build vector literal + var b strings.Builder + b.Grow(len(embedding)*8 + 2) + b.WriteByte('[') + for i, v := range embedding { + if i > 0 { + b.WriteByte(',') + } + b.WriteString(strconv.FormatFloat(v, 'f', -1, 64)) + } + b.WriteByte(']') + vecLiteral := b.String() + + ctx, cancel := context.WithTimeout(ctx, 6*time.Second) + defer cancel() + + var sqlStmt string + if l == 384 { + sqlStmt = `INSERT INTO sentence_embeddings (visit_id, sentence, translated, embedding_384) + VALUES ($1,$2,$3,$4::vector) + ON CONFLICT (visit_id, sentence) DO UPDATE + SET embedding_384 = EXCLUDED.embedding_384, + translated = COALESCE(sentence_embeddings.translated, EXCLUDED.translated)` + } else { // 1536 + sqlStmt = `INSERT INTO sentence_embeddings (visit_id, sentence, translated, embedding_1536) + VALUES ($1,$2,$3,$4::vector) + ON CONFLICT (visit_id, sentence) DO UPDATE + SET embedding_1536 = EXCLUDED.embedding_1536, + translated = COALESCE(sentence_embeddings.translated, EXCLUDED.translated)` + } + _, err := r.pool.Exec(ctx, sqlStmt, visitID, sentence, translated, vecLiteral) + if err != nil { + logrus.WithError(err).Warn("failed to upsert sentence embedding (dual columns)") + } + return err +} + +// ExistsSentenceEmbedding checks if a sentence embedding exists +func (r *PGChatRepository) ExistsSentenceEmbedding(ctx context.Context, visitID, sentence string) (bool, error) { + if r == nil || r.pool == nil { + return false, nil + } + ctx, cancel := context.WithTimeout(ctx, 2*time.Second) + defer cancel() + var exists bool + err := r.pool.QueryRow(ctx, `SELECT EXISTS (SELECT 1 FROM sentence_embeddings WHERE visit_id=$1 AND sentence=$2)`, visitID, sentence).Scan(&exists) + if err != nil { + return false, err + } + return exists, nil +} + // Close releases 
pool resources func (r *PGChatRepository) Close() { if r != nil && r.pool != nil { diff --git a/sentence_embeddings.go b/sentence_embeddings.go new file mode 100644 index 0000000..50f3ca2 --- /dev/null +++ b/sentence_embeddings.go @@ -0,0 +1,154 @@ +package main + +import ( + "context" + "os" + "regexp" + "strings" + "time" + + "github.com/sirupsen/logrus" +) + +var sentenceSplitRegex = regexp.MustCompile(`(?m)(?:[^.!?\n]+[.!?]|[^.!?\n]+$)`) + +// configurable via env (seconds); defaults chosen for model cold start friendliness +func envDuration(key string, def time.Duration) time.Duration { + if v := os.Getenv(key); v != "" { + if d, err := time.ParseDuration(v); err == nil { + return d + } + } + return def +} + +// startSentenceEmbeddingBackfill launches a background goroutine that iterates all visits +// and stores (visit_id, sentence, translated, embedding) records in sentence_embeddings table +// if they do not already exist (relying on unique index ON CONFLICT DO NOTHING). +func startSentenceEmbeddingBackfill(repo *PGChatRepository, llm LLMClientAPI, vdb *VisitDB) { + if repo == nil || llm == nil || vdb == nil { + logrus.Info("Sentence embedding backfill skipped (missing repo, llm or vdb)") + return + } + if disable := strings.ToLower(os.Getenv("SENTENCE_BACKFILL_DISABLE")); disable == "1" || disable == "true" { + logrus.Info("Sentence embedding backfill disabled via SENTENCE_BACKFILL_DISABLE env var") + return + } + translateTimeout := envDuration("TRANSLATE_TIMEOUT", 45*time.Second) + embeddingTimeout := envDuration("EMBEDDING_TIMEOUT", 45*time.Second) + maxTranslateAttempts := 3 + maxEmbeddingAttempts := 3 + go func() { + start := time.Now() + logrus.WithFields(logrus.Fields{"translateTimeout": translateTimeout, "embeddingTimeout": embeddingTimeout}).Info("Sentence embedding backfill started") + processed := 0 + inserted := 0 + skippedExisting := 0 + skippedDueToFailures := 0 + for _, visit := range vdb.visitsDB { // visitsDB accessible within package + if 
strings.TrimSpace(visit.Visit) == "" { + continue + } + sentences := extractSentences(visit.Visit) + for _, s := range sentences { + processed++ + trimmed := strings.TrimSpace(s) + if len(trimmed) < 3 { + continue + } + // Existence check before any LLM calls + existsCtx, existsCancel := context.WithTimeout(context.Background(), 2*time.Second) + exists, err := repo.ExistsSentenceEmbedding(existsCtx, visit.ID, trimmed) + existsCancel() + if err != nil { + logrus.WithError(err).Warnf("Exists check failed visit=%s sentence=%q", visit.ID, trimmed) + } else if exists { + skippedExisting++ + continue + } + + // Translation with retry/backoff + var translated string + translateErr := retry(maxTranslateAttempts, 0, func(at int) error { + ctx, cancel := context.WithTimeout(context.Background(), translateTimeout) + defer cancel() + resp, err := llm.TranslateToEnglish(ctx, trimmed) + if err != nil { + logrus.WithError(err).Warnf("Translate attempt=%d failed visit=%s sentence=%q", at+1, visit.ID, trimmed) + return err + } + translated = strings.TrimSpace(resp) + return nil + }) + if translateErr != nil || translated == "" { + translated = trimmed // fallback keep original language + } + + // Embedding with retry/backoff (skip if translation totally failed with deadline each time) + var emb []float64 + embErr := retry(maxEmbeddingAttempts, 0, func(at int) error { + ctx, cancel := context.WithTimeout(context.Background(), embeddingTimeout) + defer cancel() + vec, err := llm.GetEmbeddings(ctx, translated) + if err != nil { + logrus.WithError(err).Warnf("Embeddings attempt=%d failed visit=%s sentence=%q", at+1, visit.ID, trimmed) + return err + } + emb = vec + return nil + }) + if embErr != nil { + skippedDueToFailures++ + continue + } + + persistCtx, pcancel := context.WithTimeout(context.Background(), 5*time.Second) + if err := repo.InsertSentenceEmbedding(persistCtx, visit.ID, trimmed, translated, emb); err == nil { + inserted++ + } + pcancel() + // Throttle (configurable?) 
+ time.Sleep(50 * time.Millisecond) + } + } + logrus.Infof("Sentence embedding backfill complete processed=%d inserted=%d skipped_existing=%d skipped_failures=%d elapsed=%s", processed, inserted, skippedExisting, skippedDueToFailures, time.Since(start)) + }() +} + +// retry executes fn up to attempts times with exponential backoff starting at base (or 200ms if base==0) +func retry(attempts int, base time.Duration, fn func(attempt int) error) error { + if attempts <= 0 { + return nil + } + if base <= 0 { + base = 200 * time.Millisecond + } + var err error + for a := 0; a < attempts; a++ { + err = fn(a) + if err == nil { + return nil + } + // backoff except after last attempt + if a < attempts-1 { + backoff := base << a // exponential + time.Sleep(backoff) + } + } + return err +} + +// extractSentences splits a block of text into sentence-like units. +func extractSentences(text string) []string { + // First replace newlines with space to keep regex simpler, keep periods. + normalized := strings.ReplaceAll(text, "\n", " ") + matches := sentenceSplitRegex.FindAllString(normalized, -1) + var out []string + for _, m := range matches { + m = strings.TrimSpace(m) + if m != "" { + out = append(out, m) + } + } + return out +}