embedding db
This commit is contained in:
parent
46a4374e69
commit
c63890b104
39
Makefile
39
Makefile
|
|
@ -1,6 +1,6 @@
|
|||
# Makefile for running the Vet Clinic Chat Assistant locally with Ollama
|
||||
|
||||
.PHONY: run ollama-start ollama-stop ollama-pull ollama-status
|
||||
.PHONY: run ollama-start ollama-stop ollama-pull ollama-status curl-embed curl-translate curl-chat
|
||||
|
||||
# Start Ollama server (if not already running)
|
||||
ollama-start:
|
||||
|
|
@ -20,6 +20,15 @@ ollama-pull:
|
|||
ollama-status:
|
||||
ollama list
|
||||
|
||||
# Ollama host & models (override as needed)
|
||||
OLLAMA_HOST ?= http://localhost:11434
|
||||
# Primary chat / reasoning model (already using OPENAI_MODEL var for compatibility)
|
||||
OPENAI_MODEL ?= qwen3:latest
|
||||
# Optional separate embedding model
|
||||
OLLAMA_EMBED_MODEL ?= all-minilm
|
||||
# Translation prompt (mirrors config.yaml translate_prompt). Can override: make curl-translate PROMPT="..." TRANSLATE_PROMPT="..."
|
||||
TRANSLATE_PROMPT ?= Translate the following veterinary-related sentence to English. Input: '$(PROMPT)'. Return ONLY the English translation, no extra text, no markdown, no quotes. If already English, return as is.
|
||||
|
||||
# Database configuration (override via: make run DB_PASSWORD=secret DB_NAME=other)
|
||||
DB_HOST ?= localhost
|
||||
DB_PORT ?= 5432
|
||||
|
|
@ -70,3 +79,31 @@ test-coverage:
|
|||
# Run tests with HTML coverage report
|
||||
test-coverage-html: test-coverage
|
||||
go tool cover -html=coverage.out
|
||||
|
||||
# --- Utility curl targets ---
# Manual smoke tests against the Ollama HTTP API. Every target requires
# PROMPT=... and the jq binary; curl/jq failures are tolerated (|| true)
# so make does not abort on non-JSON or error responses.

# Embed a single prompt with the configured embedding model.
# Example: make curl-embed PROMPT="warm up"
curl-embed:
	@test -n "$(PROMPT)" || { echo "Usage: make curl-embed PROMPT='text' [OLLAMA_EMBED_MODEL=model]"; exit 1; }
	@echo "[curl-embed] model=$(OLLAMA_EMBED_MODEL) prompt='$(PROMPT)'"
	@curl -sS -X POST "$(OLLAMA_HOST)/api/embeddings" \
		-H 'Content-Type: application/json' \
		-d '{"model":"$(OLLAMA_EMBED_MODEL)","prompt":"$(PROMPT)"}' | jq . || true

# Translate PROMPT to English via the chat endpoint using TRANSLATE_PROMPT.
# The prompt text is JSON-encoded with `jq -Rs` so quotes and newlines survive.
# Example: make curl-translate PROMPT="A kutyám nem eszik"
curl-translate:
	@test -n "$(PROMPT)" || { echo "Usage: make curl-translate PROMPT='sentence to translate'"; exit 1; }
	@echo "[curl-translate] model=$(OPENAI_MODEL)"; \
	PROMPT_JSON=$$(printf '%s' "$(TRANSLATE_PROMPT)" | jq -Rs .); \
	curl -sS -X POST "$(OLLAMA_HOST)/api/chat" \
		-H 'Content-Type: application/json' \
		-d '{"model":"$(OPENAI_MODEL)","messages":[{"role":"user","content":'$$PROMPT_JSON'}],"stream":false}' | jq -r '.message.content' || true

# Generic chat invocation (raw user PROMPT)
# Example: make curl-chat PROMPT="List 3 dog breeds"
curl-chat:
	@test -n "$(PROMPT)" || { echo "Usage: make curl-chat PROMPT='your message'"; exit 1; }
	@echo "[curl-chat] model=$(OPENAI_MODEL)"; \
	PROMPT_JSON=$$(printf '%s' "$(PROMPT)" | jq -Rs .); \
	curl -sS -X POST "$(OLLAMA_HOST)/api/chat" \
		-H 'Content-Type: application/json' \
		-d '{"model":"$(OPENAI_MODEL)","messages":[{"role":"user","content":'$$PROMPT_JSON'}],"stream":false}' | jq -r '.message.content' || true
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
llm:
|
||||
extract_keywords_prompt: "You will extract structured data from the user input. Input text: {{.Message}}. Return ONLY valid minified JSON object with keys: translate (English translation of input), keyword (array of 3-5 concise English veterinary-related keywords derived strictly from the input), animal (animal mentioned or 'unknown'). Example: {\"translate\":\"dog has diarrhea\",\"keyword\":[\"diarrhea\",\"digestive\"],\"animal\":\"dog\"}. Do not add extra text, markdown, or quotes outside JSON."
|
||||
disambiguate_prompt: "Given candidate visit entries (JSON array): {{.Entries}} and user message: {{.Message}} choose the best matching visit's ID. Return ONLY JSON: {\"visitReason\":\"<one of the candidate IDs or empty string if none>\"}. No other text."
|
||||
translate_prompt: "Translate the following veterinary-related sentence to English. Input: '{{.Message}}'. Return ONLY the English translation, no extra text, no markdown, no quotes. If already English, return as is."
|
||||
translate_prompt: "Translate the following veterinary-related sentence to English. Input: '{{.Message}}'. Return ONLY the English translation as one concise sentence. IMPORTANT: Do NOT output any <think> tags, reasoning, analysis, or explanations. No markdown, no quotes. If already English, return it unchanged."
|
||||
|
|
|
|||
9
llm.go
9
llm.go
|
|
@ -55,7 +55,14 @@ func NewLLMClientFromEnv(repo ChatRepositoryAPI) LLMClientAPI {
|
|||
case "openai", "openrouter":
|
||||
return NewOpenAIClient(apiKey, baseURL, model, repo)
|
||||
case "ollama", "":
|
||||
return NewOllamaClient(apiKey, baseURL, model, repo)
|
||||
oc := NewOllamaClient(apiKey, baseURL, model, repo)
|
||||
em := os.Getenv("OLLAMA_EMBED_MODEL")
|
||||
if strings.TrimSpace(em) == "" {
|
||||
em = "all-minilm"
|
||||
logrus.Infof("No OLLAMA_EMBED_MODEL specified; defaulting embedding model to %s", em)
|
||||
}
|
||||
oc.EmbeddingModel = em
|
||||
return oc
|
||||
default:
|
||||
logrus.Warnf("Unknown LLM_PROVIDER %q, defaulting to Ollama", provider)
|
||||
return NewOllamaClient(apiKey, baseURL, model, repo)
|
||||
|
|
|
|||
3
main.go
3
main.go
|
|
@ -72,6 +72,9 @@ func main() {
|
|||
// Initialize LLM client
|
||||
llm := NewLLMClientFromEnv(repo)
|
||||
|
||||
// Launch background backfill of sentence embeddings (non-blocking)
|
||||
startSentenceEmbeddingBackfill(repo, llm, &visitDB)
|
||||
|
||||
// Wrap templates for controller
|
||||
uiTmpl := &TemplateWrapper{Tmpl: uiTemplate}
|
||||
uiDBEditTmpl := &TemplateWrapper{Tmpl: uiDBEditTemplate}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,8 @@
|
|||
-- +goose Up
-- Widen visit_id from INTEGER to TEXT so non-numeric visit identifiers
-- can be stored; USING casts existing values in place.
ALTER TABLE sentence_embeddings
    ALTER COLUMN visit_id TYPE TEXT USING visit_id::text;

-- +goose Down
-- Revert to INTEGER. NOTE(review): this will fail if any stored
-- visit_id is non-numeric by the time the rollback runs.
ALTER TABLE sentence_embeddings
    ALTER COLUMN visit_id TYPE INTEGER USING visit_id::integer;
|
||||
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
-- +goose Up
-- Enforce one embedding row per (visit_id, sentence); application code
-- relies on this index for its ON CONFLICT upsert target.
CREATE UNIQUE INDEX IF NOT EXISTS ux_sentence_embeddings_visit_sentence
    ON sentence_embeddings(visit_id, sentence);

-- +goose Down
DROP INDEX IF EXISTS ux_sentence_embeddings_visit_sentence;
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
-- +goose Up
-- WARNING: This alters the embeddings vector dimension from 1536 to 384.
-- Ensure you are switching to a 384-dim embedding model (e.g., all-minilm).
-- If existing rows have 1536-d vectors this command will fail; you may need to
-- TRUNCATE TABLE sentence_embeddings first (after backing up) before applying.
ALTER TABLE sentence_embeddings
    ALTER COLUMN embeddings TYPE vector(384);

-- +goose Down
-- Revert to 1536 dimensions (for models like OpenAI text-embedding-3-large).
-- Will fail if existing rows are 384-d.
ALTER TABLE sentence_embeddings
    ALTER COLUMN embeddings TYPE vector(1536);
|
||||
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
-- +goose Up
-- Add separate columns for different embedding dimensions.
-- Existing 'embeddings' column (if present) is left untouched for backward compatibility.
-- Application code will now populate embedding_384 or embedding_1536 instead,
-- selected at insert time by the length of the vector being stored.
ALTER TABLE sentence_embeddings
    ADD COLUMN IF NOT EXISTS embedding_384 vector(384),
    ADD COLUMN IF NOT EXISTS embedding_1536 vector(1536);

-- +goose Down
-- Dropping these columns discards any embeddings written after this migration.
ALTER TABLE sentence_embeddings
    DROP COLUMN IF EXISTS embedding_384,
    DROP COLUMN IF EXISTS embedding_1536;
|
||||
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
-- +goose Up
-- Drop the legacy single-dimension embeddings column (was NOT NULL) to allow inserts
-- that now use embedding_384 / embedding_1536. All new data goes into those columns.
-- NOTE(review): this is destructive — any data still in 'embeddings' is lost.
ALTER TABLE sentence_embeddings
    DROP COLUMN IF EXISTS embeddings;

-- +goose Down
-- Re-create the legacy embeddings column (empty) as vector(1536) NULLABLE for rollback.
-- Original data is NOT restored.
ALTER TABLE sentence_embeddings
    ADD COLUMN IF NOT EXISTS embeddings vector(1536);
|
||||
|
||||
129
ollama_client.go
129
ollama_client.go
|
|
@ -7,7 +7,10 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
|
@ -18,6 +21,7 @@ type OllamaClient struct {
|
|||
APIKey string
|
||||
BaseURL string
|
||||
Model string
|
||||
EmbeddingModel string
|
||||
Repo ChatRepositoryAPI
|
||||
}
|
||||
|
||||
|
|
@ -90,12 +94,28 @@ func (llm *OllamaClient) ollamaCompletion(ctx context.Context, prompt string, fo
|
|||
if apiURL == "" {
|
||||
apiURL = "http://localhost:11434/api/chat"
|
||||
}
|
||||
|
||||
messages := []map[string]string{{"role": "user", "content": prompt}}
|
||||
//if os.Getenv("DISABLE_THINK") == "1" {
|
||||
// System message to suppress chain-of-thought style outputs.
|
||||
messages = append([]map[string]string{{
|
||||
"role": "system",
|
||||
"content": "You are a concise assistant. Output ONLY the final answer requested by the user. Do not include reasoning, analysis, or <think> tags.",
|
||||
}}, messages...)
|
||||
//}
|
||||
|
||||
body := map[string]interface{}{
|
||||
"model": llm.Model,
|
||||
"messages": []map[string]string{{"role": "user", "content": prompt}},
|
||||
"messages": messages,
|
||||
"stream": false,
|
||||
"format": format,
|
||||
}
|
||||
|
||||
// Optional: Add a stop sequence to prevent <think> tags if they appear
|
||||
if os.Getenv("DISABLE_THINK") == "1" {
|
||||
body["options"] = map[string]interface{}{"stop": []string{"<think>"}}
|
||||
}
|
||||
|
||||
jsonBody, _ := json.Marshal(body)
|
||||
req, _ := http.NewRequestWithContext(ctx, http.MethodPost, apiURL, bytes.NewBuffer(jsonBody))
|
||||
if llm.APIKey != "" {
|
||||
|
|
@ -128,13 +148,44 @@ func (llm *OllamaClient) ollamaCompletion(ctx context.Context, prompt string, fo
|
|||
return "", fmt.Errorf("unrecognized LLM response format: %.200s", string(raw))
|
||||
}
|
||||
|
||||
// normalizeOllamaHost returns the bare host URL for an Ollama server.
// It accepts either a host ("http://localhost:11434") or a full endpoint
// URL ("http://localhost:11434/api/chat") and strips any known /api/*
// suffix plus trailing slashes, so callers can safely append their own
// endpoint path without producing "//" in the URL. An empty input falls
// back to the default local server address.
func normalizeOllamaHost(raw string) string {
	if raw == "" {
		return "http://localhost:11434"
	}
	// Drop trailing slashes first so "host/" and "host/api/chat/" both normalize.
	host := strings.TrimRight(raw, "/")
	if host == "" {
		return "http://localhost:11434"
	}
	// Case-insensitive suffix match, but slice the original-cased string.
	lower := strings.ToLower(host)
	for _, seg := range []string{"/api/chat", "/api/embeddings", "/api/generate"} {
		if strings.HasSuffix(lower, seg) {
			return host[:len(host)-len(seg)]
		}
	}
	return host
}
|
||||
|
||||
func (llm *OllamaClient) GetEmbeddings(ctx context.Context, input string) ([]float64, error) {
|
||||
apiURL := llm.BaseURL
|
||||
if apiURL == "" {
|
||||
apiURL = "http://localhost:11434/api/embeddings"
|
||||
host := normalizeOllamaHost(llm.BaseURL)
|
||||
apiURL := host + "/api/embeddings"
|
||||
modelName := llm.Model
|
||||
if llm.EmbeddingModel != "" {
|
||||
modelName = llm.EmbeddingModel
|
||||
}
|
||||
// retry parameters (env override OLLAMA_EMBED_ATTEMPTS)
|
||||
maxAttempts := 5
|
||||
if v := os.Getenv("OLLAMA_EMBED_ATTEMPTS"); v != "" {
|
||||
if n, err := strconv.Atoi(v); err == nil && n > 0 && n < 20 {
|
||||
maxAttempts = n
|
||||
}
|
||||
}
|
||||
baseBackoff := 300 * time.Millisecond
|
||||
var lastErr error
|
||||
for attempt := 0; attempt < maxAttempts; attempt++ {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
default:
|
||||
}
|
||||
body := map[string]interface{}{
|
||||
"model": llm.Model,
|
||||
"model": modelName,
|
||||
"prompt": input,
|
||||
}
|
||||
jsonBody, _ := json.Marshal(body)
|
||||
|
|
@ -144,27 +195,65 @@ func (llm *OllamaClient) GetEmbeddings(ctx context.Context, input string) ([]flo
|
|||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Accept", "application/json")
|
||||
client := &http.Client{}
|
||||
resp, err := client.Do(req)
|
||||
resp, err := (&http.Client{}).Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
lastErr = err
|
||||
logrus.WithError(err).Warnf("[Ollama] embeddings request attempt=%d failed", attempt+1)
|
||||
} else {
|
||||
raw, rerr := io.ReadAll(resp.Body)
|
||||
resp.Body.Close()
|
||||
if rerr != nil {
|
||||
lastErr = rerr
|
||||
} else {
|
||||
var generic map[string]json.RawMessage
|
||||
if jerr := json.Unmarshal(raw, &generic); jerr != nil {
|
||||
lastErr = fmt.Errorf("unrecognized response (parse): %w", jerr)
|
||||
} else if embRaw, ok := generic["embedding"]; ok && len(embRaw) > 0 {
|
||||
var emb []float64
|
||||
if jerr := json.Unmarshal(embRaw, &emb); jerr != nil {
|
||||
lastErr = fmt.Errorf("failed to decode embedding: %w", jerr)
|
||||
} else if len(emb) == 0 {
|
||||
lastErr = fmt.Errorf("empty embedding returned")
|
||||
} else {
|
||||
return emb, nil
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
raw, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
} else if drRaw, ok := generic["done_reason"]; ok {
|
||||
var reason string
|
||||
_ = json.Unmarshal(drRaw, &reason)
|
||||
if reason == "load" { // transient model loading state
|
||||
lastErr = fmt.Errorf("model loading")
|
||||
} else {
|
||||
lastErr = fmt.Errorf("unexpected done_reason=%s", reason)
|
||||
}
|
||||
var ollama struct {
|
||||
Embedding []float64 `json:"embedding"`
|
||||
Error string `json:"error"`
|
||||
} else if errRaw, ok := generic["error"]; ok {
|
||||
var errMsg string
|
||||
_ = json.Unmarshal(errRaw, &errMsg)
|
||||
if errMsg != "" {
|
||||
lastErr = fmt.Errorf("embedding error: %s", errMsg)
|
||||
} else {
|
||||
lastErr = fmt.Errorf("embedding error (empty message)")
|
||||
}
|
||||
if err := json.Unmarshal(raw, &ollama); err == nil && len(ollama.Embedding) > 0 {
|
||||
return ollama.Embedding, nil
|
||||
} else {
|
||||
lastErr = fmt.Errorf("unrecognized embedding response: %.200s", string(raw))
|
||||
}
|
||||
if ollama.Error != "" {
|
||||
return nil, fmt.Errorf("embedding error: %s", ollama.Error)
|
||||
}
|
||||
return nil, fmt.Errorf("unrecognized embedding response: %.200s", string(raw))
|
||||
}
|
||||
if lastErr == nil {
|
||||
break
|
||||
}
|
||||
// backoff if not last attempt
|
||||
if attempt < maxAttempts-1 {
|
||||
delay := baseBackoff << attempt
|
||||
if strings.Contains(strings.ToLower(lastErr.Error()), "model loading") {
|
||||
delay += 1 * time.Second
|
||||
}
|
||||
time.Sleep(delay)
|
||||
}
|
||||
}
|
||||
if lastErr == nil {
|
||||
lastErr = fmt.Errorf("embedding retrieval failed with no error info")
|
||||
}
|
||||
return nil, lastErr
|
||||
}
|
||||
|
||||
func (llm *OllamaClient) TranslateToEnglish(ctx context.Context, message string) (string, error) {
|
||||
|
|
|
|||
|
|
@ -2,7 +2,10 @@ package main
|
|||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/jackc/pgx/v5/pgconn"
|
||||
|
|
@ -263,6 +266,69 @@ func (r *PGChatRepository) CreateUser(ctx context.Context, username, passwordHas
|
|||
return err
|
||||
}
|
||||
|
||||
// InsertSentenceEmbedding upserts one (visit_id, sentence) row into the
// sentence_embeddings table. The embedding vector is written into
// embedding_384 or embedding_1536 depending on its length; any other
// length is rejected with an error. On conflict (unique index on
// visit_id, sentence) the embedding column is overwritten and an
// existing non-NULL translation is preserved via COALESCE.
// A nil receiver or nil pool is a silent no-op returning nil.
func (r *PGChatRepository) InsertSentenceEmbedding(ctx context.Context, visitID, sentence, translated string, embedding []float64) error {
	if r == nil || r.pool == nil {
		return nil
	}
	// Only the two dimensions with dedicated vector columns are supported.
	l := len(embedding)
	if l != 384 && l != 1536 {
		err := fmt.Errorf("unsupported embedding length %d (expected 384 or 1536)", l)
		logrus.WithError(err).Warn("skipping sentence embedding insert")
		return err
	}
	// Build a pgvector text literal "[v1,v2,...]"; the SQL casts it with ::vector.
	var b strings.Builder
	b.Grow(len(embedding)*8 + 2) // rough size estimate to avoid re-growth
	b.WriteByte('[')
	for i, v := range embedding {
		if i > 0 {
			b.WriteByte(',')
		}
		b.WriteString(strconv.FormatFloat(v, 'f', -1, 64))
	}
	b.WriteByte(']')
	vecLiteral := b.String()

	// Bound the DB round trip so a slow pool cannot stall the caller
	// (the background backfill calls this per sentence).
	ctx, cancel := context.WithTimeout(ctx, 6*time.Second)
	defer cancel()

	// Pick the statement targeting the column that matches the dimension.
	var sqlStmt string
	if l == 384 {
		sqlStmt = `INSERT INTO sentence_embeddings (visit_id, sentence, translated, embedding_384)
VALUES ($1,$2,$3,$4::vector)
ON CONFLICT (visit_id, sentence) DO UPDATE
SET embedding_384 = EXCLUDED.embedding_384,
translated = COALESCE(sentence_embeddings.translated, EXCLUDED.translated)`
	} else { // 1536
		sqlStmt = `INSERT INTO sentence_embeddings (visit_id, sentence, translated, embedding_1536)
VALUES ($1,$2,$3,$4::vector)
ON CONFLICT (visit_id, sentence) DO UPDATE
SET embedding_1536 = EXCLUDED.embedding_1536,
translated = COALESCE(sentence_embeddings.translated, EXCLUDED.translated)`
	}
	_, err := r.pool.Exec(ctx, sqlStmt, visitID, sentence, translated, vecLiteral)
	if err != nil {
		// Log and return: the caller decides whether a failed upsert is fatal.
		logrus.WithError(err).Warn("failed to upsert sentence embedding (dual columns)")
	}
	return err
}
|
||||
|
||||
// ExistsSentenceEmbedding reports whether a row for (visitID, sentence)
// already exists in sentence_embeddings. A nil receiver or nil pool
// returns (false, nil) so callers can treat "no DB" as "not present".
// The query is capped at 2 seconds to keep the backfill loop responsive.
func (r *PGChatRepository) ExistsSentenceEmbedding(ctx context.Context, visitID, sentence string) (bool, error) {
	if r == nil || r.pool == nil {
		return false, nil
	}
	ctx, cancel := context.WithTimeout(ctx, 2*time.Second)
	defer cancel()
	var exists bool
	err := r.pool.QueryRow(ctx, `SELECT EXISTS (SELECT 1 FROM sentence_embeddings WHERE visit_id=$1 AND sentence=$2)`, visitID, sentence).Scan(&exists)
	if err != nil {
		return false, err
	}
	return exists, nil
}
|
||||
|
||||
// Close releases pool resources
|
||||
func (r *PGChatRepository) Close() {
|
||||
if r != nil && r.pool != nil {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,154 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
var sentenceSplitRegex = regexp.MustCompile(`(?m)(?:[^.!?\n]+[.!?]|[^.!?\n]+$)`)
|
||||
|
||||
// configurable via env (seconds); defaults chosen for model cold start friendliness
|
||||
func envDuration(key string, def time.Duration) time.Duration {
|
||||
if v := os.Getenv(key); v != "" {
|
||||
if d, err := time.ParseDuration(v); err == nil {
|
||||
return d
|
||||
}
|
||||
}
|
||||
return def
|
||||
}
|
||||
|
||||
// startSentenceEmbeddingBackfill launches a background goroutine that walks
// every visit in vdb, splits each visit text into sentences, translates each
// sentence to English, fetches an embedding for the translation, and upserts
// (visit_id, sentence, translated, embedding) into sentence_embeddings.
// Sentences that already have a row are skipped before any LLM call is made.
// Timeouts come from TRANSLATE_TIMEOUT / EMBEDDING_TIMEOUT (Go duration
// syntax, default 45s each); SENTENCE_BACKFILL_DISABLE=1|true skips the run.
// NOTE(review): the goroutine has no cancellation context and runs to
// completion — confirm that is acceptable for process shutdown.
func startSentenceEmbeddingBackfill(repo *PGChatRepository, llm LLMClientAPI, vdb *VisitDB) {
	// All three collaborators are required; bail out quietly otherwise.
	if repo == nil || llm == nil || vdb == nil {
		logrus.Info("Sentence embedding backfill skipped (missing repo, llm or vdb)")
		return
	}
	if disable := strings.ToLower(os.Getenv("SENTENCE_BACKFILL_DISABLE")); disable == "1" || disable == "true" {
		logrus.Info("Sentence embedding backfill disabled via SENTENCE_BACKFILL_DISABLE env var")
		return
	}
	translateTimeout := envDuration("TRANSLATE_TIMEOUT", 45*time.Second)
	embeddingTimeout := envDuration("EMBEDDING_TIMEOUT", 45*time.Second)
	maxTranslateAttempts := 3
	maxEmbeddingAttempts := 3
	go func() {
		start := time.Now()
		logrus.WithFields(logrus.Fields{"translateTimeout": translateTimeout, "embeddingTimeout": embeddingTimeout}).Info("Sentence embedding backfill started")
		// Counters for the summary log line at the end of the run.
		processed := 0
		inserted := 0
		skippedExisting := 0
		skippedDueToFailures := 0
		for _, visit := range vdb.visitsDB { // visitsDB accessible within package
			if strings.TrimSpace(visit.Visit) == "" {
				continue
			}
			sentences := extractSentences(visit.Visit)
			for _, s := range sentences {
				processed++
				trimmed := strings.TrimSpace(s)
				// Skip fragments too short to be meaningful sentences.
				if len(trimmed) < 3 {
					continue
				}
				// Existence check before any LLM calls — avoids paying
				// translation/embedding cost for rows already stored.
				existsCtx, existsCancel := context.WithTimeout(context.Background(), 2*time.Second)
				exists, err := repo.ExistsSentenceEmbedding(existsCtx, visit.ID, trimmed)
				existsCancel()
				if err != nil {
					// A failed check falls through to the upsert path rather than skipping.
					logrus.WithError(err).Warnf("Exists check failed visit=%s sentence=%q", visit.ID, trimmed)
				} else if exists {
					skippedExisting++
					continue
				}

				// Translation with retry/backoff; each attempt gets its own context.
				var translated string
				translateErr := retry(maxTranslateAttempts, 0, func(at int) error {
					ctx, cancel := context.WithTimeout(context.Background(), translateTimeout)
					defer cancel()
					resp, err := llm.TranslateToEnglish(ctx, trimmed)
					if err != nil {
						logrus.WithError(err).Warnf("Translate attempt=%d failed visit=%s sentence=%q", at+1, visit.ID, trimmed)
						return err
					}
					translated = strings.TrimSpace(resp)
					return nil
				})
				if translateErr != nil || translated == "" {
					translated = trimmed // fallback keep original language
				}

				// Embedding with retry/backoff; the embedding is computed from
				// the (possibly fallback) translated text.
				var emb []float64
				embErr := retry(maxEmbeddingAttempts, 0, func(at int) error {
					ctx, cancel := context.WithTimeout(context.Background(), embeddingTimeout)
					defer cancel()
					vec, err := llm.GetEmbeddings(ctx, translated)
					if err != nil {
						logrus.WithError(err).Warnf("Embeddings attempt=%d failed visit=%s sentence=%q", at+1, visit.ID, trimmed)
						return err
					}
					emb = vec
					return nil
				})
				if embErr != nil {
					// Without an embedding there is nothing to store.
					skippedDueToFailures++
					continue
				}

				// Persist; the repository upserts on (visit_id, sentence).
				persistCtx, pcancel := context.WithTimeout(context.Background(), 5*time.Second)
				if err := repo.InsertSentenceEmbedding(persistCtx, visit.ID, trimmed, translated, emb); err == nil {
					inserted++
				}
				pcancel()
				// Throttle between sentences to keep load on the LLM/DB modest.
				time.Sleep(50 * time.Millisecond)
			}
		}
		logrus.Infof("Sentence embedding backfill complete processed=%d inserted=%d skipped_existing=%d skipped_failures=%d elapsed=%s", processed, inserted, skippedExisting, skippedDueToFailures, time.Since(start))
	}()
}
|
||||
|
||||
// retry executes fn up to attempts times with exponential backoff starting at base (or 200ms if base==0)
|
||||
func retry(attempts int, base time.Duration, fn func(attempt int) error) error {
|
||||
if attempts <= 0 {
|
||||
return nil
|
||||
}
|
||||
if base <= 0 {
|
||||
base = 200 * time.Millisecond
|
||||
}
|
||||
var err error
|
||||
for a := 0; a < attempts; a++ {
|
||||
err = fn(a)
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
// backoff except after last attempt
|
||||
if a < attempts-1 {
|
||||
backoff := base << a // exponential
|
||||
time.Sleep(backoff)
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// extractSentences splits a block of text into sentence-like units.
|
||||
func extractSentences(text string) []string {
|
||||
// First replace newlines with space to keep regex simpler, keep periods.
|
||||
normalized := strings.ReplaceAll(text, "\n", " ")
|
||||
matches := sentenceSplitRegex.FindAllString(normalized, -1)
|
||||
var out []string
|
||||
for _, m := range matches {
|
||||
m = strings.TrimSpace(m)
|
||||
if m != "" {
|
||||
out = append(out, m)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
Loading…
Reference in New Issue