From 2bd73332337bd3f0ec5af06aa247d8a86bb5df6b Mon Sep 17 00:00:00 2001 From: lehel Date: Wed, 8 Oct 2025 15:08:35 +0200 Subject: [PATCH] new table + translate call --- config.go | 1 + config/config.yaml | 1 + llm.go | 1 + .../0003_create_sentence_embeddings_table.up.sql | 16 ++++++++++++++++ ollama_client.go | 16 ++++++++++++++++ openai_client.go | 16 ++++++++++++++++ 6 files changed, 51 insertions(+) create mode 100644 migrations/0003_create_sentence_embeddings_table.up.sql diff --git a/config.go b/config.go index df22298..0cb9568 100644 --- a/config.go +++ b/config.go @@ -8,6 +8,7 @@ type Config struct { LLM struct { ExtractKeywordsPrompt string `yaml:"extract_keywords_prompt"` DisambiguatePrompt string `yaml:"disambiguate_prompt"` + TranslatePrompt string `yaml:"translate_prompt"` } `yaml:"llm"` } diff --git a/config/config.yaml b/config/config.yaml index 7bd8c92..5d42153 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,3 +1,4 @@ llm: extract_keywords_prompt: "You will extract structured data from the user input. Input text: {{.Message}}. Return ONLY valid minified JSON object with keys: translate (English translation of input), keyword (array of 3-5 concise English veterinary-related keywords derived strictly from the input), animal (animal mentioned or 'unknown'). Example: {\"translate\":\"dog has diarrhea\",\"keyword\":[\"diarrhea\",\"digestive\"],\"animal\":\"dog\"}. Do not add extra text, markdown, or quotes outside JSON." disambiguate_prompt: "Given candidate visit entries (JSON array): {{.Entries}} and user message: {{.Message}} choose the best matching visit's ID. Return ONLY JSON: {\"visitReason\":\"\"}. No other text." + translate_prompt: "Translate the following veterinary-related sentence to English. Input: '{{.Message}}'. Return ONLY the English translation, no extra text, no markdown, no quotes. If already English, return as is." 
diff --git a/llm.go b/llm.go index 837de18..7311786 100644 --- a/llm.go +++ b/llm.go @@ -15,6 +15,7 @@ type LLMClientAPI interface { ExtractKeywords(ctx context.Context, message string) (map[string]interface{}, error) DisambiguateBestMatch(ctx context.Context, message string, candidates []Visit) (string, error) GetEmbeddings(ctx context.Context, input string) ([]float64, error) + TranslateToEnglish(ctx context.Context, message string) (string, error) } // --- Format Utilities --- diff --git a/migrations/0003_create_sentence_embeddings_table.up.sql b/migrations/0003_create_sentence_embeddings_table.up.sql new file mode 100644 index 0000000..36c4990 --- /dev/null +++ b/migrations/0003_create_sentence_embeddings_table.up.sql @@ -0,0 +1,10 @@ +CREATE EXTENSION IF NOT EXISTS vector; + +CREATE TABLE sentence_embeddings ( + id SERIAL PRIMARY KEY, + visit_id INTEGER NOT NULL, + sentence TEXT NOT NULL, + translated TEXT, + embeddings VECTOR(1536) NOT NULL, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP +); diff --git a/ollama_client.go b/ollama_client.go index a455c92..0b4d786 100644 --- a/ollama_client.go +++ b/ollama_client.go @@ -166,3 +166,19 @@ func (llm *OllamaClient) GetEmbeddings(ctx context.Context, input string) ([]flo } return nil, fmt.Errorf("unrecognized embedding response: %.200s", string(raw)) } + +func (llm *OllamaClient) TranslateToEnglish(ctx context.Context, message string) (string, error) { + prompt, err := renderPrompt(appConfig.LLM.TranslatePrompt, map[string]string{"Message": message}) + if err != nil { + logrus.WithError(err).Error("[CONFIG] Failed to render Translate prompt") + return "", err + } + logrus.WithField("prompt", prompt).Info("[LLM] TranslateToEnglish prompt") + + resp, err := llm.ollamaCompletion(ctx, prompt, nil) + logrus.WithFields(logrus.Fields{"response": resp, "err": err}).Info("[LLM] TranslateToEnglish 
response") + if err != nil { + return "", err + } + return strings.TrimSpace(resp), nil +} diff --git a/openai_client.go b/openai_client.go index 43a9f29..cce9251 100644 --- a/openai_client.go +++ b/openai_client.go @@ -198,3 +198,19 @@ func (llm *OpenAIClient) GetEmbeddings(ctx context.Context, input string) ([]flo } return nil, fmt.Errorf("unrecognized embedding response: %.200s", string(raw)) } + +func (llm *OpenAIClient) TranslateToEnglish(ctx context.Context, message string) (string, error) { + prompt, err := renderPrompt(appConfig.LLM.TranslatePrompt, map[string]string{"Message": message}) + if err != nil { + logrus.WithError(err).Error("[CONFIG] Failed to render Translate prompt") + return "", err + } + logrus.WithField("prompt", prompt).Info("[LLM] TranslateToEnglish prompt") + + resp, err := llm.openAICompletion(ctx, prompt, nil) + logrus.WithFields(logrus.Fields{"response": resp, "err": err}).Info("[LLM] TranslateToEnglish response") + if err != nil { + return "", err + } + return strings.TrimSpace(resp), nil +}