diff --git a/.idea/copilotDiffState.xml b/.idea/copilotDiffState.xml new file mode 100644 index 0000000..fc8074a --- /dev/null +++ b/.idea/copilotDiffState.xml @@ -0,0 +1,18 @@ + + + + + + \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..318ac3b --- /dev/null +++ b/README.md @@ -0,0 +1,105 @@ +# Vetrag + +Lightweight veterinary visit reasoning helper with LLM-assisted keyword extraction and disambiguation. + +## Features +- Switch seamlessly between local Ollama and OpenRouter (OpenAI-compatible) LLM backends by changing environment variables only. +- Structured JSON outputs enforced using provider-supported response formats (Ollama `format`, OpenAI/OpenRouter `response_format: { type: json_object }`). +- Integration tests using mock LLM & DB (no network dependency). +- GitHub Actions CI (vet, test, build). + +## Quick Start +### 1. Clone & build +```bash +git clone <repo-url> +cd vetrag +go build ./... +``` + +### 2. Prepare data +Ensure `config.yaml` and `maindb.yaml` / `db.yaml` exist as provided. Visit data is loaded at runtime (see `models.go` / `db.go`). + +### 3. Run with Ollama (local) +Pull or have a model available (example: `ollama pull qwen2.5`): +```bash +export OPENAI_BASE_URL=http://localhost:11434/api/chat +export OPENAI_MODEL=qwen2.5:latest +# API key not required for Ollama +export OPENAI_API_KEY= + +go run . +``` + +### 4. Run with OpenRouter +Sign up at https://openrouter.ai and get an API key. +```bash +export OPENAI_BASE_URL=https://openrouter.ai/api/v1/chat/completions +export OPENAI_API_KEY=sk-or-XXXXXXXXXXXXXXXX +export OPENAI_MODEL=meta-llama/llama-3.1-70b-instruct # or any supported model + +go run . +``` +Open http://localhost:8080/ in your browser. + +### 5. 
Health & Chat +```bash +curl -s http://localhost:8080/health +curl -s -X POST http://localhost:8080/chat -H 'Content-Type: application/json' -d '{"message":"my dog has diarrhea"}' | jq +``` + +## Environment Variables +| Variable | Purpose | Default (if empty) | +|----------|---------|--------------------| +| OPENAI_BASE_URL | LLM endpoint (Ollama chat or OpenRouter chat completions) | `http://localhost:11434/api/chat` | +| OPENAI_API_KEY | Bearer token for OpenRouter/OpenAI-style APIs | (unused if empty) | +| OPENAI_MODEL | Model identifier (Ollama model tag or OpenRouter model slug) | none (must set for remote) | + +## How Backend Selection Works +`llm.go` auto-detects the style: +- If the base URL contains `openrouter.ai` or `/v1/` it uses OpenAI-style request & parses `choices[0].message.content`. +- Otherwise it assumes Ollama and posts to `/api/chat` with `format` for structured JSON. + +## Structured Output +We define a JSON Schema-like map internally and: +- Ollama: send as `format` (native structured output extension). +- OpenRouter/OpenAI: send `response_format: { type: "json_object" }` plus a system instruction describing the expected keys. + +## Prompts +Prompts in `config.yaml` have been adjusted to explicitly demand JSON only. This reduces hallucinated prose and plays well with both backends. + +## Testing +Run: +```bash +go test ./... +``` +All tests mock the LLM so no network is required. + +## CI +GitHub Actions workflow at `.github/workflows/ci.yml` runs vet, tests, build on push/PR. + +## Troubleshooting +| Symptom | Cause | Fix | +|---------|-------|-----| +| Provider error referencing `response_format` and `json_schema` | Some providers reject `json_schema` | We now default to `json_object`; ensure you pulled latest changes. | +| Empty response | Model returned non-JSON or empty content | Enable debug logs (see below) and inspect raw response. 
| +| Non-JSON content (code fences) | Model ignored instruction | Try a stricter system message or switch to a model with better JSON adherence. | + +### Enable Debug Logging +Temporarily edit `main.go`: +```go +logrus.SetLevel(logrus.DebugLevel) +``` +(You can also refactor later to read a LOG_LEVEL env var.) + +### Sanitizing Output (Optional Future Improvement) +If some models wrap JSON in text, a post-processor could strip code fences and re-parse. Not implemented yet to keep logic strict. + +## Next Ideas +- Add retry with exponential backoff for transient 5xx. +- Add optional `json` fallback if a provider rejects `json_object`. +- Add streaming support. +- Add integration test with recorded OpenRouter fixture. + +## License +(Choose and add a LICENSE file if planning to open source.) + diff --git a/config.yaml b/config.yaml index 561afb2..7bd8c92 100644 --- a/config.yaml +++ b/config.yaml @@ -1,4 +1,3 @@ llm: - extract_keywords_prompt: "Translate [{{.Message}}] to English, then output only 3–5 comma-separated veterinary-related keywords IN ENGLISH derived strictly from [{{.Message}}]. example output [\"keyword1\",\"keyword2\"] No other text, no extra punctuation, no explanations, no quotes, no formatting." - disambiguate_prompt: "Given these possible vet visit reasons: [{{.Entries}}], choose the single best match for this user message: {{.Message}}. Reply with id ex {\"visitReason\":\"bloodwork\"} No other text, no extra punctuation, no explanations, no quotes, no formatting." - + extract_keywords_prompt: "You will extract structured data from the user input. Input text: {{.Message}}. Return ONLY valid minified JSON object with keys: translate (English translation of input), keyword (array of 3-5 concise English veterinary-related keywords derived strictly from the input), animal (animal mentioned or 'unknown'). Example: {\"translate\":\"dog has diarrhea\",\"keyword\":[\"diarrhea\",\"digestive\"],\"animal\":\"dog\"}. 
Do not add extra text, markdown, or quotes outside JSON." + disambiguate_prompt: "Given candidate visit entries (JSON array): {{.Entries}} and user message: {{.Message}} choose the best matching visit's ID. Return ONLY JSON: {\"visitReason\":\"\"}. No other text." diff --git a/llm.go b/llm.go index ab6a811..0489687 100644 --- a/llm.go +++ b/llm.go @@ -9,6 +9,7 @@ import ( "net/http" "strings" "text/template" + "time" "github.com/sirupsen/logrus" ) @@ -116,26 +117,32 @@ func (llm *LLMClient) openAICompletion(ctx context.Context, prompt string, forma isOpenAIStyle := strings.Contains(apiURL, "openrouter.ai") || strings.Contains(apiURL, "/v1/") - // Build request body depending on style - var body map[string]interface{} - if isOpenAIStyle { - // OpenAI / OpenRouter style (chat.completions) - // Use response_format with JSON schema when provided. - responseFormat := map[string]interface{}{ - "type": "json_schema", - "json_schema": map[string]interface{}{ - "name": "structured_output", - "schema": format, - }, + // Helper to stringify the expected JSON schema for instructions + schemaDesc := func() string { + b, _ := json.MarshalIndent(format, "", " ") + return string(b) + } + + truncate := func(s string, n int) string { + if len(s) <= n { + return s } - body = map[string]interface{}{ - "model": llm.Model, - "messages": []map[string]string{{"role": "user", "content": prompt}}, - "response_format": responseFormat, + return s[:n] + "..." + } + + buildBody := func() map[string]interface{} { + if isOpenAIStyle { + return map[string]interface{}{ + "model": llm.Model, + "messages": []map[string]string{ + {"role": "system", "content": "You are a strict JSON generator. 
ONLY output valid JSON matching this schema: " + schemaDesc() + " Do not add explanations."}, + {"role": "user", "content": prompt}, + }, + "response_format": map[string]interface{}{"type": "json_object"}, + } } - } else { - // Ollama structured output extension - body = map[string]interface{}{ + // Ollama style + return map[string]interface{}{ "model": llm.Model, "messages": []map[string]string{{"role": "user", "content": prompt}}, "stream": false, @@ -143,46 +150,85 @@ func (llm *LLMClient) openAICompletion(ctx context.Context, prompt string, forma } } - jsonBody, _ := json.Marshal(body) - logrus.WithFields(logrus.Fields{"api_url": apiURL, "prompt": prompt, "is_openai_style": isOpenAIStyle}).Info("[LLM] completion POST") + body := buildBody() - req, _ := http.NewRequestWithContext(ctx, http.MethodPost, apiURL, bytes.NewBuffer(jsonBody)) - if llm.APIKey != "" { - // OpenRouter expects: Authorization: Bearer sk-... or OR-... depending on key type - req.Header.Set("Authorization", "Bearer "+llm.APIKey) + doRequest := func(body map[string]interface{}) (raw []byte, status int, err error, dur time.Duration) { + jsonBody, _ := json.Marshal(body) + bodySize := len(jsonBody) + logrus.WithFields(logrus.Fields{ + "event": "llm_request", + "api_url": apiURL, + "model": llm.Model, + "is_openai_style": isOpenAIStyle, + "prompt_len": len(prompt), + "body_size": bodySize, + }).Info("[LLM] sending request") + req, _ := http.NewRequestWithContext(ctx, http.MethodPost, apiURL, bytes.NewBuffer(jsonBody)) + if llm.APIKey != "" { + req.Header.Set("Authorization", "Bearer "+llm.APIKey) + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + if strings.Contains(apiURL, "openrouter.ai") { + req.Header.Set("Referer", "https://github.com/") + req.Header.Set("X-Title", "vetrag-app") + } + start := time.Now() + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return nil, 0, err, time.Since(start) + } + defer 
resp.Body.Close() + raw, rerr := io.ReadAll(resp.Body) + return raw, resp.StatusCode, rerr, time.Since(start) } - req.Header.Set("Content-Type", "application/json") - client := &http.Client{} - resp, err := client.Do(req) + raw, status, err, dur := doRequest(body) if err != nil { - logrus.WithError(err).Error("[LLM] completion HTTP error") + logrus.WithFields(logrus.Fields{ + "event": "llm_response", + "status": status, + "latency_ms": dur.Milliseconds(), + "error": err, + }).Error("[LLM] request failed") return "", err } - defer resp.Body.Close() + logrus.WithFields(logrus.Fields{ + "event": "llm_raw_response", + "status": status, + "latency_ms": dur.Milliseconds(), + "raw_trunc": truncate(string(raw), 600), + "raw_len": len(raw), + }).Debug("[LLM] raw response body") - raw, err := io.ReadAll(resp.Body) - if err != nil { - return "", fmt.Errorf("failed reading response body: %w", err) - } - logrus.WithFields(logrus.Fields{"status": resp.StatusCode, "raw": string(raw)}).Debug("[LLM] completion raw response") + parseVariant := "unknown" - // Attempt Ollama format first (backwards compatible) + // Attempt Ollama format parse var ollama struct { Message struct { Content string `json:"content"` } `json:"message"` + Error string `json:"error"` } if err := json.Unmarshal(raw, &ollama); err == nil && ollama.Message.Content != "" { - logrus.WithField("content", ollama.Message.Content).Info("[LLM] completion (ollama) parsed") - return ollama.Message.Content, nil + parseVariant = "ollama" + content := ollama.Message.Content + logrus.WithFields(logrus.Fields{ + "event": "llm_response", + "status": status, + "latency_ms": dur.Milliseconds(), + "parse_variant": parseVariant, + "content_len": len(content), + "content_snip": truncate(content, 300), + }).Info("[LLM] parsed response") + return content, nil } - // Attempt OpenAI / OpenRouter style + // Attempt OpenAI/OpenRouter style parse var openAI struct { Choices []struct { Message struct { - Role string `json:"role"` Content 
string `json:"content"` } `json:"message"` } `json:"choices"` @@ -192,17 +238,46 @@ func (llm *LLMClient) openAICompletion(ctx context.Context, prompt string, forma } `json:"error"` } if err := json.Unmarshal(raw, &openAI); err == nil { - if openAI.Error != nil { - return "", fmt.Errorf("provider error: %s (%s)", openAI.Error.Message, openAI.Error.Type) + if openAI.Error != nil || status >= 400 { + parseVariant = "openai" + var msg string + if openAI.Error != nil { + msg = openAI.Error.Message + } else { + msg = string(raw) + } + logrus.WithFields(logrus.Fields{ + "event": "llm_response", + "status": status, + "latency_ms": dur.Milliseconds(), + "parse_variant": parseVariant, + "error": msg, + }).Error("[LLM] provider error") + return "", fmt.Errorf("provider error: %s", msg) } if len(openAI.Choices) > 0 && openAI.Choices[0].Message.Content != "" { + parseVariant = "openai" content := openAI.Choices[0].Message.Content - logrus.WithField("content", content).Info("[LLM] completion (openai) parsed") + logrus.WithFields(logrus.Fields{ + "event": "llm_response", + "status": status, + "latency_ms": dur.Milliseconds(), + "parse_variant": parseVariant, + "content_len": len(content), + "content_snip": truncate(content, 300), + }).Info("[LLM] parsed response") return content, nil } } - // If still nothing, return error with snippet + logrus.WithFields(logrus.Fields{ + "event": "llm_response", + "status": status, + "latency_ms": dur.Milliseconds(), + "parse_variant": parseVariant, + "raw_snip": truncate(string(raw), 300), + }).Error("[LLM] unrecognized response format") + return "", fmt.Errorf("unrecognized LLM response format: %.200s", string(raw)) } diff --git a/openrouter_integration_test.go b/openrouter_integration_test.go new file mode 100644 index 0000000..e999d36 --- /dev/null +++ b/openrouter_integration_test.go @@ -0,0 +1,89 @@ +package main + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" +) + +// Test 
OpenAI/OpenRouter style success response parsing +func TestLLMClient_OpenRouterStyle_ExtractKeywords(t *testing.T) { + // Save and restore original config + orig := appConfig + defer func() { appConfig = orig }() + + appConfig.LLM.ExtractKeywordsPrompt = "Dummy {{.Message}}" // simple template + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/v1/chat/completions" { + w.WriteHeader(http.StatusNotFound) + return + } + // Optionally verify header presence + if got := r.Header.Get("Authorization"); got == "" { + w.WriteHeader(http.StatusUnauthorized) + return + } + w.Header().Set("Content-Type", "application/json") + resp := map[string]interface{}{ + "choices": []map[string]interface{}{ + { + "message": map[string]interface{}{ + "role": "assistant", + "content": `{"translate":"dog has diarrhea","keyword":["diarrhea","digestive"],"animal":"dog"}`, + }, + }, + }, + } + json.NewEncoder(w).Encode(resp) + })) + defer ts.Close() + + llm := NewLLMClient("test-key", ts.URL+"/v1/chat/completions", "meta-llama/test") + res, err := llm.ExtractKeywords(context.Background(), "kutya hasmenés") + if err != nil { + te(t, "unexpected error: %v", err) + } + if res["translate"] != "dog has diarrhea" { + te(t, "translate mismatch: %v", res["translate"]) + } + kw, ok := res["keyword"].([]interface{}) + if !ok || len(kw) != 2 || kw[0] != "diarrhea" { + te(t, "keyword list mismatch: %#v", res["keyword"]) + } + if res["animal"] != "dog" { + te(t, "animal mismatch: %v", res["animal"]) + } +} + +// Test OpenAI/OpenRouter style error response handling +func TestLLMClient_OpenRouterStyle_Error(t *testing.T) { + orig := appConfig + defer func() { appConfig = orig }() + appConfig.LLM.ExtractKeywordsPrompt = "Dummy {{.Message}}" + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusTooManyRequests) + 
json.NewEncoder(w).Encode(map[string]interface{}{ + "error": map[string]interface{}{ + "message": "Rate limit", + "type": "rate_limit", + }, + }) + })) + defer ts.Close() + + llm := NewLLMClient("test-key", ts.URL+"/v1/chat/completions", "meta-llama/test") + _, err := llm.ExtractKeywords(context.Background(), "test") + if err == nil || !contains(err.Error(), "Rate limit") { + te(t, "expected rate limit error, got: %v", err) + } +} + +// --- helpers --- +func contains(haystack, needle string) bool { return strings.Contains(haystack, needle) } +func te(t *testing.T, format string, args ...interface{}) { t.Helper(); t.Fatalf(format, args...) } diff --git a/visits.bleve/store/000000000006.zap b/visits.bleve/store/000000000006.zap deleted file mode 100644 index 659acfd..0000000 Binary files a/visits.bleve/store/000000000006.zap and /dev/null differ diff --git a/visits.bleve/store/root.bolt b/visits.bleve/store/root.bolt index 16f4475..0b85963 100644 Binary files a/visits.bleve/store/root.bolt and b/visits.bleve/store/root.bolt differ