added better logging + openrouter call handling

This commit is contained in:
lehel 2025-10-01 16:59:30 +02:00
parent 4647a3ad43
commit b55decb633
No known key found for this signature in database
GPG Key ID: 9C4F9D6111EE5CFA
7 changed files with 332 additions and 46 deletions

File diff suppressed because one or more lines are too long

105
README.md Normal file
View File

@ -0,0 +1,105 @@
# Vetrag
Lightweight veterinary visit reasoning helper with LLM-assisted keyword extraction and disambiguation.
## Features
- Switch seamlessly between local Ollama and OpenRouter (OpenAI-compatible) LLM backends by changing environment variables only.
- Structured JSON outputs enforced using provider-supported response formats (Ollama `format`, OpenAI/OpenRouter `response_format: { type: json_object }`).
- Integration tests using mock LLM & DB (no network dependency).
- GitHub Actions CI (vet, test, build).
## Quick Start
### 1. Clone & build
```bash
git clone <repo-url>
cd vetrag
go build ./...
```
### 2. Prepare data
Ensure `config.yaml` and `maindb.yaml` / `db.yaml` exist as provided. Visit data is loaded at runtime (see `models.go` / `db.go`).
### 3. Run with Ollama (local)
Pull or have a model available (example: `ollama pull qwen2.5`):
```bash
export OPENAI_BASE_URL=http://localhost:11434/api/chat
export OPENAI_MODEL=qwen2.5:latest
# API key not required for Ollama
export OPENAI_API_KEY=
go run .
```
### 4. Run with OpenRouter
Sign up at https://openrouter.ai and get an API key.
```bash
export OPENAI_BASE_URL=https://openrouter.ai/api/v1/chat/completions
export OPENAI_API_KEY=sk-or-XXXXXXXXXXXXXXXX
export OPENAI_MODEL=meta-llama/llama-3.1-70b-instruct # or any supported model
go run .
```
Open http://localhost:8080/ in your browser.
### 5. Health & Chat
```bash
curl -s http://localhost:8080/health
curl -s -X POST http://localhost:8080/chat -H 'Content-Type: application/json' -d '{"message":"my dog has diarrhea"}' | jq
```
## Environment Variables
| Variable | Purpose | Default (if empty) |
|----------|---------|--------------------|
| OPENAI_BASE_URL | LLM endpoint (Ollama chat or OpenRouter chat completions) | `http://localhost:11434/api/chat` |
| OPENAI_API_KEY | Bearer token for OpenRouter/OpenAI-style APIs | (unused if empty) |
| OPENAI_MODEL | Model identifier (Ollama model tag or OpenRouter model slug) | none (must set for remote) |
## How Backend Selection Works
`llm.go` auto-detects the style:
- If the base URL contains `openrouter.ai` or `/v1/` it sends an OpenAI-style request and parses `choices[0].message.content`.
- Otherwise it assumes Ollama and posts to `/api/chat` with `format` for structured JSON.
## Structured Output
We define a JSON Schema-like map internally and:
- Ollama: send as `format` (native structured output extension).
- OpenRouter/OpenAI: send `response_format: { type: "json_object" }` plus a system instruction describing the expected keys.
## Prompts
Prompts in `config.yaml` have been adjusted to explicitly demand JSON only. This reduces hallucinated prose and plays well with both backends.
## Testing
Run:
```bash
go test ./...
```
All tests mock the LLM so no network is required.
## CI
GitHub Actions workflow at `.github/workflows/ci.yml` runs vet, tests, build on push/PR.
## Troubleshooting
| Symptom | Cause | Fix |
|---------|-------|-----|
| Provider error referencing `response_format` and `json_schema` | Some providers reject `json_schema` | We now default to `json_object`; ensure you pulled latest changes. |
| Empty response | Model returned non-JSON or empty content | Enable debug logs (see below) and inspect raw response. |
| Non-JSON content (code fences) | Model ignored instruction | Try a stricter system message or switch to a model with better JSON adherence. |
### Enable Debug Logging
Temporarily edit `main.go`:
```go
logrus.SetLevel(logrus.DebugLevel)
```
(You can also refactor later to read a LOG_LEVEL env var.)
### Sanitizing Output (Optional Future Improvement)
If some models wrap JSON in text, a post-processor could strip code fences and re-parse. Not implemented yet to keep logic strict.
## Next Ideas
- Add retry with exponential backoff for transient 5xx.
- Add optional `json` fallback if a provider rejects `json_object`.
- Add streaming support.
- Add integration test with recorded OpenRouter fixture.
## License
(Choose and add a LICENSE file if planning to open source.)

View File

@ -1,4 +1,3 @@
llm:
extract_keywords_prompt: "Translate [{{.Message}}] to English, then output only 35 comma-separated veterinary-related keywords IN ENGLISH derived strictly from [{{.Message}}]. example output [\"keyword1\",\"keyword2\"] No other text, no extra punctuation, no explanations, no quotes, no formatting."
disambiguate_prompt: "Given these possible vet visit reasons: [{{.Entries}}], choose the single best match for this user message: {{.Message}}. Reply with id ex {\"visitReason\":\"bloodwork\"} No other text, no extra punctuation, no explanations, no quotes, no formatting."
extract_keywords_prompt: "You will extract structured data from the user input. Input text: {{.Message}}. Return ONLY valid minified JSON object with keys: translate (English translation of input), keyword (array of 3-5 concise English veterinary-related keywords derived strictly from the input), animal (animal mentioned or 'unknown'). Example: {\"translate\":\"dog has diarrhea\",\"keyword\":[\"diarrhea\",\"digestive\"],\"animal\":\"dog\"}. Do not add extra text, markdown, or quotes outside JSON."
disambiguate_prompt: "Given candidate visit entries (JSON array): {{.Entries}} and user message: {{.Message}} choose the best matching visit's ID. Return ONLY JSON: {\"visitReason\":\"<one of the candidate IDs or empty string if none>\"}. No other text."

147
llm.go
View File

@ -9,6 +9,7 @@ import (
"net/http"
"strings"
"text/template"
"time"
"github.com/sirupsen/logrus"
)
@ -116,26 +117,32 @@ func (llm *LLMClient) openAICompletion(ctx context.Context, prompt string, forma
isOpenAIStyle := strings.Contains(apiURL, "openrouter.ai") || strings.Contains(apiURL, "/v1/")
// Build request body depending on style
var body map[string]interface{}
// Helper to stringify the expected JSON schema for instructions
schemaDesc := func() string {
b, _ := json.MarshalIndent(format, "", " ")
return string(b)
}
truncate := func(s string, n int) string {
if len(s) <= n {
return s
}
return s[:n] + "...<truncated>"
}
buildBody := func() map[string]interface{} {
if isOpenAIStyle {
// OpenAI / OpenRouter style (chat.completions)
// Use response_format with JSON schema when provided.
responseFormat := map[string]interface{}{
"type": "json_schema",
"json_schema": map[string]interface{}{
"name": "structured_output",
"schema": format,
},
}
body = map[string]interface{}{
return map[string]interface{}{
"model": llm.Model,
"messages": []map[string]string{{"role": "user", "content": prompt}},
"response_format": responseFormat,
"messages": []map[string]string{
{"role": "system", "content": "You are a strict JSON generator. ONLY output valid JSON matching this schema: " + schemaDesc() + " Do not add explanations."},
{"role": "user", "content": prompt},
},
"response_format": map[string]interface{}{"type": "json_object"},
}
} else {
// Ollama structured output extension
body = map[string]interface{}{
}
// Ollama style
return map[string]interface{}{
"model": llm.Model,
"messages": []map[string]string{{"role": "user", "content": prompt}},
"stream": false,
@ -143,46 +150,85 @@ func (llm *LLMClient) openAICompletion(ctx context.Context, prompt string, forma
}
}
jsonBody, _ := json.Marshal(body)
logrus.WithFields(logrus.Fields{"api_url": apiURL, "prompt": prompt, "is_openai_style": isOpenAIStyle}).Info("[LLM] completion POST")
body := buildBody()
doRequest := func(body map[string]interface{}) (raw []byte, status int, err error, dur time.Duration) {
jsonBody, _ := json.Marshal(body)
bodySize := len(jsonBody)
logrus.WithFields(logrus.Fields{
"event": "llm_request",
"api_url": apiURL,
"model": llm.Model,
"is_openai_style": isOpenAIStyle,
"prompt_len": len(prompt),
"body_size": bodySize,
}).Info("[LLM] sending request")
req, _ := http.NewRequestWithContext(ctx, http.MethodPost, apiURL, bytes.NewBuffer(jsonBody))
if llm.APIKey != "" {
// OpenRouter expects: Authorization: Bearer sk-... or OR-... depending on key type
req.Header.Set("Authorization", "Bearer "+llm.APIKey)
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Accept", "application/json")
if strings.Contains(apiURL, "openrouter.ai") {
req.Header.Set("Referer", "https://github.com/")
req.Header.Set("X-Title", "vetrag-app")
}
start := time.Now()
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
logrus.WithError(err).Error("[LLM] completion HTTP error")
return "", err
return nil, 0, err, time.Since(start)
}
defer resp.Body.Close()
raw, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("failed reading response body: %w", err)
raw, rerr := io.ReadAll(resp.Body)
return raw, resp.StatusCode, rerr, time.Since(start)
}
logrus.WithFields(logrus.Fields{"status": resp.StatusCode, "raw": string(raw)}).Debug("[LLM] completion raw response")
// Attempt Ollama format first (backwards compatible)
raw, status, err, dur := doRequest(body)
if err != nil {
logrus.WithFields(logrus.Fields{
"event": "llm_response",
"status": status,
"latency_ms": dur.Milliseconds(),
"error": err,
}).Error("[LLM] request failed")
return "", err
}
logrus.WithFields(logrus.Fields{
"event": "llm_raw_response",
"status": status,
"latency_ms": dur.Milliseconds(),
"raw_trunc": truncate(string(raw), 600),
"raw_len": len(raw),
}).Debug("[LLM] raw response body")
parseVariant := "unknown"
// Attempt Ollama format parse
var ollama struct {
Message struct {
Content string `json:"content"`
} `json:"message"`
Error string `json:"error"`
}
if err := json.Unmarshal(raw, &ollama); err == nil && ollama.Message.Content != "" {
logrus.WithField("content", ollama.Message.Content).Info("[LLM] completion (ollama) parsed")
return ollama.Message.Content, nil
parseVariant = "ollama"
content := ollama.Message.Content
logrus.WithFields(logrus.Fields{
"event": "llm_response",
"status": status,
"latency_ms": dur.Milliseconds(),
"parse_variant": parseVariant,
"content_len": len(content),
"content_snip": truncate(content, 300),
}).Info("[LLM] parsed response")
return content, nil
}
// Attempt OpenAI / OpenRouter style
// Attempt OpenAI/OpenRouter style parse
var openAI struct {
Choices []struct {
Message struct {
Role string `json:"role"`
Content string `json:"content"`
} `json:"message"`
} `json:"choices"`
@ -192,17 +238,46 @@ func (llm *LLMClient) openAICompletion(ctx context.Context, prompt string, forma
} `json:"error"`
}
if err := json.Unmarshal(raw, &openAI); err == nil {
if openAI.Error != nil || status >= 400 {
parseVariant = "openai"
var msg string
if openAI.Error != nil {
return "", fmt.Errorf("provider error: %s (%s)", openAI.Error.Message, openAI.Error.Type)
msg = openAI.Error.Message
} else {
msg = string(raw)
}
logrus.WithFields(logrus.Fields{
"event": "llm_response",
"status": status,
"latency_ms": dur.Milliseconds(),
"parse_variant": parseVariant,
"error": msg,
}).Error("[LLM] provider error")
return "", fmt.Errorf("provider error: %s", msg)
}
if len(openAI.Choices) > 0 && openAI.Choices[0].Message.Content != "" {
parseVariant = "openai"
content := openAI.Choices[0].Message.Content
logrus.WithField("content", content).Info("[LLM] completion (openai) parsed")
logrus.WithFields(logrus.Fields{
"event": "llm_response",
"status": status,
"latency_ms": dur.Milliseconds(),
"parse_variant": parseVariant,
"content_len": len(content),
"content_snip": truncate(content, 300),
}).Info("[LLM] parsed response")
return content, nil
}
}
// If still nothing, return error with snippet
logrus.WithFields(logrus.Fields{
"event": "llm_response",
"status": status,
"latency_ms": dur.Milliseconds(),
"parse_variant": parseVariant,
"raw_snip": truncate(string(raw), 300),
}).Error("[LLM] unrecognized response format")
return "", fmt.Errorf("unrecognized LLM response format: %.200s", string(raw))
}

View File

@ -0,0 +1,89 @@
package main
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"strings"
"testing"
)
// TestLLMClient_OpenRouterStyle_ExtractKeywords verifies that an
// OpenAI/OpenRouter-style chat.completions success payload is parsed into the
// structured keyword-extraction result (translate / keyword / animal keys).
func TestLLMClient_OpenRouterStyle_ExtractKeywords(t *testing.T) {
	// Swap in a minimal prompt template and restore the global config afterwards.
	saved := appConfig
	defer func() { appConfig = saved }()
	appConfig.LLM.ExtractKeywordsPrompt = "Dummy {{.Message}}" // simple template
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch {
		case r.URL.Path != "/v1/chat/completions":
			w.WriteHeader(http.StatusNotFound)
			return
		case r.Header.Get("Authorization") == "":
			// The client is expected to send a bearer token for remote providers.
			w.WriteHeader(http.StatusUnauthorized)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		payload := map[string]interface{}{
			"choices": []map[string]interface{}{
				{
					"message": map[string]interface{}{
						"role":    "assistant",
						"content": `{"translate":"dog has diarrhea","keyword":["diarrhea","digestive"],"animal":"dog"}`,
					},
				},
			},
		}
		json.NewEncoder(w).Encode(payload)
	}))
	defer server.Close()
	client := NewLLMClient("test-key", server.URL+"/v1/chat/completions", "meta-llama/test")
	res, err := client.ExtractKeywords(context.Background(), "kutya hasmenés")
	if err != nil {
		te(t, "unexpected error: %v", err)
	}
	if got := res["translate"]; got != "dog has diarrhea" {
		te(t, "translate mismatch: %v", got)
	}
	keywords, ok := res["keyword"].([]interface{})
	if !ok || len(keywords) != 2 || keywords[0] != "diarrhea" {
		te(t, "keyword list mismatch: %#v", res["keyword"])
	}
	if got := res["animal"]; got != "dog" {
		te(t, "animal mismatch: %v", got)
	}
}
// TestLLMClient_OpenRouterStyle_Error verifies that a provider error payload
// (here an HTTP 429 rate-limit response) surfaces as an error whose message
// contains the provider's error text.
func TestLLMClient_OpenRouterStyle_Error(t *testing.T) {
	saved := appConfig
	defer func() { appConfig = saved }()
	appConfig.LLM.ExtractKeywordsPrompt = "Dummy {{.Message}}"
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusTooManyRequests)
		body := map[string]interface{}{
			"error": map[string]interface{}{
				"message": "Rate limit",
				"type":    "rate_limit",
			},
		}
		json.NewEncoder(w).Encode(body)
	}))
	defer server.Close()
	client := NewLLMClient("test-key", server.URL+"/v1/chat/completions", "meta-llama/test")
	if _, err := client.ExtractKeywords(context.Background(), "test"); err == nil || !contains(err.Error(), "Rate limit") {
		te(t, "expected rate limit error, got: %v", err)
	}
}
// --- helpers ---
func contains(haystack, needle string) bool { return strings.Contains(haystack, needle) }
func te(t *testing.T, format string, args ...interface{}) { t.Helper(); t.Fatalf(format, args...) }

Binary file not shown.

Binary file not shown.