docker · teerthsharma · Jun 19, 2026 · Jun 19, 2026 · Jun 19, 2026 · Jun 19, 2026
@@ -2690,6 +2690,49 @@
               ],
               "additionalProperties": false
             },
+            "prefetch": {
+              "type": "object",
+              "description": "Optional exact-repeat RAG query caching. When enabled, docker-agent caches final results for repeated normalized queries.",
+              "properties": {
+                "enabled": {
+                  "type": "boolean",
+                  "description": "Enable exact-repeat RAG query caching.",
+                  "default": false
+                },
+                "max_entries": {
+                  "type": "integer",
+                  "description": "Maximum number of cached query result sets.",
+                  "minimum": 1,
+                  "default": 32
+                }
+              },
+              "additionalProperties": false
+            },
+            "topology_prior": {
+              "type": "object",
+              "description": "Optional topology-based score prior. When enabled, docker-agent runs normal retrieval first, then applies a small capped score bias to the current query's retrieved results based on query/source topology and recent result sources.",
+              "properties": {
+                "enabled": {
+                  "type": "boolean",
+                  "description": "Enable topology-based score biasing.",
+                  "default": false
+                },
+                "weight": {
+                  "type": "number",
+                  "description": "Maximum topology contribution blended into each result score. Must be between 0 and 0.2; values above 0.2 are also clamped in code.",
+                  "minimum": 0,
+                  "maximum": 0.2,
+                  "default": 0.05
+                },
+                "max_source_history": {
+                  "type": "integer",
+                  "description": "Maximum number of recent result source paths kept for topology scoring.",
+                  "minimum": 1,
+                  "default": 32
+                }
+              },
+              "additionalProperties": false
+            },
             "deduplicate": {
               "type": "boolean",
               "description": "Remove duplicate documents across strategies",

@@ -16,6 +16,7 @@ The `rag` toolset lets agents search through your documents to find relevant inf
 - **Multiple strategies** — Semantic embeddings, BM25 keyword search, and LLM-enhanced search
 - **Hybrid search** — Combine strategies with result fusion for best results
 - **Reranking** — Re-score results with specialized models for improved relevance
+- **Query caching** — Cache final, post-processed results for exactly repeated queries
 
 ## Quick Start
 
@@ -156,6 +157,19 @@ results:
 
 Supported reranking providers: **DMR** (native `/rerank` endpoint), **OpenAI**, **Anthropic**, **Gemini**.
 
+## Query Caching
+
+Query caching is opt-in. It caches the final RAG results of a query and reuses them only when a later query is identical after whitespace and case normalization. Related but different queries always run normal retrieval so results are scored for the user's current query.
+
+```yaml
+results:
+  prefetch:
+    enabled: true
+    max_entries: 32
+```
+
+The cache is bounded per RAG manager and stores cloned result slices so callers cannot mutate cached entries. It is cleared whenever the manager receives an indexing-complete event from initialization or live file-watcher reindexing, which prevents serving results from a previous index version.
+
 ## Code-Aware Chunking
 
 For source code, enable AST-based chunking to keep functions and methods intact:
@@ -263,6 +277,8 @@ Look for log tags: `[RAG Manager]`, `[Chunked-Embeddings Strategy]`, `[BM25 Stra
 | `include_score`       | bool   | `false` | Include relevance scores in results                         |
 | `return_full_content` | bool   | `false` | Return full document content instead of just matched chunks |
 | `reranking.model`     | string | —       | Reranking model reference                                   |
-| `reranking.top_k`     | int    | (`limit`) | Only rerank top K results. Defaults to the results `limit` when set.  |
+| `reranking.top_k`     | int    | (`limit`) | Only rerank top K results. Defaults to the results `limit` when set. |
 | `reranking.threshold` | float  | `0.5`   | Minimum relevance score after reranking                     |
 | `reranking.criteria`  | string | —       | Custom relevance guidance for the reranking model           |
+| `prefetch.enabled`    | bool   | `false` | Enable exact-repeat query caching                           |
+| `prefetch.max_entries` | int   | `32`    | Maximum cached query result sets                            |
@@ -0,0 +1,54 @@
+# This example demonstrates exact-repeat RAG query caching and a small topology prior.
+
+agents:
+  root:
+    model: openai/gpt-5-mini
+    description: assistant with RAG query caching and topology ranking
+    instruction: |
+      You are a helpful assistant with access to hybrid retrieval.
+      Use the knowledge base before answering questions about blorks.
+    toolsets:
+      - type: rag
+        ref: cached_knowledge
+
+rag:
+  cached_knowledge:
+    tool:
+      description: to be used to search for information about blorks
+    docs:
+      - ./blork_field_guide.txt
+    strategies:
+      - type: chunked-embeddings
+        embedding_model: openai/text-embedding-3-small
+        database: ./query_cache_embeddings.db
+        vector_dimensions: 1536
+        similarity_metric: cosine_similarity
+        threshold: 0.5
+        limit: 20
+        chunking:
+          size: 1000
+          overlap: 100
+          respect_word_boundaries: true
+      - type: bm25
+        database: ./query_cache_bm25.db
+        k1: 1.5
+        b: 0.75
+        threshold: 0.3
+        limit: 15
+        chunking:
+          size: 1000
+          overlap: 100
+          respect_word_boundaries: true
+    results:
+      fusion:
+        strategy: rrf
+        k: 60
+      deduplicate: true
+      limit: 5
+      prefetch:
+        enabled: true
+        max_entries: 32
+      topology_prior:
+        enabled: true
+        weight: 0.05
+        max_source_history: 32
@@ -1810,12 +1810,27 @@ func (c *RAGChunkingConfig) UnmarshalYAML(unmarshal func(any) error) error {
 
 // RAGResultsConfig represents result post-processing configuration (common across strategies)
 type RAGResultsConfig struct {
-	Limit             int                 `json:"limit,omitempty"`               // Maximum number of results to return (top K)
-	Fusion            *RAGFusionConfig    `json:"fusion,omitempty"`              // How to combine results from multiple strategies
-	Reranking         *RAGRerankingConfig `json:"reranking,omitempty"`           // Optional reranking configuration
-	Deduplicate       bool                `json:"deduplicate,omitempty"`         // Remove duplicate documents across strategies
-	IncludeScore      bool                `json:"include_score,omitempty"`       // Include relevance scores in results
-	ReturnFullContent bool                `json:"return_full_content,omitempty"` // Return full document content instead of just matched chunks
+	Limit             int                     `json:"limit,omitempty"`               // Maximum number of results to return (top K)
+	Fusion            *RAGFusionConfig        `json:"fusion,omitempty"`              // How to combine results from multiple strategies
+	Reranking         *RAGRerankingConfig     `json:"reranking,omitempty"`           // Optional reranking configuration
+	Prefetch          *RAGPrefetchConfig      `json:"prefetch,omitempty"`            // Optional exact-repeat query cache
+	TopologyPrior     *RAGTopologyPriorConfig `json:"topology_prior,omitempty"`      // Optional topology-based score prior
+	Deduplicate       bool                    `json:"deduplicate,omitempty"`         // Remove duplicate documents across strategies
+	IncludeScore      bool                    `json:"include_score,omitempty"`       // Include relevance scores in results
+	ReturnFullContent bool                    `json:"return_full_content,omitempty"` // Return full document content instead of just matched chunks
+}
+
+// RAGPrefetchConfig configures the exact-repeat RAG query cache.
+type RAGPrefetchConfig struct {
+	Enabled    bool `json:"enabled,omitempty"`     // Enable exact-repeat RAG query caching
+	MaxEntries int  `json:"max_entries,omitempty"` // Maximum number of cached query result sets
+}
+
+// RAGTopologyPriorConfig configures topology-based score biasing.
+type RAGTopologyPriorConfig struct {
+	Enabled          bool    `json:"enabled,omitempty"`            // Enable topology-based score biasing
+	Weight           float64 `json:"weight,omitempty"`             // Maximum topology contribution blended into each result score
+	MaxSourceHistory int     `json:"max_source_history,omitempty"` // Maximum number of recent result source paths kept for topology scoring
 }
 
 // RAGRerankingConfig represents reranking configuration
@@ -1868,12 +1883,14 @@ func defaultRAGResultsConfig() RAGResultsConfig {
 // UnmarshalYAML implements custom unmarshaling so we can apply sensible defaults
 func (r *RAGResultsConfig) UnmarshalYAML(unmarshal func(any) error) error {
 	var raw struct {
-		Limit             int                 `json:"limit,omitempty"`
-		Fusion            *RAGFusionConfig    `json:"fusion,omitempty"`
-		Reranking         *RAGRerankingConfig `json:"reranking,omitempty"`
-		Deduplicate       *bool               `json:"deduplicate,omitempty"`
-		IncludeScore      *bool               `json:"include_score,omitempty"`
-		ReturnFullContent *bool               `json:"return_full_content,omitempty"`
+		Limit             int                     `json:"limit,omitempty"`
+		Fusion            *RAGFusionConfig        `json:"fusion,omitempty"`
+		Reranking         *RAGRerankingConfig     `json:"reranking,omitempty"`
+		Prefetch          *RAGPrefetchConfig      `json:"prefetch,omitempty"`
+		TopologyPrior     *RAGTopologyPriorConfig `json:"topology_prior,omitempty"`
+		Deduplicate       *bool                   `json:"deduplicate,omitempty"`
+		IncludeScore      *bool                   `json:"include_score,omitempty"`
+		ReturnFullContent *bool                   `json:"return_full_content,omitempty"`
 	}
 
 	if err := unmarshal(&raw); err != nil {
@@ -1889,6 +1906,8 @@ func (r *RAGResultsConfig) UnmarshalYAML(unmarshal func(any) error) error {
 	}
 	r.Fusion = raw.Fusion
 	r.Reranking = raw.Reranking
+	r.Prefetch = raw.Prefetch
+	r.TopologyPrior = raw.TopologyPrior
 
 	if raw.Deduplicate != nil {
 		r.Deduplicate = *raw.Deduplicate

@@ -126,6 +126,8 @@ func TestSchemaMatchesGoTypes(t *testing.T) {
 		{reflect.TypeFor[latest.RAGResultsConfig](), []string{"RAGConfig", "results"}, "RAGResultsConfig (RAGConfig.results)"},
 		{reflect.TypeFor[latest.RAGFusionConfig](), []string{"RAGConfig", "results", "fusion"}, "RAGFusionConfig (RAGConfig.results.fusion)"},
 		{reflect.TypeFor[latest.RAGRerankingConfig](), []string{"RAGConfig", "results", "reranking"}, "RAGRerankingConfig (RAGConfig.results.reranking)"},
+		{reflect.TypeFor[latest.RAGPrefetchConfig](), []string{"RAGConfig", "results", "prefetch"}, "RAGPrefetchConfig (RAGConfig.results.prefetch)"},
+		{reflect.TypeFor[latest.RAGTopologyPriorConfig](), []string{"RAGConfig", "results", "topology_prior"}, "RAGTopologyPriorConfig (RAGConfig.results.topology_prior)"},
 		{reflect.TypeFor[latest.RAGChunkingConfig](), []string{"RAGConfig", "strategies", "*", "chunking"}, "RAGChunkingConfig (RAGConfig.strategies[].chunking)"},
 	}
 

@@ -11,6 +11,7 @@ import (
 	"github.com/docker/docker-agent/pkg/environment"
 	"github.com/docker/docker-agent/pkg/model/provider"
 	"github.com/docker/docker-agent/pkg/model/provider/options"
+	"github.com/docker/docker-agent/pkg/rag/prefetch"
 	"github.com/docker/docker-agent/pkg/rag/rerank"
 	"github.com/docker/docker-agent/pkg/rag/strategy"
 	"github.com/docker/docker-agent/pkg/rag/types"
@@ -131,13 +132,36 @@ func buildManagerConfig(
 			Description: ragCfg.Tool.Description,
 			Instruction: ragCfg.Tool.Instruction,
 		},
-		Docs:            GetAbsolutePaths(buildCfg.ParentDir, ragCfg.Docs),
-		Results:         results,
-		FusionConfig:    fusionCfg,
-		StrategyConfigs: strategyConfigs,
+		Docs:                GetAbsolutePaths(buildCfg.ParentDir, ragCfg.Docs),
+		Results:             results,
+		FusionConfig:        fusionCfg,
+		StrategyConfigs:     strategyConfigs,
+		PrefetchConfig:      buildPrefetchConfig(ragCfg.Results.Prefetch),
+		TopologyPriorConfig: buildTopologyPriorConfig(ragCfg.Results.TopologyPrior),
 	}, nil
 }
 
+func buildPrefetchConfig(cfg *latest.RAGPrefetchConfig) prefetch.Config {
+	if cfg == nil { // no results.prefetch section: return the zero value (Enabled is false)
+		return prefetch.Config{}
+	}
+	return prefetch.Config{ // fields copied unchanged. NOTE(review): schema documents max_entries default 32, but none is applied here — confirm prefetch handles 0
+		Enabled:    cfg.Enabled,
+		MaxEntries: cfg.MaxEntries,
+	}
+}
+
+func buildTopologyPriorConfig(cfg *latest.RAGTopologyPriorConfig) TopologyPriorConfig {
+	if cfg == nil { // no results.topology_prior section: return the zero value (Enabled is false)
+		return TopologyPriorConfig{}
+	}
+	return TopologyPriorConfig{ // fields copied unchanged. NOTE(review): schema defaults (weight 0.05, max_source_history 32) and the 0.2 weight clamp are not applied here — confirm the consumer handles them
+		Enabled:          cfg.Enabled,
+		Weight:           cfg.Weight,
+		MaxSourceHistory: cfg.MaxSourceHistory,
+	}
+}
+
 // buildRerankingConfig constructs a RerankingConfig from the configuration.
 func buildRerankingConfig(
 	ctx context.Context,