diff --git a/docs/feature-flags.md b/docs/feature-flags.md index ce3c32e35f..2915c53f2b 100644 --- a/docs/feature-flags.md +++ b/docs/feature-flags.md @@ -334,4 +334,24 @@ runtime behavior (such as output formatting) won't appear here. - 'blocked_by' - the subject issue is blocked by the related issue. - 'blocking' - the subject issue blocks the related issue. (string, required) +### `fields_param` + +- **get_file_contents** - Get file or directory contents + - **Required OAuth Scopes**: `repo` + - `fields`: Subset of fields to return for each entry when the path is a directory. If omitted, all fields are returned. Ignored when the path is a single file. Use this to reduce response size when listing directories and you only need specific fields, e.g. just 'name' and 'type'. (string[], optional) + - `owner`: Repository owner (username or organization) (string, required) + - `path`: Path to file/directory (string, optional) + - `ref`: Accepts optional git refs such as `refs/tags/{tag}`, `refs/heads/{branch}` or `refs/pull/{pr_number}/head` (string, optional) + - `repo`: Repository name (string, required) + - `sha`: Accepts optional commit SHA. If specified, it will be used instead of ref (string, optional) + +- **search_code** - Search code + - **Required OAuth Scopes**: `repo` + - `fields`: Subset of fields to return for each code search result. If omitted, all fields are returned. Use this to reduce response size when you only need specific fields; omitting 'repository' and 'text_matches' in particular drops the largest per-result data. (string[], optional) + - `order`: Sort order for results (string, optional) + - `page`: Page number for pagination (min 1) (number, optional) + - `perPage`: Results per page for pagination (min 1, max 100) (number, optional) + - `query`: Search query (GitHub code search REST). Implicit AND between terms; supports `OR`, `NOT`, and `"quoted phrase"` for exact match. Qualifiers: `repo:owner/repo`, `org:`, `user:`, `language:`, `path:dir` (prefix match), `filename:exact.ext`, `extension:`, `in:file`, `in:path`, `size:`, `is:archived`, `is:fork`. Max 256 chars. Examples: `WithContext language:go org:github`; `"package main" repo:o/r`; `func extension:go path:cmd repo:o/r`; `NOT TODO language:go repo:o/r`. (string, required) + - `sort`: Sort field ('indexed' only) (string, optional) + diff --git a/pkg/github/__toolsnaps__/get_file_contents_ff_fields_param.snap b/pkg/github/__toolsnaps__/get_file_contents_ff_fields_param.snap new file mode 100644 index 0000000000..dec933c94d --- /dev/null +++ b/pkg/github/__toolsnaps__/get_file_contents_ff_fields_param.snap @@ -0,0 +1,57 @@ +{ + "annotations": { + "idempotentHint": false, + "readOnlyHint": true, + "title": "Get file or directory contents" + }, + "description": "Get the contents of a file or directory from a GitHub repository", + "inputSchema": { + "properties": { + "fields": { + "description": "Subset of fields to return for each entry when the path is a directory. If omitted, all fields are returned. Ignored when the path is a single file. Use this to reduce response size when listing directories and you only need specific fields, e.g. just 'name' and 'type'.", + "items": { + "enum": [ + "type", + "name", + "path", + "size", + "sha", + "url", + "git_url", + "html_url", + "download_url" + ], + "type": "string" + }, + "type": "array" + }, + "owner": { + "description": "Repository owner (username or organization)", + "type": "string" + }, + "path": { + "default": "/", + "description": "Path to file/directory", + "type": "string" + }, + "ref": { + "description": "Accepts optional git refs such as `refs/tags/{tag}`, `refs/heads/{branch}` or `refs/pull/{pr_number}/head`", + "type": "string" + }, + "repo": { + "description": "Repository name", + "type": "string" + }, + "sha": { + "description": "Accepts optional commit SHA. If specified, it will be used instead of ref", + "type": "string" + } + }, + "required": [ + "owner", + "repo" + ], + "type": "object" + }, + "name": "get_file_contents" +} \ No newline at end of file diff --git a/pkg/github/__toolsnaps__/search_code_ff_fields_param.snap b/pkg/github/__toolsnaps__/search_code_ff_fields_param.snap new file mode 100644 index 0000000000..00d4686712 --- /dev/null +++ b/pkg/github/__toolsnaps__/search_code_ff_fields_param.snap @@ -0,0 +1,58 @@ +{ + "annotations": { + "idempotentHint": false, + "readOnlyHint": true, + "title": "Search code" + }, + "description": "Fast and precise code search across ALL GitHub repositories using GitHub's native search engine. Best for finding exact symbols, functions, classes, or specific code patterns.", + "inputSchema": { + "properties": { + "fields": { + "description": "Subset of fields to return for each code search result. If omitted, all fields are returned. Use this to reduce response size when you only need specific fields; omitting 'repository' and 'text_matches' in particular drops the largest per-result data.", + "items": { + "enum": [ + "name", + "path", + "sha", + "repository", + "text_matches" + ], + "type": "string" + }, + "type": "array" + }, + "order": { + "description": "Sort order for results", + "enum": [ + "asc", + "desc" + ], + "type": "string" + }, + "page": { + "description": "Page number for pagination (min 1)", + "minimum": 1, + "type": "number" + }, + "perPage": { + "description": "Results per page for pagination (min 1, max 100)", + "maximum": 100, + "minimum": 1, + "type": "number" + }, + "query": { + "description": "Search query (GitHub code search REST). Implicit AND between terms; supports `OR`, `NOT`, and `\"quoted phrase\"` for exact match. Qualifiers: `repo:owner/repo`, `org:`, `user:`, `language:`, `path:dir` (prefix match), `filename:exact.ext`, `extension:`, `in:file`, `in:path`, `size:`, `is:archived`, `is:fork`. Max 256 chars. Examples: `WithContext language:go org:github`; `\"package main\" repo:o/r`; `func extension:go path:cmd repo:o/r`; `NOT TODO language:go repo:o/r`.", + "type": "string" + }, + "sort": { + "description": "Sort field ('indexed' only)", + "type": "string" + } + }, + "required": [ + "query" + ], + "type": "object" + }, + "name": "search_code" +} \ No newline at end of file diff --git a/pkg/github/dependencies.go b/pkg/github/dependencies.go index 1141fbce89..bbc125ca68 100644 --- a/pkg/github/dependencies.go +++ b/pkg/github/dependencies.go @@ -193,6 +193,9 @@ func (d BaseDeps) Logger(_ context.Context) *slog.Logger { // Metrics implements ToolDependencies. func (d BaseDeps) Metrics(ctx context.Context) metrics.Metrics { + if d.Obsv == nil { + return metrics.NewNoopMetrics() + } return d.Obsv.Metrics(ctx) } @@ -423,6 +426,9 @@ func (d *RequestDeps) Logger(_ context.Context) *slog.Logger { // Metrics implements ToolDependencies. func (d *RequestDeps) Metrics(ctx context.Context) metrics.Metrics { + if d.obsv == nil { + return metrics.NewNoopMetrics() + } return d.obsv.Metrics(ctx) } diff --git a/pkg/github/feature_flags.go b/pkg/github/feature_flags.go index fdb113585a..3b6334a4ea 100644 --- a/pkg/github/feature_flags.go +++ b/pkg/github/feature_flags.go @@ -23,6 +23,14 @@ const FeatureFlagFileBlame = "file_blame" // unless explicitly opted in. const FeatureFlagIssueDependencies = "issue_dependencies" +// FeatureFlagFieldsParam is the feature flag name for the optional `fields` +// parameter on selected read tools (for example search_code and +// get_file_contents). When enabled, those tools advertise `fields` and filter +// each result to the requested subset, reducing response size. It is gated so +// the feature can be rolled out gradually and disabled as a kill switch without +// a redeploy. +const FeatureFlagFieldsParam = "fields_param" + // AllowedFeatureFlags is the allowlist of feature flags that can be enabled // by users via --features CLI flag or X-MCP-Features HTTP header. // Only flags in this list are accepted; unknown flags are silently ignored. @@ -35,6 +43,7 @@ var AllowedFeatureFlags = []string{ FeatureFlagPullRequestsGranular, FeatureFlagFileBlame, FeatureFlagIssueDependencies, + FeatureFlagFieldsParam, } // InsidersFeatureFlags is the list of feature flags that insiders mode enables. diff --git a/pkg/github/fields_param_gating_test.go b/pkg/github/fields_param_gating_test.go new file mode 100644 index 0000000000..09b176914b --- /dev/null +++ b/pkg/github/fields_param_gating_test.go @@ -0,0 +1,75 @@ +package github + +import ( + "context" + "testing" + + "github.com/github/github-mcp-server/pkg/translations" + "github.com/google/jsonschema-go/jsonschema" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// Test_FieldsParamVariants_MutuallyExclusive guards the dual-variant +// registration for the fields_param feature flag. The flag-enabled tools +// (search_code, get_file_contents) and their Legacy* counterparts share a tool +// name, so exactly one of each pair must survive inventory filtering for any +// flag state. If both ever leaked, a client could be offered two tools with the +// same name. This asserts that each gated tool is present exactly once, +// advertising the `fields` parameter only when fields_param is enabled. +func Test_FieldsParamVariants_MutuallyExclusive(t *testing.T) { + gatedTools := []string{"search_code", "get_file_contents"} + + for _, tc := range []struct { + name string + flagEnabled bool + expectFields bool + featureChecks func(context.Context, string) (bool, error) + }{ + { + name: "flag off registers the legacy variant without fields", + flagEnabled: false, + expectFields: false, + featureChecks: featureCheckerFor(), // fields_param disabled + }, + { + name: "flag on registers the fields variant with fields", + flagEnabled: true, + expectFields: true, + featureChecks: featureCheckerFor(FeatureFlagFieldsParam), + }, + } { + t.Run(tc.name, func(t *testing.T) { + inv, err := NewInventory(translations.NullTranslationHelper). + WithToolsets([]string{"all"}). + WithFeatureChecker(tc.featureChecks). + Build() + require.NoError(t, err) + + available := inv.AvailableTools(context.Background()) + + counts := make(map[string]int, len(available)) + for _, tool := range available { + counts[tool.Tool.Name]++ + } + + // Each gated tool must be present exactly once (never both variants) + // and advertise `fields` only when the flag is enabled. + for _, name := range gatedTools { + require.Equalf(t, 1, counts[name], "expected exactly one %q for flagEnabled=%v; dual variants must be mutually exclusive", name, tc.flagEnabled) + + tool := requireToolByName(t, available, name) + schema, ok := tool.Tool.InputSchema.(*jsonschema.Schema) + require.Truef(t, ok, "%q InputSchema should be *jsonschema.Schema", name) + + if tc.expectFields { + assert.Containsf(t, schema.Properties, "fields", "%q should advertise fields when flag is on", name) + assert.Equalf(t, FeatureFlagFieldsParam, tool.FeatureFlagEnable, "%q should be the flag-enabled variant", name) + } else { + assert.NotContainsf(t, schema.Properties, "fields", "%q must not advertise fields when flag is off", name) + assert.Containsf(t, tool.FeatureFlagDisable, FeatureFlagFieldsParam, "%q should be the legacy (flag-disabled) variant", name) + } + } + }) + } +} diff --git a/pkg/github/fields_telemetry.go b/pkg/github/fields_telemetry.go new file mode 100644 index 0000000000..324f463bec --- /dev/null +++ b/pkg/github/fields_telemetry.go @@ -0,0 +1,54 @@ +package github + +import ( + "context" + "strconv" +) + +// Metric names for the optional `fields` response-filtering feature. They let a +// dashboard answer two questions on real traffic: how often the model actually +// filters (adoption) and how many bytes that filtering removes (effectiveness). +// +// Cardinality is kept deliberately low: the only tags ever attached are `tool` +// (a small fixed set of tool names) and `filtered` (a boolean). Unbounded values +// such as repository, owner, user, the query, or the requested field list are +// never used as tags. +const ( + metricFieldsToolCall = "mcp.fields.tool_call" + metricFieldsBytesFull = "mcp.fields.bytes_full" + metricFieldsBytesSent = "mcp.fields.bytes_sent" + metricFieldsBytesSaved = "mcp.fields.bytes_saved" +) + +// recordFieldsUsage emits telemetry for a single call to a tool that supports +// the `fields` parameter. It is best-effort: the local server wires a no-op +// metrics sink, while hosted deployments inject a real sink. +// +// Every call increments mcp.fields.tool_call tagged by tool and whether the +// response was filtered, which yields the adoption rate (filtered / total). When +// the response was filtered, it also records the unfiltered (fullBytes) and +// returned (sentBytes) payload sizes plus their difference, which yields the +// realized savings. Byte counters are only emitted for filtered calls so that +// "percent saved" (bytes_saved / bytes_full) is computed over the population +// where filtering actually applied. +func recordFieldsUsage(ctx context.Context, deps ToolDependencies, tool string, filtered bool, fullBytes, sentBytes int) { + m := deps.Metrics(ctx) + if m == nil { + return + } + + m.Increment(metricFieldsToolCall, map[string]string{ + "tool": tool, + "filtered": strconv.FormatBool(filtered), + }) + + if !filtered { + return + } + + toolTag := map[string]string{"tool": tool} + saved := max(fullBytes-sentBytes, 0) + m.Counter(metricFieldsBytesFull, toolTag, int64(fullBytes)) + m.Counter(metricFieldsBytesSent, toolTag, int64(sentBytes)) + m.Counter(metricFieldsBytesSaved, toolTag, int64(saved)) +} diff --git a/pkg/github/fields_telemetry_test.go b/pkg/github/fields_telemetry_test.go new file mode 100644 index 0000000000..0ac3891edd --- /dev/null +++ b/pkg/github/fields_telemetry_test.go @@ -0,0 +1,138 @@ +package github + +import ( + "context" + "log/slog" + "sync" + "testing" + "time" + + "github.com/github/github-mcp-server/pkg/observability" + "github.com/github/github-mcp-server/pkg/observability/metrics" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// recordingMetrics is a metrics.Metrics implementation that captures emitted +// metrics so tests can assert on telemetry. It is safe for concurrent use. +type recordingMetrics struct { + mu sync.Mutex + increments []recordedMetric + counters []recordedMetric +} + +type recordedMetric struct { + key string + tags map[string]string + value int64 +} + +func (m *recordingMetrics) Increment(key string, tags map[string]string) { + m.mu.Lock() + defer m.mu.Unlock() + m.increments = append(m.increments, recordedMetric{key: key, tags: tags, value: 1}) +} + +func (m *recordingMetrics) Counter(key string, tags map[string]string, value int64) { + m.mu.Lock() + defer m.mu.Unlock() + m.counters = append(m.counters, recordedMetric{key: key, tags: tags, value: value}) +} + +func (m *recordingMetrics) Distribution(_ string, _ map[string]string, _ float64) {} +func (m *recordingMetrics) DistributionMs(_ string, _ map[string]string, _ time.Duration) {} +func (m *recordingMetrics) WithTags(_ map[string]string) metrics.Metrics { return m } + +// counter returns the recorded counter for the given key, or false if absent. +func (m *recordingMetrics) counter(key string) (recordedMetric, bool) { + m.mu.Lock() + defer m.mu.Unlock() + for _, c := range m.counters { + if c.key == key { + return c, true + } + } + return recordedMetric{}, false +} + +// increment returns the recorded increment for the given key, or false if absent. +func (m *recordingMetrics) increment(key string) (recordedMetric, bool) { + m.mu.Lock() + defer m.mu.Unlock() + for _, c := range m.increments { + if c.key == key { + return c, true + } + } + return recordedMetric{}, false +} + +// depsWithRecordingMetrics returns BaseDeps wired with a recording metrics sink +// plus the sink for assertions. +func depsWithRecordingMetrics(t *testing.T, base BaseDeps) (BaseDeps, *recordingMetrics) { + t.Helper() + rec := &recordingMetrics{} + exporters, err := observability.NewExporters(slog.New(slog.DiscardHandler), rec) + require.NoError(t, err) + base.Obsv = exporters + return base, rec +} + +func Test_recordFieldsUsage_Filtered(t *testing.T) { + deps, rec := depsWithRecordingMetrics(t, BaseDeps{}) + + recordFieldsUsage(context.Background(), deps, "search_code", true, 100, 30) + + call, ok := rec.increment(metricFieldsToolCall) + require.True(t, ok) + assert.Equal(t, "search_code", call.tags["tool"]) + assert.Equal(t, "true", call.tags["filtered"]) + + full, ok := rec.counter(metricFieldsBytesFull) + require.True(t, ok) + assert.Equal(t, int64(100), full.value) + assert.Equal(t, "search_code", full.tags["tool"]) + assert.NotContains(t, full.tags, "filtered") + + sent, ok := rec.counter(metricFieldsBytesSent) + require.True(t, ok) + assert.Equal(t, int64(30), sent.value) + + saved, ok := rec.counter(metricFieldsBytesSaved) + require.True(t, ok) + assert.Equal(t, int64(70), saved.value) +} + +func Test_recordFieldsUsage_NotFiltered(t *testing.T) { + deps, rec := depsWithRecordingMetrics(t, BaseDeps{}) + + recordFieldsUsage(context.Background(), deps, "search_code", false, 100, 100) + + call, ok := rec.increment(metricFieldsToolCall) + require.True(t, ok) + assert.Equal(t, "false", call.tags["filtered"]) + + // No byte counters are emitted when the response was not filtered. + _, ok = rec.counter(metricFieldsBytesFull) + assert.False(t, ok) + _, ok = rec.counter(metricFieldsBytesSaved) + assert.False(t, ok) +} + +func Test_recordFieldsUsage_ClampsNegativeSavings(t *testing.T) { + deps, rec := depsWithRecordingMetrics(t, BaseDeps{}) + + // sent larger than full should never yield negative savings. + recordFieldsUsage(context.Background(), deps, "get_file_contents", true, 10, 25) + + saved, ok := rec.counter(metricFieldsBytesSaved) + require.True(t, ok) + assert.Equal(t, int64(0), saved.value) +} + +func Test_recordFieldsUsage_NilExporterDoesNotPanic(t *testing.T) { + // BaseDeps with no Obsv falls back to a noop sink rather than panicking. + assert.NotPanics(t, func() { + recordFieldsUsage(context.Background(), BaseDeps{}, "search_code", true, 100, 30) + }) +} diff --git a/pkg/github/minimal_types.go b/pkg/github/minimal_types.go index 256bdcb911..467fcda2c0 100644 --- a/pkg/github/minimal_types.go +++ b/pkg/github/minimal_types.go @@ -1,6 +1,8 @@ package github import ( + "bytes" + "encoding/json" "fmt" "net/url" "strconv" @@ -12,6 +14,57 @@ import ( "github.com/github/github-mcp-server/pkg/sanitize" ) +// codeSearchItemFieldEnum lists the selectable fields for search_code result +// items, matching the JSON field names of MinimalCodeResult. The repository and +// text_matches fields are the heaviest, so omitting them is the main lever for +// shrinking large result sets. +var codeSearchItemFieldEnum = []any{"name", "path", "sha", "repository", "text_matches"} + +// fileContentFieldEnum lists the selectable fields for get_file_contents +// directory listings, matching the JSON field names of +// github.RepositoryContent that appear for directory entries. Only applied when +// the requested path is a directory; ignored for single files. +var fileContentFieldEnum = []any{"type", "name", "path", "size", "sha", "url", "git_url", "html_url", "download_url"} + +// filterFields marshals v to a JSON object and returns a map containing only the +// requested fields. Fields that are unknown or absent from the JSON (for example +// empty values dropped via omitempty) are skipped. +func filterFields(v any, fields []string) (map[string]any, error) { + data, err := json.Marshal(v) + if err != nil { + return nil, err + } + + decoder := json.NewDecoder(bytes.NewReader(data)) + decoder.UseNumber() // preserve integer precision for fields such as IDs + var object map[string]any + if err := decoder.Decode(&object); err != nil { + return nil, err + } + + picked := make(map[string]any, len(fields)) + for _, field := range fields { + if value, ok := object[field]; ok { + picked[field] = value + } + } + return picked, nil +} + +// filterEachField applies filterFields to every item, returning a slice in which +// each element contains only the requested fields. +func filterEachField[T any](items []T, fields []string) ([]map[string]any, error) { + filtered := make([]map[string]any, 0, len(items)) + for _, item := range items { + picked, err := filterFields(item, fields) + if err != nil { + return nil, err + } + filtered = append(filtered, picked) + } + return filtered, nil +} + // MinimalUser is the output type for user and organization search results. type MinimalUser struct { Login string `json:"login"` diff --git a/pkg/github/repositories.go b/pkg/github/repositories.go index 949a180081..2e0430ac05 100644 --- a/pkg/github/repositories.go +++ b/pkg/github/repositories.go @@ -693,8 +693,73 @@ func FetchRepoIsPrivate(ctx context.Context, client *github.Client, owner, repo return r.GetPrivate(), nil } -// GetFileContents creates a tool to get the contents of a file or directory from a GitHub repository. +// GetFileContents creates a tool to get the contents of a file or directory from +// a GitHub repository. It is the FeatureFlagFieldsParam-enabled variant: it +// advertises the optional `fields` parameter and filters directory listings to +// the requested subset. Both this and LegacyGetFileContents register under the +// tool name "get_file_contents"; exactly one is active for any given request +// thanks to mutually exclusive FeatureFlagEnable / FeatureFlagDisable annotations. func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool { + st := getFileContentsTool(t, true) + st.FeatureFlagEnable = FeatureFlagFieldsParam + return st +} + +// LegacyGetFileContents is the FeatureFlagFieldsParam-disabled variant of +// get_file_contents. It exposes the original schema (no `fields` parameter) and +// never filters directory listings, so it acts as the kill switch when the flag +// is off. It owns the canonical get_file_contents.snap; the flag-enabled variant +// owns get_file_contents_ff_.snap. Delete this function when the flag is +// removed. +func LegacyGetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool { + st := getFileContentsTool(t, false) + st.FeatureFlagDisable = []string{FeatureFlagFieldsParam} + return st +} + +// getFileContentsTool builds the get_file_contents tool. When includeFields is +// true the tool advertises the optional `fields` parameter, filters directory +// listings to the requested subset, and emits fields telemetry. When false it is +// the original tool with no fields parameter and no filtering. +func getFileContentsTool(t translations.TranslationHelperFunc, includeFields bool) inventory.ServerTool { + schema := &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "owner": { + Type: "string", + Description: "Repository owner (username or organization)", + }, + "repo": { + Type: "string", + Description: "Repository name", + }, + "path": { + Type: "string", + Description: "Path to file/directory", + Default: json.RawMessage(`"/"`), + }, + "ref": { + Type: "string", + Description: "Accepts optional git refs such as `refs/tags/{tag}`, `refs/heads/{branch}` or `refs/pull/{pr_number}/head`", + }, + "sha": { + Type: "string", + Description: "Accepts optional commit SHA. If specified, it will be used instead of ref", + }, + }, + Required: []string{"owner", "repo"}, + } + if includeFields { + schema.Properties["fields"] = &jsonschema.Schema{ + Type: "array", + Description: "Subset of fields to return for each entry when the path is a directory. If omitted, all fields are returned. Ignored when the path is a single file. Use this to reduce response size when listing directories and you only need specific fields, e.g. just 'name' and 'type'.", + Items: &jsonschema.Schema{ + Type: "string", + Enum: fileContentFieldEnum, + }, + } + } + return NewTool( ToolsetMetadataRepos, mcp.Tool{ @@ -704,33 +769,7 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool Title: t("TOOL_GET_FILE_CONTENTS_USER_TITLE", "Get file or directory contents"), ReadOnlyHint: true, }, - InputSchema: &jsonschema.Schema{ - Type: "object", - Properties: map[string]*jsonschema.Schema{ - "owner": { - Type: "string", - Description: "Repository owner (username or organization)", - }, - "repo": { - Type: "string", - Description: "Repository name", - }, - "path": { - Type: "string", - Description: "Path to file/directory", - Default: json.RawMessage(`"/"`), - }, - "ref": { - Type: "string", - Description: "Accepts optional git refs such as `refs/tags/{tag}`, `refs/heads/{branch}` or `refs/pull/{pr_number}/head`", - }, - "sha": { - Type: "string", - Description: "Accepts optional commit SHA. If specified, it will be used instead of ref", - }, - }, - Required: []string{"owner", "repo"}, - }, + InputSchema: schema, }, []scopes.Scope{scopes.Repo}, func(ctx context.Context, deps ToolDependencies, _ *mcp.CallToolRequest, args map[string]any) (*mcp.CallToolResult, any, error) { @@ -760,6 +799,14 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool return utils.NewToolResultError(err.Error()), nil, nil } + var fields []string + if includeFields { + fields, err = OptionalStringArrayParam(args, "fields") + if err != nil { + return utils.NewToolResultError(err.Error()), nil, nil + } + } + client, err := deps.GetClient(ctx) if err != nil { return utils.NewToolResultError("failed to get GitHub client"), nil, nil @@ -883,10 +930,23 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool return attachIFC(utils.NewToolResultResource(fmt.Sprintf("successfully downloaded binary file (SHA: %s)%s", fileSHA, successNote), result)), nil, nil } else if dirContent != nil { // file content or file SHA is nil which means it's a directory - r, err := json.Marshal(dirContent) + filtered := false + var payload any = dirContent + if includeFields && len(fields) > 0 { + filteredEntries, err := filterEachField(dirContent, fields) + if err != nil { + return utils.NewToolResultErrorFromErr("failed to filter directory contents", err), nil, nil + } + payload = filteredEntries + filtered = true + } + r, err := json.Marshal(payload) if err != nil { return utils.NewToolResultError("failed to marshal response"), nil, nil } + if includeFields { + recordDirContentsFieldsUsage(ctx, deps, dirContent, filtered, len(r)) + } return attachIFC(utils.NewToolResultText(string(r))), nil, nil } @@ -895,6 +955,20 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool ) } +// recordDirContentsFieldsUsage emits fields telemetry for a get_file_contents +// directory listing. sentBytes is the size of the payload actually returned. +// When the listing was filtered, the unfiltered size is computed from the full +// directory content so the realized savings can be measured. +func recordDirContentsFieldsUsage(ctx context.Context, deps ToolDependencies, full []*github.RepositoryContent, filtered bool, sentBytes int) { + fullBytes := sentBytes + if filtered { + if data, err := json.Marshal(full); err == nil { + fullBytes = len(data) + } + } + recordFieldsUsage(ctx, deps, "get_file_contents", filtered, fullBytes, sentBytes) +} + // ForkRepository creates a tool to fork a repository. func ForkRepository(t translations.TranslationHelperFunc) inventory.ServerTool { return NewTool( diff --git a/pkg/github/repositories_test.go b/pkg/github/repositories_test.go index e5531cc55b..136aa93696 100644 --- a/pkg/github/repositories_test.go +++ b/pkg/github/repositories_test.go @@ -27,7 +27,11 @@ func Test_GetFileContents(t *testing.T) { // Verify tool definition once serverTool := GetFileContents(translations.NullTranslationHelper) tool := serverTool.Tool - require.NoError(t, toolsnaps.Test(tool.Name, tool)) + // GetFileContents is the FeatureFlagFieldsParam-enabled variant; it owns the + // _ff_ snapshot. The canonical get_file_contents.snap is owned by + // LegacyGetFileContents (see Test_LegacyGetFileContents_Definition). + require.NoError(t, toolsnaps.Test(tool.Name+"_ff_"+FeatureFlagFieldsParam, tool)) + require.Equal(t, FeatureFlagFieldsParam, serverTool.FeatureFlagEnable) schema, ok := tool.InputSchema.(*jsonschema.Schema) require.True(t, ok, "InputSchema should be *jsonschema.Schema") @@ -39,6 +43,7 @@ func Test_GetFileContents(t *testing.T) { assert.Contains(t, schema.Properties, "path") assert.Contains(t, schema.Properties, "ref") assert.Contains(t, schema.Properties, "sha") + assert.Contains(t, schema.Properties, "fields") assert.ElementsMatch(t, schema.Required, []string{"owner", "repo"}) // Mock response for raw content @@ -480,6 +485,137 @@ func Test_GetFileContents(t *testing.T) { } } +func Test_GetFileContents_DirectoryFieldFiltering(t *testing.T) { + mockDirContent := []*github.RepositoryContent{ + { + Type: github.Ptr("file"), + Name: github.Ptr("README.md"), + Path: github.Ptr("README.md"), + SHA: github.Ptr("abc123"), + Size: github.Ptr(42), + URL: github.Ptr("https://api.github.com/repos/owner/repo/contents/README.md"), + HTMLURL: github.Ptr("https://github.com/owner/repo/blob/main/README.md"), + DownloadURL: github.Ptr("https://raw.githubusercontent.com/owner/repo/main/README.md"), + }, + { + Type: github.Ptr("dir"), + Name: github.Ptr("src"), + Path: github.Ptr("src"), + SHA: github.Ptr("def456"), + HTMLURL: github.Ptr("https://github.com/owner/repo/tree/main/src"), + }, + } + + serverTool := GetFileContents(translations.NullTranslationHelper) + client := mustNewGHClient(t, MockHTTPClientWithHandlers(map[string]http.HandlerFunc{ + GetReposByOwnerByRepo: mockResponse(t, http.StatusOK, "{\"name\": \"repo\", \"default_branch\": \"main\"}"), + GetReposGitRefByOwnerByRepoByRef: mockResponse(t, http.StatusOK, "{\"ref\": \"refs/heads/main\", \"object\": {\"sha\": \"\"}}"), + GetReposContentsByOwnerByRepoByPath: expectQueryParams(t, map[string]string{}).andThen( + mockResponse(t, http.StatusOK, mockDirContent), + ), + GetRawReposContentsByOwnerByRepoByPath: expectQueryParams(t, map[string]string{"branch": "main"}).andThen( + mockResponse(t, http.StatusNotFound, nil), + ), + })) + deps := BaseDeps{Client: client} + handler := serverTool.Handler(deps) + + request := createMCPRequest(map[string]any{ + "owner": "owner", + "repo": "repo", + "path": "src/", + "fields": []any{"name", "type"}, + }) + + result, err := handler(ContextWithDeps(context.Background(), deps), &request) + require.NoError(t, err) + require.False(t, result.IsError) + + textContent := getTextResult(t, result) + + // Each directory entry is reduced to the requested fields only; heavier + // fields such as html_url and download_url are dropped. + var returned []map[string]any + require.NoError(t, json.Unmarshal([]byte(textContent.Text), &returned)) + require.Len(t, returned, len(mockDirContent)) + for _, entry := range returned { + require.Len(t, entry, 2) + assert.Contains(t, entry, "name") + assert.Contains(t, entry, "type") + } + + assert.NotContains(t, textContent.Text, "html_url") + assert.NotContains(t, textContent.Text, "download_url") +} + +func Test_LegacyGetFileContents_Definition(t *testing.T) { + serverTool := LegacyGetFileContents(translations.NullTranslationHelper) + tool := serverTool.Tool + // LegacyGetFileContents is the FeatureFlagFieldsParam-disabled variant and + // owns the canonical get_file_contents.snap (no `fields`). + require.NoError(t, toolsnaps.Test(tool.Name, tool)) + require.Equal(t, []string{FeatureFlagFieldsParam}, serverTool.FeatureFlagDisable) + + assert.Equal(t, "get_file_contents", tool.Name) + schema, ok := tool.InputSchema.(*jsonschema.Schema) + require.True(t, ok, "InputSchema should be *jsonschema.Schema") + assert.NotContains(t, schema.Properties, "fields") +} + +func Test_GetFileContents_DirectoryFieldsTelemetry(t *testing.T) { + mockDirContent := []*github.RepositoryContent{ + { + Type: github.Ptr("file"), + Name: github.Ptr("README.md"), + Path: github.Ptr("README.md"), + SHA: github.Ptr("abc123"), + Size: github.Ptr(42), + URL: github.Ptr("https://api.github.com/repos/owner/repo/contents/README.md"), + HTMLURL: github.Ptr("https://github.com/owner/repo/blob/main/README.md"), + DownloadURL: github.Ptr("https://raw.githubusercontent.com/owner/repo/main/README.md"), + }, + } + + serverTool := GetFileContents(translations.NullTranslationHelper) + client := mustNewGHClient(t, MockHTTPClientWithHandlers(map[string]http.HandlerFunc{ + GetReposByOwnerByRepo: mockResponse(t, http.StatusOK, "{\"name\": \"repo\", \"default_branch\": \"main\"}"), + GetReposGitRefByOwnerByRepoByRef: mockResponse(t, http.StatusOK, "{\"ref\": \"refs/heads/main\", \"object\": {\"sha\": \"\"}}"), + GetReposContentsByOwnerByRepoByPath: expectQueryParams(t, map[string]string{}).andThen( + mockResponse(t, http.StatusOK, mockDirContent), + ), + GetRawReposContentsByOwnerByRepoByPath: expectQueryParams(t, map[string]string{"branch": "main"}).andThen( + mockResponse(t, http.StatusNotFound, nil), + ), + })) + deps, rec := depsWithRecordingMetrics(t, BaseDeps{Client: client}) + handler := serverTool.Handler(deps) + + request := createMCPRequest(map[string]any{ + "owner": "owner", + "repo": "repo", + "path": "src/", + "fields": []any{"name", "type"}, + }) + + result, err := handler(ContextWithDeps(context.Background(), deps), &request) + require.NoError(t, err) + require.False(t, result.IsError) + + call, ok := rec.increment(metricFieldsToolCall) + require.True(t, ok) + assert.Equal(t, "get_file_contents", call.tags["tool"]) + assert.Equal(t, "true", call.tags["filtered"]) + + full, ok := rec.counter(metricFieldsBytesFull) + require.True(t, ok) + sent, ok := rec.counter(metricFieldsBytesSent) + require.True(t, ok) + saved, ok := rec.counter(metricFieldsBytesSaved) + require.True(t, ok) + assert.Greater(t, full.value, sent.value, "filtering should remove bytes") + assert.Equal(t, full.value-sent.value, saved.value) +} + func Test_GetFileContents_IFC_InsidersMode(t *testing.T) { t.Parallel() diff --git a/pkg/github/search.go b/pkg/github/search.go index 23ccbd8387..e84f261873 100644 --- a/pkg/github/search.go +++ b/pkg/github/search.go @@ -191,8 +191,34 @@ func attachSearchRepositoriesIFCLabel(ctx context.Context, deps ToolDependencies setIFCLabel(callResult, ifc.LabelSearchIssues(visibilities)) } -// SearchCode creates a tool to search for code across GitHub repositories. +// SearchCode creates a tool to search for code across GitHub repositories. It is +// the FeatureFlagFieldsParam-enabled variant: it advertises the optional +// `fields` parameter and filters each result to the requested subset. Both this +// and LegacySearchCode register under the tool name "search_code"; exactly one +// is active for any given request thanks to mutually exclusive +// FeatureFlagEnable / FeatureFlagDisable annotations. func SearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { + st := searchCodeTool(t, true) + st.FeatureFlagEnable = FeatureFlagFieldsParam + return st +} + +// LegacySearchCode is the FeatureFlagFieldsParam-disabled variant of +// search_code. It exposes the original schema (no `fields` parameter) and never +// filters results, so it acts as the kill switch when the flag is off. It owns +// the canonical search_code.snap; the flag-enabled variant owns +// search_code_ff_.snap. Delete this function when the flag is removed. +func LegacySearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { + st := searchCodeTool(t, false) + st.FeatureFlagDisable = []string{FeatureFlagFieldsParam} + return st +} + +// searchCodeTool builds the search_code tool. When includeFields is true the +// tool advertises the optional `fields` parameter, filters each result to the +// requested subset, and emits fields telemetry. When false it is the original +// tool with no fields parameter and no filtering. +func searchCodeTool(t translations.TranslationHelperFunc, includeFields bool) inventory.ServerTool { schema := &jsonschema.Schema{ Type: "object", Properties: map[string]*jsonschema.Schema{ @@ -212,6 +238,16 @@ func SearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { }, Required: []string{"query"}, } + if includeFields { + schema.Properties["fields"] = &jsonschema.Schema{ + Type: "array", + Description: "Subset of fields to return for each code search result. If omitted, all fields are returned. Use this to reduce response size when you only need specific fields; omitting 'repository' and 'text_matches' in particular drops the largest per-result data.", + Items: &jsonschema.Schema{ + Type: "string", + Enum: codeSearchItemFieldEnum, + }, + } + } WithPagination(schema) return NewTool( @@ -239,6 +275,13 @@ func SearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { if err != nil { return utils.NewToolResultError(err.Error()), nil, nil } + var fields []string + if includeFields { + fields, err = OptionalStringArrayParam(args, "fields") + if err != nil { + return utils.NewToolResultError(err.Error()), nil, nil + } + } pagination, err := OptionalPaginationParams(args) if err != nil { return utils.NewToolResultError(err.Error()), nil, nil @@ -297,11 +340,30 @@ func SearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { Items: minimalItems, } - r, err := json.Marshal(minimalResult) + filtered := false + var payload any = minimalResult + if includeFields && len(fields) > 0 { + filteredItems, err := filterEachField(minimalItems, fields) + if err != nil { + return utils.NewToolResultErrorFromErr("failed to filter code search results", err), nil, nil + } + payload = map[string]any{ + "total_count": minimalResult.TotalCount, + "incomplete_results": minimalResult.IncompleteResults, + "items": filteredItems, + } + filtered = true + } + + r, err := json.Marshal(payload) if err != nil { return utils.NewToolResultErrorFromErr("failed to marshal response", err), nil, nil } + if includeFields { + recordSearchCodeFieldsUsage(ctx, deps, minimalResult, filtered, len(r)) + } + callResult := utils.NewToolResultText(string(r)) // Code search spans repositories; the IFC label is the conservative // join across every matched repository's visibility, read directly @@ -318,6 +380,20 @@ func SearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { ) } +// recordSearchCodeFieldsUsage emits fields telemetry for a search_code call. +// sentBytes is the size of the payload actually returned. When the response was +// filtered, the unfiltered size is computed from the full minimal result so the +// realized savings can be measured. +func recordSearchCodeFieldsUsage(ctx context.Context, deps ToolDependencies, full *MinimalCodeSearchResult, filtered bool, sentBytes int) { + fullBytes := sentBytes + if filtered { + if data, err := json.Marshal(full); err == nil { + fullBytes = len(data) + } + } + recordFieldsUsage(ctx, deps, "search_code", filtered, fullBytes, sentBytes) +} + func userOrOrgHandler(ctx context.Context, accountType string, deps ToolDependencies, args map[string]any) (*mcp.CallToolResult, any, error) { query, err := RequiredParam[string](args, "query") if err != nil { diff --git a/pkg/github/search_test.go b/pkg/github/search_test.go index 5ebf60842a..6217046e96 100644 --- a/pkg/github/search_test.go +++ b/pkg/github/search_test.go @@ -342,7 +342,11 @@ func Test_SearchCode(t *testing.T) { // Verify tool definition once serverTool := SearchCode(translations.NullTranslationHelper) tool := serverTool.Tool - require.NoError(t, toolsnaps.Test(tool.Name, tool)) + // SearchCode is the FeatureFlagFieldsParam-enabled variant; it owns the + // _ff_ snapshot. The canonical search_code.snap is owned by + // LegacySearchCode (see Test_LegacySearchCode_Definition). + require.NoError(t, toolsnaps.Test(tool.Name+"_ff_"+FeatureFlagFieldsParam, tool)) + require.Equal(t, FeatureFlagFieldsParam, serverTool.FeatureFlagEnable) assert.Equal(t, "search_code", tool.Name) assert.NotEmpty(t, tool.Description) @@ -354,6 +358,7 @@ func Test_SearchCode(t *testing.T) { assert.Contains(t, schema.Properties, "order") assert.Contains(t, schema.Properties, "perPage") assert.Contains(t, schema.Properties, "page") + assert.Contains(t, schema.Properties, "fields") assert.ElementsMatch(t, schema.Required, []string{"query"}) // Setup mock search results @@ -509,6 +514,149 @@ func Test_SearchCode(t *testing.T) { } } +func Test_SearchCode_FieldFiltering(t *testing.T) { + mockSearchResult := &github.CodeSearchResult{ + Total: github.Ptr(1), + IncompleteResults: github.Ptr(false), + CodeResults: []*github.CodeResult{ + { + Name: github.Ptr("file1.go"), + Path: github.Ptr("path/to/file1.go"), + SHA: github.Ptr("abc123def456"), + Repository: &github.Repository{ + Name: github.Ptr("repo"), + FullName: github.Ptr("owner/repo"), + }, + TextMatches: []*github.TextMatch{ + {Fragment: github.Ptr("func main() {}")}, + }, + }, + }, + } + + serverTool := SearchCode(translations.NullTranslationHelper) + client := mustNewGHClient(t, MockHTTPClientWithHandlers(map[string]http.HandlerFunc{ + GetSearchCode: mockResponse(t, http.StatusOK, mockSearchResult), + })) + deps := BaseDeps{Client: client} + handler := serverTool.Handler(deps) + + request := createMCPRequest(map[string]any{ + "query": "fmt.Println language:go", + "fields": []any{"name", "path"}, + }) + + result, err := handler(ContextWithDeps(context.Background(), deps), &request) + require.NoError(t, err) + require.False(t, result.IsError) + + textContent := getTextResult(t, result) + + // The wrapper metadata is preserved while each item is reduced to the + // requested fields only; the heavier repository and text_matches data is + // dropped. + var returned struct { + TotalCount int `json:"total_count"` + IncompleteResults bool `json:"incomplete_results"` + Items []map[string]any `json:"items"` + } + require.NoError(t, json.Unmarshal([]byte(textContent.Text), &returned)) + assert.Equal(t, 1, returned.TotalCount) + require.Len(t, returned.Items, 1) + require.Len(t, returned.Items[0], 2) + assert.Contains(t, returned.Items[0], "name") + assert.Contains(t, returned.Items[0], "path") + + assert.NotContains(t, textContent.Text, "repository") + assert.NotContains(t, textContent.Text, "text_matches") +} + +func Test_LegacySearchCode_Definition(t *testing.T) { + serverTool := LegacySearchCode(translations.NullTranslationHelper) + tool := serverTool.Tool + // LegacySearchCode is the FeatureFlagFieldsParam-disabled variant and owns + // the canonical search_code.snap (no `fields`). + require.NoError(t, toolsnaps.Test(tool.Name, tool)) + require.Equal(t, []string{FeatureFlagFieldsParam}, serverTool.FeatureFlagDisable) + + assert.Equal(t, "search_code", tool.Name) + schema, ok := tool.InputSchema.(*jsonschema.Schema) + require.True(t, ok, "InputSchema should be *jsonschema.Schema") + assert.NotContains(t, schema.Properties, "fields") +} + +func Test_SearchCode_FieldsTelemetry(t *testing.T) { + mockSearchResult := &github.CodeSearchResult{ + Total: github.Ptr(1), + IncompleteResults: github.Ptr(false), + CodeResults: []*github.CodeResult{ + { + Name: github.Ptr("file1.go"), + Path: github.Ptr("path/to/file1.go"), + SHA: github.Ptr("abc123def456"), + Repository: &github.Repository{ + Name: github.Ptr("repo"), + FullName: github.Ptr("owner/repo"), + }, + TextMatches: []*github.TextMatch{ + {Fragment: github.Ptr("func main() {}")}, + }, + }, + }, + } + + serverTool := SearchCode(translations.NullTranslationHelper) + client := mustNewGHClient(t, MockHTTPClientWithHandlers(map[string]http.HandlerFunc{ + GetSearchCode: mockResponse(t, http.StatusOK, mockSearchResult), + })) + + t.Run("filtered call records savings", func(t *testing.T) { + deps, rec := depsWithRecordingMetrics(t, BaseDeps{Client: client}) + handler := serverTool.Handler(deps) + + request := createMCPRequest(map[string]any{ + "query": "fmt.Println language:go", + "fields": []any{"name", "path"}, + }) + result, err := handler(ContextWithDeps(context.Background(), deps), &request) + require.NoError(t, err) + require.False(t, result.IsError) + + call, ok := rec.increment(metricFieldsToolCall) + require.True(t, ok) + assert.Equal(t, "search_code", call.tags["tool"]) + assert.Equal(t, "true", call.tags["filtered"]) + + full, ok := rec.counter(metricFieldsBytesFull) + require.True(t, ok) + sent, ok := rec.counter(metricFieldsBytesSent) + require.True(t, ok) + saved, ok := rec.counter(metricFieldsBytesSaved) + require.True(t, ok) + assert.Greater(t, full.value, sent.value, "filtering should remove bytes") + assert.Equal(t, full.value-sent.value, saved.value) + }) + + t.Run("unfiltered call records adoption only", func(t *testing.T) { + deps, rec := depsWithRecordingMetrics(t, BaseDeps{Client: client}) + handler := serverTool.Handler(deps) + + request := createMCPRequest(map[string]any{ + "query": "fmt.Println language:go", + }) + result, err := handler(ContextWithDeps(context.Background(), deps), &request) + require.NoError(t, err) + require.False(t, result.IsError) + + call, ok := rec.increment(metricFieldsToolCall) + require.True(t, ok) + assert.Equal(t, "false", call.tags["filtered"]) + + _, ok = rec.counter(metricFieldsBytesFull) + assert.False(t, ok, "no byte counters when not filtered") + }) +} + func Test_SearchUsers(t *testing.T) { // Verify tool definition once serverTool := SearchUsers(translations.NullTranslationHelper) diff --git a/pkg/github/tools.go b/pkg/github/tools.go index e361a6cfa4..24419ed24f 100644 --- a/pkg/github/tools.go +++ b/pkg/github/tools.go @@ -182,8 +182,10 @@ func AllTools(t translations.TranslationHelperFunc) []inventory.ServerTool { // Repository tools SearchRepositories(t), GetFileContents(t), + LegacyGetFileContents(t), ListCommits(t), SearchCode(t), + LegacySearchCode(t), SearchCommits(t), GetCommit(t), GetFileBlame(t),