Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions pkg/github/issues_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,45 @@ func Test_GetIssue(t *testing.T) {
}
}

// Test_GetIssue_PreservesAngleBracketsInCodeBlocks serves a mocked issue whose
// body is a fenced code block containing angle brackets, fetches it through the
// IssueRead handler with method "get", and asserts that the body in the
// returned JSON equals the body that was served.
func Test_GetIssue_PreservesAngleBracketsInCodeBlocks(t *testing.T) {
	wantBody := "```\nlet ptr: mut_raw_ptr<int> = raw_new int;\n```"
	issue := &github.Issue{
		Number:  github.Ptr(42),
		Title:   github.Ptr("Angle brackets in code"),
		Body:    github.Ptr(wantBody),
		State:   github.Ptr("open"),
		HTMLURL: github.Ptr("https://github.com/owner/repo/issues/42"),
		User:    &github.User{Login: github.Ptr("testuser")},
	}

	tool := IssueRead(translations.NullTranslationHelper)

	// Only the single-issue REST endpoint is stubbed.
	routes := map[string]http.HandlerFunc{
		GetReposIssuesByOwnerByRepoByIssueNumber: mockResponse(t, http.StatusOK, issue),
	}
	deps := BaseDeps{
		Client:    mustNewGHClient(t, MockHTTPClientWithHandlers(routes)),
		GQLClient: defaultGQLClient,
	}
	handle := tool.Handler(deps)

	args := map[string]any{
		"method":       "get",
		"owner":        "owner",
		"repo":         "repo",
		"issue_number": float64(42),
	}
	req := createMCPRequest(args)

	res, callErr := handle(ContextWithDeps(context.Background(), deps), &req)
	require.NoError(t, callErr)
	require.NotNil(t, res)
	require.False(t, res.IsError)

	// Decode the tool's text payload and compare the body verbatim.
	payload := getTextResult(t, res)
	var got MinimalIssue
	require.NoError(t, json.Unmarshal([]byte(payload.Text), &got))
	assert.Equal(t, wantBody, got.Body)
}

func Test_IssueRead_IFC_InsidersMode(t *testing.T) {
t.Parallel()

Expand Down
143 changes: 141 additions & 2 deletions pkg/sanitize/sanitize.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ var policy *bluemonday.Policy
var policyOnce sync.Once

// Sanitize passes input through the package's filters and returns the result.
//
// NOTE(review): this span comes from a rendered diff without +/- markers. The
// first return below looks like the pre-change body and the four statements
// after it look like its replacement; as literally written here those
// statements are unreachable. Confirm against the real file.
func Sanitize(input string) string {
return FilterHTMLTags(FilterCodeFenceMetadata(FilterInvisibleCharacters(input)))
// Replacement pipeline: apply FilterInvisibleCharacters and then
// FilterCodeFenceMetadata, swap '<' and '>' inside code for sentinels, run
// FilterHTMLTags, and finally swap the sentinels back.
cleaned := FilterCodeFenceMetadata(FilterInvisibleCharacters(input))
protected := protectCodeAngleBrackets(cleaned)
sanitized := FilterHTMLTags(protected)
return restoreCodeAngleBrackets(sanitized)
}

// FilterInvisibleCharacters removes invisible or control characters that should not appear
Expand Down Expand Up @@ -145,6 +148,141 @@ func isSafeCodeFenceToken(token string) bool {
return true
}

// Sentinels that stand in for '<' and '>' inside code while HTML sanitization
// runs, so the sanitizer does not mistake them for tag delimiters.
// Each marker is wrapped in NUL bytes. NUL bytes are stripped by
// FilterInvisibleCharacters before protectCodeAngleBrackets runs, so
// caller-supplied text cannot contain a ready-made sentinel (sentinel
// collision attack).
const (
ltSentinel = "\x00LT\x00" // placeholder for '<'
gtSentinel = "\x00GT\x00" // placeholder for '>'
)

// protectCodeAngleBrackets swaps every '<' and '>' found inside a fenced code
// block or an inline code span for ltSentinel / gtSentinel, so that bluemonday
// does not strip them as HTML tags. The input is processed line by line; fence
// delimiter lines themselves are passed through untouched.
func protectCodeAngleBrackets(input string) string {
	if len(input) == 0 {
		return input
	}

	rows := strings.Split(input, "\n")
	inFence, openLen := false, 0

	for idx := range rows {
		toggled, runLen := toggleCodeFence(rows[idx], inFence, openLen)
		switch {
		case toggled:
			// Delimiter line: flip the state and remember the opener's
			// length only while a block is open.
			inFence = !inFence
			openLen = 0
			if inFence {
				openLen = runLen
			}
		case inFence:
			// Every angle bracket on a line inside a fence is code.
			rows[idx] = replaceAngleBrackets(rows[idx])
		default:
			// Outside fences only inline code spans are protected.
			rows[idx] = protectInlineCodeAngleBrackets(rows[idx])
		}
	}

	return strings.Join(rows, "\n")
}

// toggleCodeFence reports whether line is a backtick fence delimiter that
// flips the fenced-code state, and returns the length of its backtick run.
//
// Parameters:
//   - line: one line of input, without its trailing newline.
//   - insideFence: whether a fenced block is currently open.
//   - currentFenceLen: backtick count of the fence that opened the current
//     block (0 when no block is open).
//
// Returns (true, runLength) when the line opens or closes a block, and
// (false, currentFenceLen) otherwise.
//
// A candidate delimiter is the first run of three or more backticks on the
// line, and hasNonWhitespace must report false for the text before it.
// Following the CommonMark rules for fenced code blocks:
//   - a closing fence must be at least as long as the opening fence and may
//     be followed only by whitespace, so a line such as "```js" inside a block
//     is code, not a terminator;
//   - the info string of an opening backtick fence may not contain a backtick,
//     so a line such as "```x``` text" is inline code and must not open a
//     block (otherwise all following lines would be shielded from the HTML
//     filter).
func toggleCodeFence(line string, insideFence bool, currentFenceLen int) (bool, int) {
	idx := strings.Index(line, "```")
	if idx == -1 || hasNonWhitespace(line[:idx]) {
		return false, currentFenceLen
	}

	// strings.Index matched three backticks, so the run is at least 3 long;
	// extend it over any further backticks.
	fenceEnd := idx + 3
	for fenceEnd < len(line) && line[fenceEnd] == '`' {
		fenceEnd++
	}
	fenceLen := fenceEnd - idx
	rest := line[fenceEnd:]

	if insideFence {
		// Closing fence: long enough and nothing but whitespace after it.
		if fenceLen < currentFenceLen || hasNonWhitespace(rest) {
			return false, currentFenceLen
		}
		return true, fenceLen
	}

	// Opening fence: a backtick in the info string disqualifies the line.
	if strings.Contains(rest, "`") {
		return false, currentFenceLen
	}
	return true, fenceLen
}

// protectInlineCodeAngleBrackets rewrites a single line so that '<' and '>'
// inside backtick-delimited inline code spans are replaced via
// replaceAngleBrackets. Text outside spans is copied through unchanged, and a
// backtick run with no matching closer on the same line is emitted literally.
func protectInlineCodeAngleBrackets(line string) string {
	if strings.IndexByte(line, '`') < 0 {
		return line
	}

	var b strings.Builder
	b.Grow(len(line))

	for pos := 0; pos < len(line); {
		if line[pos] != '`' {
			b.WriteByte(line[pos])
			pos++
			continue
		}

		// Measure the opening backtick run; pos ends up on the first
		// byte of the span's content.
		runStart := pos
		for pos < len(line) && line[pos] == '`' {
			pos++
		}
		runLen := pos - runStart

		closeAt := findInlineCodeClose(line, pos, runLen)

		// The opener is written verbatim whether or not it is matched.
		b.WriteString(line[runStart:pos])
		if closeAt == -1 {
			// Unmatched run: scanning resumes right after it.
			continue
		}

		b.WriteString(replaceAngleBrackets(line[pos:closeAt]))
		b.WriteString(line[closeAt : closeAt+runLen])
		pos = closeAt + runLen
	}

	return b.String()
}

// findInlineCodeClose returns the index in line of the backtick run that
// closes an inline code span, or -1 if the line has none.
//
// Parameters:
//   - line: the line being scanned.
//   - contentStart: index of the first byte after the opening backtick run.
//   - openLen: number of backticks in the opening run.
//
// A closer is a maximal run of exactly openLen backticks. Runs of any other
// length are skipped as a whole, so a closer is never matched inside a longer
// run: "``x ```" has no closer for a two-backtick opener, and in "`a``b`" the
// closer is the final backtick, not the middle of "``".
func findInlineCodeClose(line string, contentStart, openLen int) int {
	for i := contentStart; i < len(line); {
		if line[i] != '`' {
			i++
			continue
		}

		// Consume the entire run before comparing its length.
		runStart := i
		for i < len(line) && line[i] == '`' {
			i++
		}
		if i-runStart == openLen {
			return runStart
		}
	}

	return -1
}

// replaceAngleBrackets returns s with each '<' rewritten to ltSentinel and
// each '>' rewritten to gtSentinel. A string containing neither character is
// returned as is.
func replaceAngleBrackets(s string) string {
	if strings.IndexAny(s, "<>") < 0 {
		return s
	}

	var b strings.Builder
	// Both brackets are single ASCII bytes, so a byte-wise scan cannot
	// split a multi-byte UTF-8 sequence.
	for i := 0; i < len(s); i++ {
		switch c := s[i]; c {
		case '<':
			b.WriteString(ltSentinel)
		case '>':
			b.WriteString(gtSentinel)
		default:
			b.WriteByte(c)
		}
	}
	return b.String()
}

// restoreCodeAngleBrackets turns every ltSentinel in input back into '<' and
// then every gtSentinel back into '>', and returns the result.
func restoreCodeAngleBrackets(input string) string {
	out := input
	// Two sequential passes, '<' first.
	for _, pair := range [...][2]string{
		{ltSentinel, "<"},
		{gtSentinel, ">"},
	} {
		out = strings.ReplaceAll(out, pair[0], pair[1])
	}
	return out
}

func getPolicy() *bluemonday.Policy {
policyOnce.Do(func() {
p := bluemonday.StrictPolicy()
Expand Down Expand Up @@ -175,7 +313,8 @@ func getPolicy() *bluemonday.Policy {

func shouldRemoveRune(r rune) bool {
switch r {
case 0x200B, // ZERO WIDTH SPACE
case 0x0000, // NUL — stripped to prevent sentinel collision in protectCodeAngleBrackets
0x200B, // ZERO WIDTH SPACE
0x200C, // ZERO WIDTH NON-JOINER
0x200E, // LEFT-TO-RIGHT MARK
0x200F, // RIGHT-TO-LEFT MARK
Expand Down
83 changes: 83 additions & 0 deletions pkg/sanitize/sanitize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ func TestShouldRemoveRune(t *testing.T) {
expected bool
}{
// Individual characters that should be removed
{name: "NUL byte", rune: 0x0000, expected: true},
{name: "zero width space", rune: 0x200B, expected: true},
{name: "zero width non-joiner", rune: 0x200C, expected: true},
{name: "left-to-right mark", rune: 0x200E, expected: true},
Expand Down Expand Up @@ -300,3 +301,85 @@ func TestSanitizeRemovesInvisibleCodeFenceMetadata(t *testing.T) {
result := Sanitize(input)
assert.Equal(t, expected, result)
}

// TestSanitizePreservesAngleBracketsInCodeBlocks runs Sanitize over a table of
// inputs: angle brackets inside fenced blocks and inline code spans must come
// back unchanged, while tags outside code and NUL-delimited look-alike
// sentinels must not survive as markup.
func TestSanitizePreservesAngleBracketsInCodeBlocks(t *testing.T) {
	type sanitizeCase struct {
		name     string
		input    string
		expected string
	}

	cases := []sanitizeCase{
		{
			name:     "fenced code block with angle brackets",
			input:    "```\nlet ptr: mut_raw_ptr<int> = raw_new int;\n```",
			expected: "```\nlet ptr: mut_raw_ptr<int> = raw_new int;\n```",
		},
		{
			name:     "inline code with angle brackets",
			input:    "Use `Vec<String>` for collections.",
			expected: "Use `Vec<String>` for collections.",
		},
		{
			// NOTE(review): if the tag is removed with nothing put in its
			// place, two spaces would remain between "has" and "in";
			// confirm this expected literal against the real file.
			name:     "angle brackets outside code are sanitized",
			input:    "This has <script>alert('xss')</script> in it.",
			expected: "This has in it.",
		},
		{
			name:     "fenced code block with generic types",
			input:    "Example:\n```go\nfunc Foo[T comparable](x T) {}\n```\nDone.",
			expected: "Example:\n```go\nfunc Foo[T comparable](x T) {}\n```\nDone.",
		},
		{
			name:     "multiple inline code spans with angle brackets",
			input:    "Compare `Map<K, V>` and `Set<T>`.",
			expected: "Compare `Map<K, V>` and `Set<T>`.",
		},
		{
			name:     "shorter fence inside code does not close block",
			input:    "````\nline<A>\n```\nstill<B>\n````",
			expected: "````\nline<A>\n```\nstill<B>\n````",
		},
		{
			// Input that already spells out the sentinels must not be
			// turned into real angle brackets.
			name:     "sentinel collision does not bypass sanitizer",
			input:    "\x00LT\x00script\x00GT\x00alert(1)\x00LT\x00/script\x00GT\x00",
			expected: "LTscriptGTalert(1)LT/scriptGT",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := Sanitize(tc.input)
			assert.Equal(t, tc.expected, got)
		})
	}
}

// TestProtectCodeAngleBrackets checks the sentinel substitution step on its
// own: brackets inside fenced blocks and inline spans become ltSentinel /
// gtSentinel, and brackets outside code are left as they are.
func TestProtectCodeAngleBrackets(t *testing.T) {
	type protectCase struct {
		name     string
		input    string
		expected string
	}

	cases := []protectCase{
		{
			name:     "fenced code block with angle brackets",
			input:    "```\nvector<int> v;\n```",
			expected: "```\nvector" + ltSentinel + "int" + gtSentinel + " v;\n```",
		},
		{
			name:     "inline code with angle brackets",
			input:    "Use `Map<K, V>` here.",
			expected: "Use `Map" + ltSentinel + "K, V" + gtSentinel + "` here.",
		},
		{
			name:     "angle brackets outside code unchanged",
			input:    "Use <b>bold</b>\n```\ncode<T>\n```",
			expected: "Use <b>bold</b>\n```\ncode" + ltSentinel + "T" + gtSentinel + "\n```",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := protectCodeAngleBrackets(tc.input)
			assert.Equal(t, tc.expected, got)
		})
	}
}