Files
kubesphere/pkg/utils/directives/match.go
2025-04-30 15:53:51 +08:00

240 lines
7.7 KiB
Go

/*
* Copyright 2024 the KubeSphere Authors.
* Please refer to the LICENSE file in the root directory of the project.
* https://github.com/kubesphere/kubesphere/blob/master/LICENSE
*/
package directives
import (
"net/http"
"net/url"
"path"
"runtime"
"strings"
)
// The following code is copied from github.com/caddyserver/caddy/modules/caddyhttp/matchers.go.

// MatchPath is a list of URL path patterns. A request matches when its path
// matches at least one of the patterns; see Match for the pattern syntax.
type MatchPath []string
// Match reports whether the path of req matches at least one pattern in m.
//
// The request path is lowercased before comparison: RFC 9110 section 4.2.3
// says path matching is case-sensitive, but matching case-insensitively
// mitigates security issues caused by differences between operating systems
// and applications. The patterns themselves are used as stored (after
// placeholder replacement), so they are presumably expected to be lowercase
// already; verify against the code that builds the MatchPath.
//
// Each pattern is first passed through the *Replacer stored in the request
// context under ReplacerCtxKey; the type assertion is unchecked, so a request
// without that value panics. Patterns are then interpreted as follows:
//
//   - "*" matches every request.
//   - A pattern containing '%' is compared against the escaped path by
//     matchPatternWithEscapeSequence; no other strategy is tried for it.
//   - "*foo*" (exactly two wildcards, first and last) is a substring match.
//   - "*foo" / "foo*" (exactly one wildcard) is a suffix / prefix match.
//   - Anything else is handed to path.Match, which is also an exact match
//     when the pattern has no glob characters.
func (m MatchPath) Match(req *http.Request) bool {
	lowered := strings.ToLower(req.URL.Path)

	// Windows ignores trailing dots and spaces when accessing files, which
	// could let e.g. "file.php." slip past a "*.php" matcher and be served
	// as a static file, so strip them there.
	if runtime.GOOS == "windows" {
		lowered = strings.TrimRight(lowered, ". ")
	}

	replacer := req.Context().Value(ReplacerCtxKey).(*Replacer)

	for _, raw := range m {
		pattern := replacer.ReplaceAll(raw, "")

		// A lone wildcard matches everything.
		if pattern == "*" {
			return true
		}

		// The path is cleaned before comparing so that crafted requests
		// (doubled slashes, dot segments, ...) cannot bypass the matcher.
		// Slashes are merged unless the pattern itself contains "//", in
		// which case doubled slashes in the path are preserved.
		collapse := !strings.Contains(pattern, "//")

		// '%' in the pattern signals that (part of) the comparison must
		// happen in raw/escaped space: "%40" == "%40", not "@". Prefix,
		// suffix and substring shortcuts make no sense for such patterns.
		if strings.Contains(pattern, "%") {
			if m.matchPatternWithEscapeSequence(CleanPath(req.URL.EscapedPath(), collapse), pattern) {
				return true
			}
			continue
		}

		candidate := CleanPath(lowered, collapse)

		// The fast paths below apply only when the leading/trailing
		// wildcards are the only wildcards in the pattern; any other
		// placement of '*' is treated as a glob.
		stars := strings.Count(pattern, "*")
		leading := strings.HasPrefix(pattern, "*")
		trailing := strings.HasSuffix(pattern, "*")

		var matched bool
		switch {
		case stars == 2 && leading && trailing:
			// "*foo*": substring match on what lies between the wildcards
			matched = strings.Contains(candidate, pattern[1:len(pattern)-1])
		case stars == 1 && leading:
			// "*foo": suffix match
			matched = strings.HasSuffix(candidate, pattern[1:])
		case stars == 1 && trailing:
			// "foo*": prefix match
			matched = strings.HasPrefix(candidate, pattern[:len(pattern)-1])
		default:
			// Glob match; the error (malformed pattern) is deliberately
			// ignored because there is nothing useful to do with it here,
			// and matched stays false in that case.
			matched, _ = path.Match(pattern, candidate)
		}
		if matched {
			return true
		}
	}

	return false
}
// matchPatternWithEscapeSequence reports whether escapedPath matches
// matchPath, where matchPath contains at least one '%'.
//
// A '%' in the pattern means that part of the path must be compared in
// raw/escaped space instead of normalized space. The function therefore
// walks the pattern and the escaped path in lock-step and assembles a
// candidate string: path characters are decoded by default, but are copied
// in their (lowercased) escaped form wherever the pattern has a literal
// "%xx" sequence, and are copied undecoded for a raw wildcard "%*". The
// candidate is finally matched with path.Match against the pattern with
// every "%*" rewritten to "*".
//
// It returns false if an escape sequence cannot be decoded or if the
// character that terminates a wildcard span is not found in the path.
func (MatchPath) matchPatternWithEscapeSequence(escapedPath, matchPath string) bool {
	var candidate strings.Builder

	// patPos and pathPos are byte cursors into matchPath and escapedPath.
	patPos, pathPos := 0, 0

	for patPos < len(matchPath) && pathPos < len(escapedPath) {
		// Read the next path character, decoding it if it starts a
		// three-byte escape sequence.
		cur := escapedPath[pathPos]
		decoded := string(cur)
		rawSeq := ""

		if cur == '%' && pathPos+3 <= len(escapedPath) {
			// Keep the escaped form around in case the pattern wants a
			// literal comparison here. It is lowercased (even though RFC
			// 3986 section 2.1 recommends uppercase hex digits) so that the
			// comparison is case-insensitive; this assumes the pattern's
			// escape sequences are lowercase too, which is not enforced in
			// this function.
			rawSeq = strings.ToLower(escapedPath[pathPos : pathPos+3])
			unescaped, err := url.PathUnescape(rawSeq)
			if err != nil {
				// only possible if the caller passed an invalid sequence
				return false
			}
			decoded = unescaped
			pathPos += 2 // an escape sequence is 2 bytes longer than a plain char
		}

		// Now interpret the next pattern character.
		decodeSpan := true
		switch matchPath[patPos] {
		case '%':
			// A literal escape sequence ("%xx", not "%*"): emit the path's
			// escaped form and consume the two extra pattern bytes. If the
			// path character was not an escape sequence, rawSeq is empty
			// and nothing is emitted.
			if patPos+3 <= len(matchPath) && matchPath[patPos+1] != '*' {
				candidate.WriteString(rawSeq)
				pathPos++
				patPos += 2
				break
			}

			// Otherwise treat it as a raw wildcard: step onto the byte
			// after '%' and keep the span undecoded.
			patPos++
			decodeSpan = false

			fallthrough
		case '*':
			// Wildcard: take everything up to the first occurrence of the
			// pattern byte that follows it, or to the end of the path when
			// the wildcard is the last thing in the pattern.
			//
			// NOTE(review): if the current path character was an escape
			// sequence, pathPos was already advanced by two bytes above, so
			// the span starts at the last hex digit of that sequence.
			// Behavior intentionally left as-is.
			rest := escapedPath[pathPos:]
			spanLen := len(rest)
			if patPos+1 < len(matchPath) {
				spanLen = strings.IndexByte(rest, matchPath[patPos+1])
				if spanLen < 0 {
					// terminator of the wildcard span never appears
					return false
				}
			}
			if spanLen == 0 {
				// empty span: nothing to emit this round
				break
			}
			span := rest[:spanLen]
			if decodeSpan {
				unescaped, err := url.PathUnescape(span)
				if err != nil {
					return false
				}
				span = unescaped
			}
			candidate.WriteString(span)
			pathPos += spanLen
		default:
			// Ordinary pattern byte: compare in normalized space.
			candidate.WriteString(decoded)
			pathPos++
		}

		patPos++
	}

	// Raw wildcards have done their job; from here on they are plain globs.
	globPattern := strings.ReplaceAll(matchPath, "%*", "*")

	// The error (malformed pattern) is ignored because it cannot be handled
	// here; ok is false in that case.
	ok, _ := path.Match(globPattern, candidate.String())
	return ok
}