|
| 1 | + |
| 2 | +// --- stubs --- |
| 3 | + |
/// Stub `URL` type declared in this file's stubs section.
struct URL {
    /// Failable initializer taking a string. The body is empty, so the
    /// argument is not inspected and the initializer never yields `nil`.
    init?(string: String) {}
}
| 7 | + |
extension String {
    /// Stub initializer that accepts a `URL` and always produces the empty
    /// string; the argument is ignored and nothing is read.
    init(contentsOf: URL) {
        let empty = ""
        self.init(empty)
    }
}
| 14 | + |
/// Empty stub type. In this file it is the `Output` that `Regex` must have
/// in order to be created from a pattern string.
struct AnyRegexOutput {}
| 17 | + |
/// Stub protocol whose only requirement is the associated type
/// `RegexOutput`, which is also its primary associated type.
protocol RegexComponent<RegexOutput> {
    associatedtype RegexOutput
}
| 21 | + |
/// Stub generic regex type. None of its members performs any matching.
struct Regex<Output>: RegexComponent {
    /// Satisfies the `RegexComponent` associated type.
    typealias RegexOutput = Output

    /// Empty stub for a match result.
    struct Match {}

    /// Creates a regex from a pattern string. Only available when `Output`
    /// is `AnyRegexOutput`.
    ///
    /// Declared `throws`, but the stub body is empty, so the pattern is not
    /// inspected and nothing is thrown here.
    init(_ pattern: String) throws where Output == AnyRegexOutput {}

    /// Stub: ignores `string` and always returns `nil`.
    func firstMatch(in string: String) throws -> Regex<Output>.Match? { nil }

    /// Stub: ignores `string` and always returns `nil`.
    func wholeMatch(in string: String) throws -> Regex<Output>.Match? { nil }
}
| 33 | + |
/// Stub conformance of `String` to `RegexComponent`.
extension String: RegexComponent {
    /// Declares `Substring` as the output type for `String`.
    typealias Output = Substring
    /// Satisfies the `RegexComponent` associated type via `String.Output`.
    typealias RegexOutput = String.Output
}
| 38 | + |
| 39 | +// --- tests --- |
| 40 | + |
/// Identity function: returns its argument unchanged.
func id(_ s: String) -> String { s }
| 42 | + |
/// Value type wrapping a single mutable hostname string.
struct MyDomain {
    /// The stored hostname text; callers may reassign it after construction.
    var hostname: String

    /// Stores `hostname` as given, without validation.
    init(_ hostname: String) {
        self.hostname = hostname
    }
}
| 50 | + |
/// Builds a series of `Regex` values from hostname-like patterns and matches
/// each against a string obtained from `myUrl`.
///
/// The trailing comment on each case records its verdict: `GOOD`, `BAD`
/// (with the reason: incomplete hostname and/or missing anchor) or `DUBIOUS`.
/// A bracketed `[NOT DETECTED]` / `[DETECTED …]` suffix presumably marks
/// cases where the analysis result differs from that verdict (confirm
/// against the expected results for this test).
///
/// - Parameter myUrl: Passed to `String(contentsOf:)` to produce `tainted`.
/// - Throws: Propagates anything thrown by `Regex.init`, `firstMatch(in:)`
///   or `wholeMatch(in:)`.
func testHostnames(myUrl: URL) throws {
    let tainted = String(contentsOf: myUrl) // tainted

    // --- pattern literals passed directly to Regex ---
    _ = try Regex(#"^http://example\.com/"#).firstMatch(in: tainted) // GOOD
    _ = try Regex(#"^http://example.com/"#).firstMatch(in: tainted) // GOOD (only '.' here gives a valid top-level domain)
    _ = try Regex(#"^http://example.com"#).firstMatch(in: tainted) // BAD (missing anchor) [NOT DETECTED]
    _ = try Regex(#"^http://test\.example\.com/"#).firstMatch(in: tainted) // GOOD
    _ = try Regex(#"^http://test\.example.com/"#).firstMatch(in: tainted) // GOOD (only '.' here gives a valid top-level domain)
    _ = try Regex(#"^http://test\.example.com"#).firstMatch(in: tainted) // BAD (missing anchor) [NOT DETECTED]
    _ = try Regex(#"^http://test.example.com/"#).firstMatch(in: tainted) // BAD (incomplete hostname)
    _ = try Regex(#"^http://test[.]example[.]com/"#).firstMatch(in: tainted) // GOOD (alternative method of escaping)

    _ = try Regex(#"^http://test.example.net/"#).firstMatch(in: tainted) // BAD (incomplete hostname)
    _ = try Regex(#"^http://test.(example-a|example-b).com/"#).firstMatch(in: tainted) // BAD (incomplete hostname)
    _ = try Regex(#"^http://(.+).example.com/"#).firstMatch(in: tainted) // BAD (incomplete hostname x 2)
    _ = try Regex(#"^http://(\.+)\.example.com/"#).firstMatch(in: tainted) // GOOD
    _ = try Regex(#"^http://(?:.+)\.test\.example.com/"#).firstMatch(in: tainted) // BAD (incomplete hostname)
    _ = try Regex(#"^http://test.example.com/(?:.*)"#).firstMatch(in: tainted) // BAD (incomplete hostname)
    _ = try Regex(#"^(.+\.(?:example-a|example-b)\.com)/"#).firstMatch(in: tainted) // BAD (missing anchor) [NOT DETECTED]
    _ = try Regex(#"^(https?:)?//((service|www).)?example.com(?=$|/)"#).firstMatch(in: tainted) // BAD (incomplete hostname)
    _ = try Regex(#"^(http|https)://www.example.com/p/f/"#).firstMatch(in: tainted) // BAD (incomplete hostname)
    _ = try Regex(#"^(http://sub.example.com/)"#).firstMatch(in: tainted) // BAD (incomplete hostname)
    _ = try Regex(#"^https?://api.example.com/"#).firstMatch(in: tainted) // BAD (incomplete hostname)
    _ = try Regex(#"^http[s]?://?sub1\.sub2\.example\.com/f/(.+)"#).firstMatch(in: tainted) // GOOD (it has a capture group after the TLD, so should be ignored)
    _ = try Regex(#"^https://[a-z]*.example.com$"#).firstMatch(in: tainted) // BAD (incomplete hostname)
    _ = try Regex(#"^(example.dev|example.com)"#).firstMatch(in: tainted) // GOOD (any extended hostname wouldn't be included in the capture group)
    _ = try Regex(#"^protos?://(localhost|.+.example.net|.+.example-a.com|.+.example-b.com|.+.example.internal)"#).firstMatch(in: tainted) // BAD (incomplete hostname x3, missing anchor x 1)

    _ = try Regex(#"^http://(..|...)\.example\.com/index\.html"#).firstMatch(in: tainted) // GOOD (wildcards are intentional)
    _ = try Regex(#"^http://.\.example\.com/index\.html"#).firstMatch(in: tainted) // GOOD (the wildcard is intentional)
    _ = try Regex(#"^(foo.example\.com|whatever)$"#).firstMatch(in: tainted) // DUBIOUS (one disjunction doesn't even look like a hostname) [DETECTED incomplete hostname]

    _ = try Regex(#"^test.example.com$"#).firstMatch(in: tainted) // BAD (incomplete hostname)
    _ = try Regex(#"test.example.com"#).wholeMatch(in: tainted) // BAD (incomplete hostname, missing anchor)

    // --- patterns that reach Regex indirectly ---

    // Through nested calls to the identity function.
    _ = try Regex(id(id(id(#"test.example.com$"#)))).firstMatch(in: tainted) // BAD (incomplete hostname)

    // Through string interpolation of a local constant.
    let hostname = #"test.example.com$"# // BAD (incomplete hostname) [NOT DETECTED]
    _ = try Regex("\(hostname)").firstMatch(in: tainted)

    // Through a stored property assigned after construction.
    var domain = MyDomain("")
    domain.hostname = #"test.example.com$"# // BAD (incomplete hostname)
    _ = try Regex(domain.hostname).firstMatch(in: tainted)

    // Through a struct passed to a local helper.
    func convert1(_ domain: MyDomain) throws -> Regex<AnyRegexOutput> {
        return try Regex(domain.hostname)
    }
    _ = try convert1(MyDomain(#"test.example.com$"#)).firstMatch(in: tainted) // BAD (incomplete hostname)

    // Through an array element handed to a local helper inside `map`.
    let domains = [ MyDomain(#"test.example.com$"#) ] // BAD (incomplete hostname) [NOT DETECTED]
    func convert2(_ domain: MyDomain) throws -> Regex<AnyRegexOutput> {
        return try Regex(domain.hostname)
    }
    _ = try domains.map({ try convert2($0).firstMatch(in: tainted) })

    // --- patterns built with `+` ---
    let primary = "example.com$"
    _ = try Regex("test." + primary).firstMatch(in: tainted) // BAD (incomplete hostname) [NOT DETECTED]
    _ = try Regex("test." + "example.com$").firstMatch(in: tainted) // BAD (incomplete hostname) [NOT DETECTED]
    // NOTE(review): each of the next two arguments is ONE raw string literal;
    // the `" + "` is part of the pattern text, not a concatenation of two
    // strings. Confirm whether that is intended before changing it.
    _ = try Regex(#"^http://localhost:8000|" + "^https?://.+\.example\.com/"#).firstMatch(in: tainted) // BAD (incomplete hostname) [NOT DETECTED]
    _ = try Regex(#"^http://localhost:8000|" + "^https?://.+.example\.com/"#).firstMatch(in: tainted) // BAD (incomplete hostname) [NOT DETECTED]

    // Deliberately unused: a hostname-like string that never becomes a regex.
    let harmless = #"^http://test.example.com"# // GOOD (never used as a regex)
}
0 commit comments