Skip to content

Commit 52d7e2d

Browse files
committed
Add query for hashing sensitive data with weak hashing algorithm
1 parent 713e19f commit 52d7e2d

File tree

10 files changed

+536
-1
lines changed

10 files changed

+536
-1
lines changed
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
/**
2+
* Provides default sources, sinks and sanitizers for detecting "use of a
3+
* broken or weak cryptographic hashing algorithm on sensitive data"
4+
* vulnerabilities, as well as extension points for adding your own. This is
5+
* divided into two general cases:
6+
* - hashing sensitive data
7+
* - hashing passwords (which requires the hashing algorithm to be
8+
* sufficiently computationally expensive in addition to other requirements)
9+
*/
10+
11+
import go
12+
import semmle.go.dataflow.internal.DataFlowPrivate
13+
private import semmle.go.security.SensitiveActions
14+
15+
/**
16+
* Provides default sources, sinks and sanitizers for detecting "use of a broken or weak
17+
* cryptographic hashing algorithm on sensitive data" vulnerabilities on sensitive data that does
18+
* NOT require computationally expensive hashing, as well as extension points for adding your own.
19+
*
20+
* Also see the `ComputationallyExpensiveHashFunction` module.
21+
*/
22+
module NormalHashFunction {
23+
/**
24+
* A data flow source for "use of a broken or weak cryptographic hashing algorithm on sensitive
25+
* data" vulnerabilities that does not require computationally expensive hashing. That is, a
26+
* piece of sensitive data that is not a password.
27+
*/
28+
abstract class Source extends DataFlow::Node {
29+
Source() { not this instanceof ComputationallyExpensiveHashFunction::Source }
30+
31+
/**
32+
* Gets the classification of the sensitive data.
33+
*/
34+
abstract string getClassification();
35+
}
36+
37+
/**
38+
* A data flow sink for "use of a broken or weak cryptographic hashing algorithm on sensitive
39+
* data" vulnerabilities that applies to data that does not require computationally expensive
40+
* hashing. That is, a broken or weak hashing algorithm.
41+
*/
42+
abstract class Sink extends DataFlow::Node {
43+
/**
44+
* Gets the name of the weak hashing algorithm.
45+
*/
46+
abstract string getAlgorithmName();
47+
}
48+
49+
/**
50+
* A barrier for "use of a broken or weak cryptographic hashing algorithm on sensitive data"
51+
* vulnerabilities that applies to data that does not require computationally expensive hashing.
52+
*/
53+
abstract class Barrier extends DataFlow::Node { }
54+
55+
/**
56+
* A flow source modeled by the `SensitiveData` library.
57+
*/
58+
class SensitiveDataAsSource extends Source {
59+
SensitiveExpr::Classification classification;
60+
61+
SensitiveDataAsSource() {
62+
classification = this.asExpr().(SensitiveExpr).getClassification() and
63+
not classification = SensitiveExpr::password() and // (covered in ComputationallyExpensiveHashFunction)
64+
not classification = SensitiveExpr::id() // (not accurate enough)
65+
}
66+
67+
override SensitiveExpr::Classification getClassification() { result = classification }
68+
}
69+
70+
/**
71+
* A flow sink modeled by the `Cryptography` module.
72+
*/
73+
class WeakHashingOperationInputAsSink extends Sink {
74+
Cryptography::HashingAlgorithm algorithm;
75+
76+
WeakHashingOperationInputAsSink() {
77+
exists(Cryptography::CryptographicOperation operation |
78+
algorithm.isWeak() and
79+
algorithm = operation.getAlgorithm() and
80+
this = operation.getAnInput()
81+
)
82+
}
83+
84+
override string getAlgorithmName() { result = algorithm.getName() }
85+
}
86+
}
87+
88+
/**
89+
* Provides default sources, sinks and sanitizers for detecting "use of a broken or weak
90+
* cryptographic hashing algorithm on sensitive data" vulnerabilities on sensitive data that DOES
91+
* require computationally expensive hashing, as well as extension points for adding your own.
92+
*
93+
* Also see the `NormalHashFunction` module.
94+
*/
95+
module ComputationallyExpensiveHashFunction {
96+
/**
97+
* A data flow source for "use of a broken or weak cryptographic hashing algorithm on sensitive
98+
* data" vulnerabilities that does require computationally expensive hashing. That is, a
99+
* password.
100+
*/
101+
abstract class Source extends DataFlow::Node {
102+
/**
103+
* Gets the classification of the sensitive data.
104+
*/
105+
abstract string getClassification();
106+
}
107+
108+
/**
109+
* A data flow sink for "use of a broken or weak cryptographic hashing algorithm on sensitive
110+
* data" vulnerabilities that applies to data that does require computationally expensive
111+
* hashing. That is, a broken or weak hashing algorithm or one that is not computationally
112+
* expensive enough for password hashing.
113+
*/
114+
abstract class Sink extends DataFlow::Node {
115+
/**
116+
* Gets the name of the weak hashing algorithm.
117+
*/
118+
abstract string getAlgorithmName();
119+
120+
/**
121+
* Holds if this sink is for a computationally expensive hash function (meaning that hash
122+
* function is just weak in some other regard.
123+
*/
124+
abstract predicate isComputationallyExpensive();
125+
}
126+
127+
/**
128+
* A barrier for "use of a broken or weak cryptographic hashing algorithm on sensitive data"
129+
* vulnerabilities that applies to data that does require computationally expensive hashing.
130+
*/
131+
abstract class Barrier extends DataFlow::Node { }
132+
133+
/**
134+
* A flow source modeled by the `SensitiveData` library.
135+
*/
136+
class PasswordAsSource extends Source {
137+
SensitiveExpr::Classification classification;
138+
139+
PasswordAsSource() {
140+
classification = this.asExpr().(SensitiveExpr).getClassification() and
141+
classification = SensitiveExpr::password()
142+
}
143+
144+
override SensitiveExpr::Classification getClassification() { result = classification }
145+
}
146+
147+
/**
148+
* A flow sink modeled by the `Cryptography` module.
149+
*/
150+
class WeakPasswordHashingOperationInputSink extends Sink {
151+
Cryptography::CryptographicAlgorithm algorithm;
152+
153+
WeakPasswordHashingOperationInputSink() {
154+
exists(Cryptography::CryptographicOperation operation |
155+
(
156+
algorithm instanceof Cryptography::PasswordHashingAlgorithm and
157+
algorithm.isWeak()
158+
or
159+
algorithm instanceof Cryptography::HashingAlgorithm // Note that HashingAlgorithm and PasswordHashingAlgorithm are disjoint
160+
) and
161+
algorithm = operation.getAlgorithm() and
162+
this = operation.getAnInput()
163+
)
164+
}
165+
166+
override string getAlgorithmName() { result = algorithm.getName() }
167+
168+
override predicate isComputationallyExpensive() {
169+
algorithm instanceof Cryptography::PasswordHashingAlgorithm
170+
}
171+
}
172+
}
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
<overview>
6+
<p>
7+
Using a broken or weak cryptographic hash function can leave data
8+
vulnerable, and should not be used in security related code.
9+
</p>
10+
11+
<p>
12+
A strong cryptographic hash function should be resistant to:
13+
</p>
14+
<ul>
15+
<li>
16+
pre-image attacks: if you know a hash value <code>h(x)</code>,
17+
you should not be able to easily find the input <code>x</code>.
18+
</li>
19+
<li>
20+
collision attacks: if you know a hash value <code>h(x)</code>,
21+
you should not be able to easily find a different input <code>y</code>
22+
with the same hash value <code>h(x) = h(y)</code>.
23+
</li>
24+
</ul>
25+
<p>
26+
In cases with a limited input space, such as for passwords, the hash
27+
function also needs to be computationally expensive to be resistant to
28+
brute-force attacks. Passwords should also have an unique salt applied
29+
before hashing, but that is not considered by this query.
30+
</p>
31+
32+
<p>
33+
As an example, both MD5 and SHA-1 are known to be vulnerable to collision attacks.
34+
</p>
35+
36+
<p>
37+
Since it's OK to use a weak cryptographic hash function in a non-security
38+
context, this query only alerts when these are used to hash sensitive
39+
data (such as passwords, certificates, usernames).
40+
</p>
41+
42+
<p>
43+
Use of broken or weak cryptographic algorithms that are not hashing algorithms, is
44+
handled by the <code>rb/weak-cryptographic-algorithm</code> query.
45+
</p>
46+
47+
</overview>
48+
<recommendation>
49+
50+
<p>
51+
Ensure that you use a strong, modern cryptographic hash function:
52+
</p>
53+
54+
<ul>
55+
<li>
56+
such as Argon2, scrypt, bcrypt, or PBKDF2 for passwords and other data with limited input space.
57+
</li>
58+
<li>
59+
such as SHA-2, or SHA-3 in other cases.
60+
</li>
61+
</ul>
62+
63+
</recommendation>
64+
<example>
65+
66+
<p>
67+
The following example shows two functions for checking whether the hash
68+
of a certificate matches a known value -- to prevent tampering.
69+
70+
The first function uses MD5 that is known to be vulnerable to collision attacks.
71+
72+
The second function uses SHA-256 that is a strong cryptographic hashing function.
73+
</p>
74+
75+
<sample src="examples/weak_certificate_hashing.rb" />
76+
77+
</example>
78+
<example>
79+
<p>
80+
The following example shows two functions for hashing passwords.
81+
82+
The first function uses SHA-256 to hash passwords. Although SHA-256 is a
83+
strong cryptographic hash function, it is not suitable for password
84+
hashing since it is not computationally expensive.
85+
</p>
86+
87+
<sample src="examples/weak_password_hashing_bad.rb" />
88+
89+
90+
<p>
91+
The second function uses Argon2 (through the <code>argon2</code>
92+
gem), which is a strong password hashing algorithm (and
93+
includes a per-password salt by default).
94+
</p>
95+
96+
<sample src="examples/weak_password_hashing_good.rb" />
97+
98+
</example>
99+
100+
<references>
101+
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html">Password Storage Cheat Sheet</a></li>
102+
</references>
103+
104+
</qhelp>
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/**
2+
* @name Use of a broken or weak cryptographic hashing algorithm on sensitive data
3+
* @description Using broken or weak cryptographic hashing algorithms can compromise security.
4+
* @kind path-problem
5+
* @problem.severity warning
6+
* @security-severity 7.5
7+
* @precision high
8+
* @id go/weak-sensitive-data-hashing
9+
* @tags security
10+
* external/cwe/cwe-327
11+
* external/cwe/cwe-328
12+
* external/cwe/cwe-916
13+
*/
14+
15+
import go
16+
import semmle.go.security.WeakSensitiveDataHashingCustomizations
17+
18+
/**
19+
* Provides a taint-tracking configuration for detecting use of a broken or weak
20+
* cryptographic hash function on sensitive data, that does NOT require a
21+
* computationally expensive hash function.
22+
*/
23+
module NormalHashFunctionFlow {
24+
import NormalHashFunction
25+
26+
private module Config implements DataFlow::ConfigSig {
27+
predicate isSource(DataFlow::Node source) { source instanceof Source }
28+
29+
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
30+
31+
predicate isBarrier(DataFlow::Node node) { node instanceof Barrier }
32+
33+
predicate isBarrierIn(DataFlow::Node node) {
34+
// make sources barriers so that we only report the closest instance
35+
isSource(node)
36+
}
37+
38+
predicate isBarrierOut(DataFlow::Node node) {
39+
// make sinks barriers so that we only report the closest instance
40+
isSink(node)
41+
}
42+
}
43+
44+
import TaintTracking::Global<Config>
45+
}
46+
47+
/**
48+
* Provides a taint-tracking configuration for detecting use of a broken or weak
49+
* cryptographic hashing algorithm on passwords.
50+
*
51+
* Passwords has stricter requirements on the hashing algorithm used (must be
52+
* computationally expensive to prevent brute-force attacks).
53+
*/
54+
module ComputationallyExpensiveHashFunctionFlow {
55+
import ComputationallyExpensiveHashFunction
56+
57+
private module Config implements DataFlow::ConfigSig {
58+
predicate isSource(DataFlow::Node source) { source instanceof Source }
59+
60+
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
61+
62+
predicate isBarrier(DataFlow::Node node) { node instanceof Barrier }
63+
64+
predicate isBarrierIn(DataFlow::Node node) {
65+
// make sources barriers so that we only report the closest instance
66+
isSource(node)
67+
}
68+
69+
predicate isBarrierOut(DataFlow::Node node) {
70+
// make sinks barriers so that we only report the closest instance
71+
isSink(node)
72+
}
73+
}
74+
75+
import TaintTracking::Global<Config>
76+
}
77+
78+
/**
79+
* Global taint-tracking for detecting both variants of "use of a broken or weak
80+
* cryptographic hashing algorithm on sensitive data" vulnerabilities. The two configurations are
81+
* merged to generate a combined path graph.
82+
*/
83+
module WeakSensitiveDataHashingFlow =
84+
DataFlow::MergePathGraph<NormalHashFunctionFlow::PathNode,
85+
ComputationallyExpensiveHashFunctionFlow::PathNode, NormalHashFunctionFlow::PathGraph,
86+
ComputationallyExpensiveHashFunctionFlow::PathGraph>;
87+
88+
import WeakSensitiveDataHashingFlow::PathGraph
89+
90+
from
91+
WeakSensitiveDataHashingFlow::PathNode source, WeakSensitiveDataHashingFlow::PathNode sink,
92+
string ending, string algorithmName, string classification
93+
where
94+
NormalHashFunctionFlow::flowPath(source.asPathNode1(), sink.asPathNode1()) and
95+
algorithmName = sink.getNode().(NormalHashFunction::Sink).getAlgorithmName() and
96+
classification = source.getNode().(NormalHashFunction::Source).getClassification() and
97+
ending = "."
98+
or
99+
ComputationallyExpensiveHashFunctionFlow::flowPath(source.asPathNode2(), sink.asPathNode2()) and
100+
algorithmName = sink.getNode().(ComputationallyExpensiveHashFunction::Sink).getAlgorithmName() and
101+
classification =
102+
source.getNode().(ComputationallyExpensiveHashFunction::Source).getClassification() and
103+
(
104+
sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
105+
ending = "."
106+
or
107+
not sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
108+
ending =
109+
" for " + classification +
110+
" hashing, since it is not a computationally expensive hash function."
111+
)
112+
select sink.getNode(), source, sink,
113+
"$@ is used in a hashing algorithm (" + algorithmName + ") that is insecure" + ending,
114+
source.getNode(), "Sensitive data (" + classification + ")"

0 commit comments

Comments
 (0)