@@ -17,6 +17,8 @@ import semmle.python.dataflow.new.TaintTracking
1717import semmle.python.dataflow.new.internal.DataFlowPublic
1818import semmle.python.dataflow.new.RemoteFlowSources
1919
20+ // The Unicode compatibility normalization calls from unicodedata, unidecode, pyunormalize
21+ // and textnorm modules. The use of argIdx is to constraint the argument being normalized.
2022class UnicodeCompatibilityNormalize extends API:: CallNode {
2123 int argIdx ;
2224
@@ -66,10 +68,12 @@ predicate underAValue(DataFlow::GuardNode g, ControlFlowNode node, boolean branc
6668 exists ( API:: CallNode lenCall , Cmpop op_gt , Cmpop op_lt , Node n |
6769 lenCall = n .getALocalSource ( ) and
6870 (
71+ // arg <= LIMIT OR arg < LIMIT
6972 ( op_lt = any ( LtE lte ) or op_lt = any ( Lt lt ) ) and
7073 branch = true and
7174 cn .operands ( n .asCfgNode ( ) , op_lt , _)
7275 or
76+ // LIMIT >= arg OR LIMIT > arg
7377 ( op_gt = any ( GtE gte ) or op_gt = any ( Gt gt ) ) and
7478 branch = true and
7579 cn .operands ( _, op_gt , n .asCfgNode ( ) )
@@ -88,12 +92,16 @@ class Configuration extends TaintTracking::Configuration {
8892 override predicate isSource ( DataFlow:: Node source ) { source instanceof RemoteFlowSource }
8993
9094 override predicate isSanitizer ( DataFlow:: Node sanitizer ) {
95+ // underAValue is a check to ensure that the length of the user-provided value is limited to a certain amount
9196 sanitizer = DataFlow:: BarrierGuard< underAValue / 3 > :: getABarrierNode ( )
9297 }
9398
9499 override predicate isSink ( DataFlow:: Node sink ) {
100+ // Any call to the Unicode compatibility normalization is a costly operation
95101 sink = any ( UnicodeCompatibilityNormalize ucn ) .getPathArg ( )
96102 or
103+ // The call to secure_filename() from pallets/werkzeug uses the Unicode compatibility normalization
104+ // under the hood, https://github.com/pallets/werkzeug/blob/d3dd65a27388fbd39d146caacf2563639ba622f0/src/werkzeug/utils.py#L218
97105 sink = API:: moduleImport ( "werkzeug" ) .getMember ( "secure_filename" ) .getACall ( ) .getArg ( _)
98106 or
99107 sink =
@@ -102,7 +110,6 @@ class Configuration extends TaintTracking::Configuration {
102110 .getMember ( "secure_filename" )
103111 .getACall ( )
104112 .getArg ( _)
105-
106113 }
107114}
108115
0 commit comments