66
77import javascript
88import CodeToFeatures
9- import EndpointScoring
9+ private import EndpointScoring
10+
11+ /**
12+ * A configuration that defines which endpoints should be featurized.
13+ *
14+ * This is used as a performance optimization to ensure that we only featurize the endpoints we need
15+ * to featurize.
16+ */
17+ abstract class FeaturizationConfig extends string {
18+ bindingset [ this ]
19+ FeaturizationConfig ( ) { any ( ) }
20+
21+ abstract DataFlow:: Node getAnEndpointToFeaturize ( ) ;
22+ }
1023
1124/**
1225 * Gets the value of the token-based feature named `featureName` for the endpoint `endpoint`.
1326 *
1427 * This is a single string containing a space-separated list of tokens.
1528 */
1629private string getTokenFeature ( DataFlow:: Node endpoint , string featureName ) {
17- // Features for endpoints that are contained within a function.
18- exists ( DatabaseFeatures:: Entity entity | entity = getRepresentativeEntityForEndpoint ( endpoint ) |
19- // The name of the function that encloses the endpoint.
20- featureName = "enclosingFunctionName" and result = entity .getName ( )
21- or
22- // A feature containing natural language tokens from the function that encloses the endpoint in
23- // the order that they appear in the source code.
24- featureName = "enclosingFunctionBody" and
25- result = unique( string x | x = FunctionBodies:: getBodyTokenFeatureForEntity ( entity ) )
26- )
27- or
28- result =
29- strictconcat ( DataFlow:: CallNode call , string component |
30- component = getACallBasedTokenFeatureComponent ( endpoint , call , featureName )
31- |
32- component , " "
30+ // Performance optimization: Restrict feature extraction to endpoints we've explicitly asked to featurize.
31+ endpoint = any ( FeaturizationConfig cfg ) .getAnEndpointToFeaturize ( ) and
32+ (
33+ // Features for endpoints that are contained within a function.
34+ exists ( DatabaseFeatures:: Entity entity | entity = getRepresentativeEntityForEndpoint ( endpoint ) |
35+ // The name of the function that encloses the endpoint.
36+ featureName = "enclosingFunctionName" and result = entity .getName ( )
37+ or
38+ // A feature containing natural language tokens from the function that encloses the endpoint in
39+ // the order that they appear in the source code.
40+ featureName = "enclosingFunctionBody" and
41+ result = unique( string x | x = FunctionBodies:: getBodyTokenFeatureForEntity ( entity ) )
3342 )
34- or
35- // The access path of the function being called, both with and without structural info, if the
36- // function being called originates from an external API. For example, the endpoint here:
37- //
38- // ```js
39- // const mongoose = require('mongoose'),
40- // User = mongoose.model('User', null);
41- // User.findOne(ENDPOINT);
42- // ```
43- //
44- // would have a callee access path with structural info of
45- // `mongoose member model instanceorreturn member findOne instanceorreturn`, and a callee access
46- // path without structural info of `mongoose model findOne`.
47- //
48- // These features indicate that the callee comes from (reading the access path backwards) an
49- // instance of the `findOne` member of an instance of the `model` member of the `mongoose`
50- // external library.
51- exists ( AccessPaths:: Boolean includeStructuralInfo |
52- featureName =
53- "calleeAccessPath" +
54- any ( string x | if includeStructuralInfo = true then x = "WithStructuralInfo" else x = "" ) and
43+ or
5544 result =
56- concat ( API:: Node node , string accessPath |
57- node .getInducingNode ( ) .( DataFlow:: CallNode ) .getAnArgument ( ) = endpoint and
58- AccessPaths:: accessPaths ( node , includeStructuralInfo , accessPath , _)
45+ strictconcat ( DataFlow:: CallNode call , string component |
46+ component = getACallBasedTokenFeatureComponent ( endpoint , call , featureName )
5947 |
60- accessPath , " "
48+ component , " "
6149 )
50+ or
51+ // The access path of the function being called, both with and without structural info, if the
52+ // function being called originates from an external API. For example, the endpoint here:
53+ //
54+ // ```js
55+ // const mongoose = require('mongoose'),
56+ // User = mongoose.model('User', null);
57+ // User.findOne(ENDPOINT);
58+ // ```
59+ //
60+ // would have a callee access path with structural info of
61+ // `mongoose member model instanceorreturn member findOne instanceorreturn`, and a callee access
62+ // path without structural info of `mongoose model findOne`.
63+ //
64+ // These features indicate that the callee comes from (reading the access path backwards) an
65+ // instance of the `findOne` member of an instance of the `model` member of the `mongoose`
66+ // external library.
67+ exists ( AccessPaths:: Boolean includeStructuralInfo |
68+ featureName =
69+ "calleeAccessPath" +
70+ any ( string x | if includeStructuralInfo = true then x = "WithStructuralInfo" else x = "" ) and
71+ result =
72+ concat ( API:: Node node , string accessPath |
73+ node .getInducingNode ( ) .( DataFlow:: CallNode ) .getAnArgument ( ) = endpoint and
74+ AccessPaths:: accessPaths ( node , includeStructuralInfo , accessPath , _)
75+ |
76+ accessPath , " "
77+ )
78+ )
6279 )
6380}
6481
@@ -77,6 +94,8 @@ private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
7794private string getACallBasedTokenFeatureComponent (
7895 DataFlow:: Node endpoint , DataFlow:: CallNode call , string featureName
7996) {
97+ // Performance optimization: Restrict feature extraction to endpoints we've explicitly asked to featurize.
98+ endpoint = any ( FeaturizationConfig cfg ) .getAnEndpointToFeaturize ( ) and
8099 // Features for endpoints that are an argument to a function call.
81100 endpoint = call .getAnArgument ( ) and
82101 (
@@ -111,6 +130,9 @@ private string getACallBasedTokenFeatureComponent(
111130module FunctionBodies {
112131 /** Holds if `location` is the location of an AST node within the entity `entity` and `token` is a node attribute associated with that AST node. */
113132 private predicate bodyTokens ( DatabaseFeatures:: Entity entity , Location location , string token ) {
133+ // Performance optimization: Restrict the set of entities to those containing an endpoint to featurize.
134+ entity =
135+ getRepresentativeEntityForEndpoint ( any ( FeaturizationConfig cfg ) .getAnEndpointToFeaturize ( ) ) and
114136 exists ( DatabaseFeatures:: AstNode node |
115137 DatabaseFeatures:: astNodes ( entity , _, _, node , _) and
116138 token = unique( string t | DatabaseFeatures:: nodeAttributes ( node , t ) ) and
@@ -276,7 +298,8 @@ private string getASupportedFeatureName() {
276298 * `featureValue` for the endpoint `endpoint`.
277299 */
278300predicate tokenFeatures ( DataFlow:: Node endpoint , string featureName , string featureValue ) {
279- ModelScoring:: endpoints ( endpoint ) and
301+ // Performance optimization: Restrict feature extraction to endpoints we've explicitly asked to featurize.
302+ endpoint = any ( FeaturizationConfig cfg ) .getAnEndpointToFeaturize ( ) and
280303 (
281304 if strictcount ( getTokenFeature ( endpoint , featureName ) ) = 1
282305 then featureValue = getTokenFeature ( endpoint , featureName )
0 commit comments