Skip to content

Commit 4fbf4e2

Browse files
authored
Merge pull request #485 from JohT/feature/additional-queries-for-anomaly-detection
Add csv query reports to anomaly detection
2 parents cff7aaa + 62ab8a1 commit 4fbf4e2

File tree

3 files changed

+47
-6
lines changed

3 files changed

+47
-6
lines changed

domains/anomaly-detection/anomalyDetectionPython.sh

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -162,14 +162,23 @@ anomaly_detection_labels() {
162162

163163
local language
164164
language=$( extractQueryParameter "projection_language" "${@}" )
165-
165+
166166
echo "anomalyDetectionPython: $(date +'%Y-%m-%dT%H:%M:%S%z') Labelling ${language} ${nodeLabel} anomalies..."
167+
168+
# Within the absolute (full) report directory for anomaly detection, create a subdirectory for each detail type (Java_Package, Java_Type, ...)
169+
local detail_report_directory="${FULL_REPORT_DIRECTORY}/${language}_${nodeLabel}"
170+
mkdir -p "${detail_report_directory}"
171+
167172
execute_cypher_summarized "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeRemoveLabels.cypher" "${@}"
168-
execute_cypher_summarized "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeAuthority.cypher" "${@}"
169-
execute_cypher_summarized "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeBottleneck.cypher" "${@}"
170-
execute_cypher_summarized "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeHub.cypher" "${@}"
171-
execute_cypher_summarized "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeBridge.cypher" "${@}"
172-
execute_cypher_summarized "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeOutlier.cypher" "${@}"
173+
execute_cypher "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeAuthority.cypher" "${@}" > "${detail_report_directory}/AnomalyArchetypeTopAuthority.csv"
174+
execute_cypher "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeBottleneck.cypher" "${@}" > "${detail_report_directory}/AnomalyArchetypeTopBottleneck.csv"
175+
execute_cypher "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeHub.cypher" "${@}" > "${detail_report_directory}/AnomalyArchetypeTopHub.csv"
176+
# The following two label types require Python scripts to run first.
177+
execute_cypher "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeBridge.cypher" "${@}" > "${detail_report_directory}/AnomalyArchetypeTopBridge.csv"
178+
execute_cypher "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeOutlier.cypher" "${@}" > "${detail_report_directory}/AnomalyArchetypeTopOutlier.csv"
179+
# Output the top anomalies and their archetype + rank
180+
execute_cypher "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionTopAnomalies.cypher" "${@}" > "${detail_report_directory}/TopAnomalies.csv"
181+
173182
}
174183

175184
# Run the anomaly detection pipeline.
Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,29 @@
1+
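// List the top 50 anomalies (anomalyLabel = 1 and anomalyScore > 0) of the given node label, ordered by descending anomaly score, including their project name, archetype ranks and top 3 features with SHAP values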
2+
3+
MATCH (codeUnit)
4+
WHERE $projection_node_label IN labels(codeUnit)
5+
AND codeUnit.anomalyScore > 0
6+
AND codeUnit.anomalyLabel = 1
7+
ORDER BY codeUnit.anomalyScore DESC
8+
LIMIT 50
9+
OPTIONAL MATCH (artifact:Java:Artifact)-[:CONTAINS]->(codeUnit)
10+
WITH *, artifact.name AS artifactName
11+
OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS]->(codeUnit)
12+
WITH *, last(split(projectRoot.absoluteFileName, '/')) AS projectName
13+
WITH *, coalesce(artifactName, projectName) AS projectName
14+
RETURN projectName
15+
,codeUnit.name AS shortCodeUnitName
16+
,coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
17+
,codeUnit.anomalyRank AS anomalyRank
18+
,codeUnit.anomalyScore AS anomalyScore
19+
,coalesce(codeUnit.anomalyAuthorityRank, 0) AS authorityRank
20+
,coalesce(codeUnit.anomalyBottleneckRank, 0) AS bottleneckRank
21+
,coalesce(codeUnit.anomalyBridgeRank, 0) AS bridgeRank
22+
,coalesce(codeUnit.anomalyHubRank, 0) AS hubRank
23+
,coalesce(codeUnit.anomalyOutlierRank, 0) AS outlierRank
24+
,codeUnit.anomalyTopFeature1 AS topFeature1
25+
,codeUnit.anomalyTopFeature2 AS topFeature2
26+
,codeUnit.anomalyTopFeature3 AS topFeature3
27+
,codeUnit.anomalyTopFeatureSHAPValue1 AS topFeature1Score
28+
,codeUnit.anomalyTopFeatureSHAPValue2 AS topFeature2Score
29+
,codeUnit.anomalyTopFeatureSHAPValue3 AS topFeature3Score

domains/anomaly-detection/tunedAnomalyDetectionExplained.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -532,6 +532,7 @@ def add_anomaly_detection_results_to_features(
532532
anomaly_detection_results: AnomalyDetectionResults,
533533
anomaly_label_column: str = 'anomalyLabel',
534534
anomaly_score_column: str = 'anomalyScore',
535+
anomaly_rank_column: str = 'anomalyRank'
535536
) -> pd.DataFrame:
536537
"""
537538
Adds anomaly detection results to the feature and returns the updated dataframe.
@@ -549,6 +550,7 @@ def add_anomaly_detection_results_to_features(
549550
# Add anomaly labels, scores and the dense rank of the scores (rank 1 = highest score) to the feature matrix
550551
features[anomaly_label_column] = anomaly_detection_results.anomaly_labels
551552
features[anomaly_score_column] = anomaly_detection_results.anomaly_scores
553+
features[anomaly_rank_column] = features[anomaly_score_column].rank(method='dense', ascending=False).astype(int)
552554
return features
553555

554556

@@ -1250,6 +1252,7 @@ def output_top_shap_explained_global_features_as_markdown_table(
12501252
'nodeElementId': features["nodeElementId"],
12511253
'anomalyLabel': features['anomalyLabel'].astype(int),
12521254
'anomalyScore': features['anomalyScore'],
1255+
'anomalyRank': features['anomalyRank'],
12531256
'anomalyTopFeature1': features['anomalyTopFeature_1'],
12541257
'anomalyTopFeature2': features['anomalyTopFeature_2'],
12551258
'anomalyTopFeature3': features['anomalyTopFeature_3'],

0 commit comments

Comments
 (0)