Skip to content

Commit cd96939

Browse files
committed
Visualize longest paths with GraphViz
1 parent acc6538 commit cd96939

File tree

4 files changed

+197
-1
lines changed

4 files changed

+197
-1
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// Path Finding - Longest path - Stream - List all dependencies for nodes contributing to longest paths and highlight those paths in the Visualization with GraphViz.
2+
3+
// Gather global statistics about dependency weights and levels for normalization and node details
4+
MATCH (sourceNodeForStatistics)-[dependencyForStatistics:DEPENDS_ON]->(targetNodeForStatistics)
5+
WHERE $dependencies_projection_node IN LABELS(sourceNodeForStatistics)
6+
AND $dependencies_projection_node IN LABELS(targetNodeForStatistics)
7+
WITH min(dependencyForStatistics[$dependencies_projection_weight_property]) AS minWeight
8+
,max(dependencyForStatistics[$dependencies_projection_weight_property]) AS maxWeight
9+
,max(targetNodeForStatistics.maxDistanceFromSource) AS maxLevel
10+
// Avoid dividing by zero when every dependency has the same weight (maxWeight = minWeight).
// A factor of 0.0 then maps every normalized weight to 0, so the pen width falls back to its minimum of 1.
WITH *, CASE WHEN maxWeight = minWeight THEN 0.0 ELSE 1.0 / toFloat(maxWeight - minWeight) END AS weightNormalizationFactor
11+
WITH { minWeight: minWeight, maxLevel: maxLevel, weightNormalizationFactor: weightNormalizationFactor } AS statistics
12+
// -> Main call to execute "longest path" algorithm
13+
CALL gds.dag.longestPath.stream($dependencies_projection + '-cleaned')
14+
YIELD index, totalCost, path
15+
WITH *
16+
// Sort longest paths by their length descending and - if equal - by their index ascending
17+
ORDER BY totalCost DESC, index ASC
18+
// Only take the top 50 longest paths as a compromise between performance and visualization content
19+
LIMIT 50
20+
// Collect all results of the longest path search as well as all nodes of the longest paths
21+
WITH statistics
22+
,collect({index: index, distance: toInteger(totalCost), path: path}) AS longestPaths
23+
,collect(nodes(path)) AS allLongestPathNodes
24+
// Flatten and deduplicate the list of all nodes that contribute to at least one longest path
25+
UNWIND allLongestPathNodes AS longestPathNodes
26+
UNWIND longestPathNodes AS longestPathNode
27+
WITH statistics
28+
,longestPaths
29+
,collect(DISTINCT longestPathNode) AS allDistinctLongestPathNodes
30+
// Iterate over all longest paths
31+
UNWIND longestPaths AS longestPath
32+
WITH statistics
33+
,longestPaths, allDistinctLongestPathNodes
34+
,[ singleRelationship IN relationships(longestPath.path) | [startNode(singleRelationship), endNode(singleRelationship)] ] AS allLongestPathStartAndEndNodeTuples
35+
,[ singleRelationship IN relationships(longestPaths[0].path) | [startNode(singleRelationship), endNode(singleRelationship)] ] AS longestPathStartAndEndNodeTuples
36+
,longestPath.index AS index
37+
,longestPath.distance AS distance
38+
// -> Main query of all dependencies of nodes contributing to the longest paths
39+
MATCH (source)-[dependency:DEPENDS_ON]->(target)
40+
WHERE $dependencies_projection_node IN labels(source)
41+
AND $dependencies_projection_node IN labels(target)
42+
// Dependent nodes need to be part of at least one longest paths
43+
AND (source IN allDistinctLongestPathNodes AND target IN allDistinctLongestPathNodes)
44+
WITH statistics.maxLevel AS maxLevel
45+
,statistics.minWeight AS minWeight
46+
,statistics.weightNormalizationFactor AS weightNormalizationFactor
47+
,count(index) AS numberOfLongestPathsPassing
48+
,max(distance) AS lengthOfLongestPathPassing
49+
,dependency
50+
,source
51+
,target
52+
// If there is at least one longest path passing through the dependency then "contributesToALongestPath" is true
53+
// NOTE(review): this flag is evaluated once per unwound longest path and is a grouping key next to the
// aggregations count(index) and max(distance). The same dependency can therefore produce one row with
// the flag set and another row without it, i.e. one highlighted and one plain DOT line - confirm whether that is intended.
,([source, target] IN allLongestPathStartAndEndNodeTuples) AS contributesToALongestPath
54+
,([source, target] IN longestPathStartAndEndNodeTuples) AS isPartOfLongestPath
55+
WITH *, dependency[$dependencies_projection_weight_property] AS weight
56+
WITH *, toFloat(weight - minWeight) * weightNormalizationFactor AS normalizedWeight
57+
WITH *, round((normalizedWeight * 5) + 1, 2) AS penWidth
58+
WITH *, source.name + "\\n(level " + source.maxDistanceFromSource + "/" + maxLevel + ")" AS startNodeTitle
59+
WITH *, target.name + "\\n(level " + target.maxDistanceFromSource + "/" + maxLevel + ")" AS endNodeTitle
60+
// The longest path will be highlighted in red.
61+
WITH *, CASE WHEN isPartOfLongestPath THEN "; color=\"red\""
62+
// Dependencies contributing to the longest path will be highlighted in dark orange.
63+
WHEN contributesToALongestPath THEN "; color=\"darkorange\""
64+
ELSE "" END AS edgeColor
65+
// Prepare the GraphViz edge attributes for the visualization
66+
WITH *, "[label=" + weight + "; penwidth=" + penWidth + edgeColor + "; ];" AS graphVizEdgeAttributes
67+
// Assemble the final GraphViz DOT notation line for the edge representing the current dependency
68+
WITH *, "\"" + startNodeTitle + "\" -> \"" + endNodeTitle + "\" " + graphVizEdgeAttributes AS graphVizDotNotationLine
69+
RETURN DISTINCT graphVizDotNotationLine
70+
// Debugging
71+
// ,source.name
72+
// ,target.name
73+
// ,numberOfLongestPathsPassing
74+
// ,lengthOfLongestPathPassing
75+
// ,contributesToALongestPath
76+
// ,isPartOfLongestPath
77+
LIMIT 440
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// Path Finding - Longest path - Stream - Find the top 100 dependencies contributing to the longest paths for Visualization with GraphViz
2+
3+
MATCH (sourceNodeForStatistics)-[dependencyForStatistics:DEPENDS_ON]->(targetNodeForStatistics)
4+
WHERE $dependencies_projection_node IN LABELS(sourceNodeForStatistics)
5+
AND $dependencies_projection_node IN LABELS(targetNodeForStatistics)
6+
WITH min(dependencyForStatistics[$dependencies_projection_weight_property]) AS minWeight
7+
,max(dependencyForStatistics[$dependencies_projection_weight_property]) AS maxWeight
8+
,max(targetNodeForStatistics.maxDistanceFromSource) AS maxLevel
9+
// Avoid dividing by zero when every dependency has the same weight (maxWeight = minWeight).
// A factor of 0.0 then maps every normalized weight to 0, so the pen width falls back to its minimum of 1.
WITH *, CASE WHEN maxWeight = minWeight THEN 0.0 ELSE 1.0 / toFloat(maxWeight - minWeight) END AS weightNormalizationFactor
10+
CALL gds.dag.longestPath.stream($dependencies_projection + '-cleaned')
11+
YIELD index, totalCost, path
12+
WITH *, toInteger(totalCost) AS distance
13+
ORDER BY distance DESC, index ASC
14+
UNWIND relationships(path) AS pathRelationship
15+
WITH *
16+
,startNode(pathRelationship) AS startNode
17+
,endNode(pathRelationship) AS endNode
18+
MATCH (startNode)-[dependency:DEPENDS_ON]->(endNode)
19+
WITH *, dependency[$dependencies_projection_weight_property] AS weight
20+
WITH *, toFloat(weight - minWeight) * weightNormalizationFactor AS normalizedWeight
21+
WITH *, round((normalizedWeight * 5) + 1, 2) AS penWidth
22+
WITH *, startNode.name + "\\n(level " + startNode.maxDistanceFromSource + "/" + maxLevel + ")" AS startNodeTitle
23+
WITH *, endNode.name + "\\n(level " + endNode.maxDistanceFromSource + "/" + maxLevel + ")" AS endNodeTitle
24+
WITH *, "[label=" + weight + "; penwidth=" + penWidth + "; ];" AS graphVizEdgeAttributes
25+
WITH *, "\"" + startNodeTitle + "\" -> \"" + endNodeTitle + "\" " + graphVizEdgeAttributes AS graphVizDotNotationLine
26+
// Relationships are unwound for every longest path, so a dependency shared by several paths shows up multiple times.
// DISTINCT emits every edge only once so that GraphViz doesn't draw parallel duplicates and the row limit isn't used up by repeats.
RETURN DISTINCT graphVizDotNotationLine
27+
// Debugging
28+
// RETURN startNode.name AS startNodeName
29+
// ,endNode.name AS endNodeName
30+
// ,dependency[$dependencies_projection_weight_property] AS dependencyWeight
31+
// ,max(distance) AS partOfLongestPathLength
32+
// ,count(DISTINCT index) AS partOfLongestPathCounts
33+
// ,startNode.maxDistanceFromSource AS startNodeLevel
34+
// ,endNode.maxDistanceFromSource AS endNodeLevel
35+
LIMIT 100

cypher/Path_Finding/Set_Parameters.cypher

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Example on how to set the parameters for centrality in this case for Packages and PageRank
1+
// Example on how to set the parameters for path finding in this case for Packages and PageRank
22

33
:params {
44
"dependencies_projection": "package-path-finding",
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#!/usr/bin/env bash
2+
3+
# Executes selected "Path_Finding" Cypher queries for GraphViz visualization.
4+
# Visualizes Java Artifact and TypeScript Module dependencies with their longest paths.
5+
# It requires an already running Neo4j graph database with already scanned and analyzed artifacts.
6+
# The reports (csv, dot and svg files) will be written into the sub directory reports/path-finding-visualization.
7+
8+
# Requires executeQueryFunctions.sh, projectionFunctions.sh, visualizeQueryResults.sh, cleanupAfterReportGeneration.sh
9+
10+
# Fail on any error ("-e" = exit on first error, "-o pipefail" exits on errors within piped commands)
11+
set -o errexit -o pipefail
12+
13+
# Overrideable Constants (defaults also defined in sub scripts)
14+
REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"}
15+
16+
## Get this "scripts/reports" directory if not already set
17+
# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution.
18+
# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
19+
# This way non-standard tools like readlink aren't needed.
20+
REPORTS_SCRIPT_DIR=${REPORTS_SCRIPT_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )}
21+
echo "PathFindingVisualization: REPORTS_SCRIPT_DIR=${REPORTS_SCRIPT_DIR}"
22+
23+
# Get the "scripts" directory by taking the path of this script and going one directory up.
24+
SCRIPTS_DIR=${SCRIPTS_DIR:-"${REPORTS_SCRIPT_DIR}/.."} # Repository directory containing the shell scripts
25+
echo "PathFindingVisualization SCRIPTS_DIR=${SCRIPTS_DIR}"
26+
27+
# Get the "scripts/visualization" directory.
28+
VISUALIZATION_SCRIPTS_DIR=${VISUALIZATION_SCRIPTS_DIR:-"${SCRIPTS_DIR}/visualization"} # Repository directory containing the shell scripts for visualization
29+
echo "PathFindingVisualization VISUALIZATION_SCRIPTS_DIR=${VISUALIZATION_SCRIPTS_DIR}"
30+
31+
# Get the "cypher" directory by taking the path of this script and going two directory up and then to "cypher".
32+
CYPHER_DIR=${CYPHER_DIR:-"${REPORTS_SCRIPT_DIR}/../../cypher"}
33+
echo "PathFindingVisualization CYPHER_DIR=${CYPHER_DIR}"
34+
35+
PATH_FINDINGS_CYPHER_DIR="${CYPHER_DIR}/Path_Finding"
36+
37+
# Define functions to execute cypher queries from within a given file
38+
source "${SCRIPTS_DIR}/executeQueryFunctions.sh"
39+
40+
# Define functions to create and delete Graph Projections like "createDirectedDependencyProjection"
41+
source "${SCRIPTS_DIR}/projectionFunctions.sh"
42+
43+
# Create report directory
44+
REPORT_NAME="path-finding-visualization"
45+
FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}"
46+
mkdir -p "${FULL_REPORT_DIRECTORY}"
47+
48+
# Java Artifacts: Longest Paths Visualization
49+
ARTIFACT_PROJECTION="dependencies_projection=artifact-path-finding"
50+
ARTIFACT_NODE="dependencies_projection_node=Artifact"
51+
ARTIFACT_WEIGHT="dependencies_projection_weight_property=weight"
52+
53+
if createDirectedDependencyProjection "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}"; then
54+
reportName="JavaArtifactLongestPathsIsolated"
55+
echo "PathFindingVisualization: Creating visualization ${reportName}..."
56+
execute_cypher "${PATH_FINDINGS_CYPHER_DIR}/Path_Finding_6_Longest_paths_for_graphviz.cypher" "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" > "${FULL_REPORT_DIRECTORY}/${reportName}.csv"
57+
source "${VISUALIZATION_SCRIPTS_DIR}/visualizeQueryResults.sh" "${FULL_REPORT_DIRECTORY}/${reportName}.csv"
58+
59+
reportName="JavaArtifactLongestPaths"
60+
echo "PathFindingVisualization: Creating visualization ${reportName}..."
61+
execute_cypher "${PATH_FINDINGS_CYPHER_DIR}/Path_Finding_6_Longest_paths_contributors_for_graphviz.cypher" "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" > "${FULL_REPORT_DIRECTORY}/${reportName}.csv"
62+
source "${VISUALIZATION_SCRIPTS_DIR}/visualizeQueryResults.sh" "${FULL_REPORT_DIRECTORY}/${reportName}.csv"
63+
fi
64+
65+
# TypeScript Modules: Longest Paths Visualization
66+
MODULE_LANGUAGE="dependencies_projection_language=Typescript"
67+
MODULE_PROJECTION="dependencies_projection=typescript-module-path-finding"
68+
MODULE_NODE="dependencies_projection_node=Module"
69+
MODULE_WEIGHT="dependencies_projection_weight_property=lowCouplingElement25PercentWeight"
70+
71+
if createDirectedDependencyProjection "${MODULE_LANGUAGE}" "${MODULE_PROJECTION}" "${MODULE_NODE}" "${MODULE_WEIGHT}"; then
72+
reportName="TypeScriptModuleLongestPathsIsolated"
73+
echo "PathFindingVisualization: Creating visualization ${reportName}..."
74+
execute_cypher "${PATH_FINDINGS_CYPHER_DIR}/Path_Finding_6_Longest_paths_for_graphviz.cypher" "${MODULE_PROJECTION}" "${MODULE_NODE}" "${MODULE_WEIGHT}" > "${FULL_REPORT_DIRECTORY}/${reportName}.csv"
75+
source "${VISUALIZATION_SCRIPTS_DIR}/visualizeQueryResults.sh" "${FULL_REPORT_DIRECTORY}/${reportName}.csv"
76+
77+
reportName="TypeScriptModuleLongestPaths"
78+
echo "PathFindingVisualization: Creating visualization ${reportName}..."
79+
execute_cypher "${PATH_FINDINGS_CYPHER_DIR}/Path_Finding_6_Longest_paths_contributors_for_graphviz.cypher" "${MODULE_PROJECTION}" "${MODULE_NODE}" "${MODULE_WEIGHT}" > "${FULL_REPORT_DIRECTORY}/${reportName}.csv"
80+
source "${VISUALIZATION_SCRIPTS_DIR}/visualizeQueryResults.sh" "${FULL_REPORT_DIRECTORY}/${reportName}.csv"
81+
fi
82+
83+
# Clean-up after report generation. Empty reports will be deleted.
84+
source "${SCRIPTS_DIR}/cleanupAfterReportGeneration.sh" "${FULL_REPORT_DIRECTORY}"

0 commit comments

Comments
 (0)