From a2f38bd37fa60ba4658611740d93ade75cbefcb1 Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sat, 19 Apr 2025 12:49:27 +0200 Subject: [PATCH 1/2] Fix missing pairwise changed dependencies --- .../List_pairwise_changed_files_with_dependencies.cypher | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cypher/GitLog/List_pairwise_changed_files_with_dependencies.cypher b/cypher/GitLog/List_pairwise_changed_files_with_dependencies.cypher index ee752248a..b4c66604e 100644 --- a/cypher/GitLog/List_pairwise_changed_files_with_dependencies.cypher +++ b/cypher/GitLog/List_pairwise_changed_files_with_dependencies.cypher @@ -2,7 +2,8 @@ MATCH (firstCodeFile:File)-[dependency:DEPENDS_ON]->(secondCodeFile:File) MATCH (firstCodeFile)-[pairwiseChange:CHANGED_TOGETHER_WITH]-(secondCodeFile) -WHERE elementId(firstCodeFile) < elementId(secondCodeFile) +//De-duplicating the pairs of files isn't necessary, because the dependency relation is directed. +//WHERE elementId(firstCodeFile) < elementId(secondCodeFile) WITH firstCodeFile.fileName AS firstFileName ,secondCodeFile.fileName AS secondFileName ,coalesce(dependency.weight, dependency.cardinality) AS dependencyWeight From 71d3519d50c7336e083841aaada0f3d8619fd0ec Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sat, 19 Apr 2025 12:49:59 +0200 Subject: [PATCH 2/2] Calculate p-values only if there are enough samples --- jupyter/GitHistoryGeneral.ipynb | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/jupyter/GitHistoryGeneral.ipynb b/jupyter/GitHistoryGeneral.ipynb index 4010bb5b0..6192d4ce3 100644 --- a/jupyter/GitHistoryGeneral.ipynb +++ b/jupyter/GitHistoryGeneral.ipynb @@ -1338,15 +1338,26 @@ "display(pairwise_changed_git_files_with_dependencies.corr(method='pearson'))\n", "\n", "display(\"Pairwise changed git files compared to dependency weights - Spearman Correlation\")\n", - "display(pairwise_changed_git_files_with_dependencies.corr(method='spearman'))\n", - "\n", - "from scipy.stats import pearsonr, spearmanr\n", + "display(pairwise_changed_git_files_with_dependencies.corr(method='spearman'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a4ae651", + "metadata": {}, + "outputs": [], + "source": [ + "if pairwise_changed_git_files_with_dependencies.shape[0] < 5:\n", + " print(\"Less than 5 samples are not enough to calculate p-values\")\n", + "else:\n", + " from scipy.stats import pearsonr, spearmanr\n", "\n", - "display(\"Pearson Correlation with p-value for commitCount and dependencyWeight\")\n", - "display(pearsonr(pairwise_changed_git_files_with_dependencies['commitCount'], pairwise_changed_git_files_with_dependencies['dependencyWeight']))\n", + " display(\"Pearson Correlation with p-value for commitCount and dependencyWeight\")\n", + " display(pearsonr(pairwise_changed_git_files_with_dependencies['commitCount'], pairwise_changed_git_files_with_dependencies['dependencyWeight']))\n", "\n", - "display(\"Spearman Correlation with p-value for commitCount and dependencyWeight\")\n", - "display(spearmanr(pairwise_changed_git_files_with_dependencies['commitCount'], pairwise_changed_git_files_with_dependencies['dependencyWeight']))" + " display(\"Spearman Correlation with p-value for commitCount and dependencyWeight\")\n", + " display(spearmanr(pairwise_changed_git_files_with_dependencies['commitCount'], pairwise_changed_git_files_with_dependencies['dependencyWeight']))" ] }, {