From 30f5f7cce63ef1844c53cd83c9a3895745ecde85 Mon Sep 17 00:00:00 2001 From: Simon Warta Date: Mon, 22 Aug 2016 21:04:54 +0200 Subject: [PATCH 01/13] Add testAlternatingHeadersHit() --- integrationtests.py | 51 +++++++++++++++++++ .../hits-and-misses/.gitignore | 4 ++ .../stable-source-with-alternating-header.cpp | 9 ++++ 3 files changed, 64 insertions(+) create mode 100644 tests/integrationtests/hits-and-misses/.gitignore create mode 100644 tests/integrationtests/hits-and-misses/stable-source-with-alternating-header.cpp diff --git a/integrationtests.py b/integrationtests.py index cc485f68..fbe2272c 100644 --- a/integrationtests.py +++ b/integrationtests.py @@ -233,6 +233,57 @@ def testHitsSimple(self): newHits = stats.numCacheHits() self.assertEqual(newHits, oldHits + 1) + def testAlternatingHeadersHit(self): + with cd(os.path.join(ASSETS_DIR, "hits-and-misses")), tempfile.TemporaryDirectory() as tempDir: + cache = clcache.Cache(tempDir) + customEnv = dict(os.environ, CLCACHE_DIR=tempDir) + baseCmd = CLCACHE_CMD + ["/nologo", "/EHsc", "/c"] + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 0) + self.assertEqual(stats.numCacheMisses(), 0) + self.assertEqual(stats.numCacheEntries(), 0) + + # VERSION 1 + with open('stable-source-with-alternating-header.h', 'w') as f: + f.write("#define VERSION 1\n") + subprocess.check_call(baseCmd + ["stable-source-with-alternating-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 0) + self.assertEqual(stats.numCacheMisses(), 1) + self.assertEqual(stats.numCacheEntries(), 1) + + # VERSION 2 + with open('stable-source-with-alternating-header.h', 'w') as f: + f.write("#define VERSION 2\n") + subprocess.check_call(baseCmd + ["stable-source-with-alternating-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 0) + self.assertEqual(stats.numCacheMisses(), 2) + self.assertEqual(stats.numCacheEntries(), 2) + + # VERSION 1 again + with open('stable-source-with-alternating-header.h', 'w') as f: + f.write("#define VERSION 1\n") + subprocess.check_call(baseCmd + ["stable-source-with-alternating-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 1) + self.assertEqual(stats.numCacheMisses(), 2) + self.assertEqual(stats.numCacheEntries(), 2) + + # VERSION 2 again + with open('stable-source-with-alternating-header.h', 'w') as f: + f.write("#define VERSION 1\n") + subprocess.check_call(baseCmd + ["stable-source-with-alternating-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 2) + self.assertEqual(stats.numCacheMisses(), 2) + self.assertEqual(stats.numCacheEntries(), 2) + class TestPrecompiledHeaders(unittest.TestCase): def testSampleproject(self): diff --git a/tests/integrationtests/hits-and-misses/.gitignore b/tests/integrationtests/hits-and-misses/.gitignore new file mode 100644 index 00000000..0c501438 --- /dev/null +++ b/tests/integrationtests/hits-and-misses/.gitignore @@ -0,0 +1,4 @@ +*.obj + +# written by the tests +stable-source-with-alternating-header.h diff --git a/tests/integrationtests/hits-and-misses/stable-source-with-alternating-header.cpp b/tests/integrationtests/hits-and-misses/stable-source-with-alternating-header.cpp new file mode 100644 index 00000000..32031da4 --- /dev/null +++ b/tests/integrationtests/hits-and-misses/stable-source-with-alternating-header.cpp @@ -0,0 +1,9 @@ +#include + +#include "stable-source-with-alternating-header.h" + +int main() +{ + std::cout << "A C++ file we compile twice and expect a hit" << std::endl; + return 0; +} From 21e41d5711b4ef5837e93f3779b345dae078fd7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela?= Date: Fri, 26 Aug 2016 16:12:54 +0200 Subject: [PATCH 02/13] More integration tests for header changes --- integrationtests.py | 153 ++++++++++++++++++ .../stable-source-transitive-header.cpp | 9 ++ .../hits-and-misses/transitive-header.h | 1 + 3 files changed, 163 insertions(+) create mode 100644 tests/integrationtests/hits-and-misses/stable-source-transitive-header.cpp create mode 100644 tests/integrationtests/hits-and-misses/transitive-header.h diff --git a/integrationtests.py b/integrationtests.py index fbe2272c..752f0a9a 100644 --- a/integrationtests.py +++ b/integrationtests.py @@ -284,6 +284,159 @@ def testAlternatingHeadersHit(self): self.assertEqual(stats.numCacheMisses(), 2) self.assertEqual(stats.numCacheEntries(), 2) + def testRemovedHeader(self): + with cd(os.path.join(ASSETS_DIR, "hits-and-misses")), tempfile.TemporaryDirectory() as tempDir: + cache = clcache.Cache(tempDir) + customEnv = dict(os.environ, CLCACHE_DIR=tempDir) + baseCmd = CLCACHE_CMD + ["/nologo", "/EHsc", "/c"] + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 0) + self.assertEqual(stats.numCacheMisses(), 0) + self.assertEqual(stats.numCacheEntries(), 0) + + # VERSION 1 + with open('stable-source-with-alternating-header.h', 'w') as f: + f.write("#define VERSION 1\n") + subprocess.check_call(baseCmd + ["stable-source-with-alternating-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 0) + self.assertEqual(stats.numCacheMisses(), 1) + self.assertEqual(stats.numCacheEntries(), 1) + + # Remove header, trigger the compiler which should fail + os.remove('stable-source-with-alternating-header.h') + with self.assertRaises(subprocess.CalledProcessError): + subprocess.check_call(baseCmd + ["stable-source-with-alternating-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 0) + self.assertEqual(stats.numCacheMisses(), 2) + self.assertEqual(stats.numCacheEntries(), 1) + + # VERSION 1 again + with open('stable-source-with-alternating-header.h', 'w') as f: + f.write("#define VERSION 1\n") + subprocess.check_call(baseCmd + ["stable-source-with-alternating-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 1) + self.assertEqual(stats.numCacheMisses(), 2) + self.assertEqual(stats.numCacheEntries(), 1) + + # Remove header again, trigger the compiler which should fail + os.remove('stable-source-with-alternating-header.h') + with self.assertRaises(subprocess.CalledProcessError): + subprocess.check_call(baseCmd + ["stable-source-with-alternating-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 1) + self.assertEqual(stats.numCacheMisses(), 3) + self.assertEqual(stats.numCacheEntries(), 1) + + def testAlternatingTransitiveHeader(self): + with cd(os.path.join(ASSETS_DIR, "hits-and-misses")), tempfile.TemporaryDirectory() as tempDir: + cache = clcache.Cache(tempDir) + customEnv = dict(os.environ, CLCACHE_DIR=tempDir) + baseCmd = CLCACHE_CMD + ["/nologo", "/EHsc", "/c"] + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 0) + self.assertEqual(stats.numCacheMisses(), 0) + self.assertEqual(stats.numCacheEntries(), 0) + + # VERSION 1 + with open('alternating-header.h', 'w') as f: + f.write("#define VERSION 1\n") + subprocess.check_call(baseCmd + ["stable-source-transitive-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 0) + self.assertEqual(stats.numCacheMisses(), 1) + self.assertEqual(stats.numCacheEntries(), 1) + + # VERSION 2 + with open('alternating-header.h', 'w') as f: + f.write("#define VERSION 2\n") + subprocess.check_call(baseCmd + ["stable-source-transitive-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 0) + self.assertEqual(stats.numCacheMisses(), 2) + self.assertEqual(stats.numCacheEntries(), 2) + + # VERSION 1 again + with open('alternating-header.h', 'w') as f: + f.write("#define VERSION 1\n") + subprocess.check_call(baseCmd + ["stable-source-transitive-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 1) + self.assertEqual(stats.numCacheMisses(), 2) + self.assertEqual(stats.numCacheEntries(), 2) + + # VERSION 2 again + with open('alternating-header.h', 'w') as f: + f.write("#define VERSION 1\n") + subprocess.check_call(baseCmd + ["stable-source-transitive-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 2) + self.assertEqual(stats.numCacheMisses(), 2) + self.assertEqual(stats.numCacheEntries(), 2) + + def testRemovedTransitiveHeader(self): + with cd(os.path.join(ASSETS_DIR, "hits-and-misses")), tempfile.TemporaryDirectory() as tempDir: + cache = clcache.Cache(tempDir) + customEnv = dict(os.environ, CLCACHE_DIR=tempDir) + baseCmd = CLCACHE_CMD + ["/nologo", "/EHsc", "/c"] + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 0) + self.assertEqual(stats.numCacheMisses(), 0) + self.assertEqual(stats.numCacheEntries(), 0) + + # VERSION 1 + with open('alternating-header.h', 'w') as f: + f.write("#define VERSION 1\n") + subprocess.check_call(baseCmd + ["stable-source-transitive-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 0) + self.assertEqual(stats.numCacheMisses(), 1) + self.assertEqual(stats.numCacheEntries(), 1) + + # Remove header, trigger the compiler which should fail + os.remove('alternating-header.h') + with self.assertRaises(subprocess.CalledProcessError): + subprocess.check_call(baseCmd + ["stable-source-transitive-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 0) + self.assertEqual(stats.numCacheMisses(), 2) + self.assertEqual(stats.numCacheEntries(), 1) + + # VERSION 1 again + with open('alternating-header.h', 'w') as f: + f.write("#define VERSION 1\n") + subprocess.check_call(baseCmd + ["stable-source-transitive-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 1) + self.assertEqual(stats.numCacheMisses(), 2) + self.assertEqual(stats.numCacheEntries(), 1) + + # Remove header again, trigger the compiler which should fail + os.remove('alternating-header.h') + with self.assertRaises(subprocess.CalledProcessError): + subprocess.check_call(baseCmd + ["stable-source-transitive-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 1) + self.assertEqual(stats.numCacheMisses(), 3) + self.assertEqual(stats.numCacheEntries(), 1) + class TestPrecompiledHeaders(unittest.TestCase): def testSampleproject(self): diff --git a/tests/integrationtests/hits-and-misses/stable-source-transitive-header.cpp b/tests/integrationtests/hits-and-misses/stable-source-transitive-header.cpp new file mode 100644 index 00000000..e713db2a --- /dev/null +++ b/tests/integrationtests/hits-and-misses/stable-source-transitive-header.cpp @@ -0,0 +1,9 @@ +#include + +#include "transitive-header.h" + +int main() +{ + std::cout << "A C++ file we compile twice and expect a hit" << std::endl; + return 0; +} diff --git a/tests/integrationtests/hits-and-misses/transitive-header.h b/tests/integrationtests/hits-and-misses/transitive-header.h new file mode 100644 index 00000000..47aa41e8 --- /dev/null +++ b/tests/integrationtests/hits-and-misses/transitive-header.h @@ -0,0 +1 @@ +#include "alternating-header.h" From cc2ba81833ab1b1042bb4ba8fa628b50d78e9dce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela?= Date: Mon, 5 Sep 2016 18:05:46 +0200 Subject: [PATCH 03/13] Add alternating header order hit & miss test --- integrationtests.py | 60 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/integrationtests.py b/integrationtests.py index 752f0a9a..a823941b 100644 --- a/integrationtests.py +++ b/integrationtests.py @@ -437,6 +437,66 @@ def testRemovedTransitiveHeader(self): self.assertEqual(stats.numCacheMisses(), 3) self.assertEqual(stats.numCacheEntries(), 1) + def testAlternatingIncludeOrder(self): + with cd(os.path.join(ASSETS_DIR, "hits-and-misses")), tempfile.TemporaryDirectory() as tempDir: + cache = clcache.Cache(tempDir) + customEnv = dict(os.environ, CLCACHE_DIR=tempDir) + baseCmd = CLCACHE_CMD + ["/nologo", "/EHsc", "/c"] + + with open('A.h', 'w') as header: + header.write('#define A 1\n') + with open('B.h', 'w') as header: + header.write('#define B 1\n') + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 0) + self.assertEqual(stats.numCacheMisses(), 0) + self.assertEqual(stats.numCacheEntries(), 0) + + # VERSION 1 + with open('stable-source-with-alternating-header.h', 'w') as f: + f.write('#include "A.h"\n') + f.write('#include "B.h"\n') + subprocess.check_call(baseCmd + ["stable-source-with-alternating-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 0) + self.assertEqual(stats.numCacheMisses(), 1) + self.assertEqual(stats.numCacheEntries(), 1) + + # VERSION 2 + with open('stable-source-with-alternating-header.h', 'w') as f: + f.write('#include "B.h"\n') + f.write('#include "A.h"\n') + subprocess.check_call(baseCmd + ["stable-source-with-alternating-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 0) + self.assertEqual(stats.numCacheMisses(), 2) + self.assertEqual(stats.numCacheEntries(), 2) + + # VERSION 1 again + with open('stable-source-with-alternating-header.h', 'w') as f: + f.write('#include "A.h"\n') + f.write('#include "B.h"\n') + subprocess.check_call(baseCmd + ["stable-source-with-alternating-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 1) + self.assertEqual(stats.numCacheMisses(), 2) + self.assertEqual(stats.numCacheEntries(), 2) + + # VERSION 2 again + with open('stable-source-with-alternating-header.h', 'w') as f: + f.write('#include "B.h"\n') + f.write('#include "A.h"\n') + subprocess.check_call(baseCmd + ["stable-source-with-alternating-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 2) + self.assertEqual(stats.numCacheMisses(), 2) + self.assertEqual(stats.numCacheEntries(), 2) + class TestPrecompiledHeaders(unittest.TestCase): def testSampleproject(self): From 0571949413eee42799da31fd2c5ba39f4b6f6c9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela?= Date: Mon, 5 Sep 2016 18:17:24 +0200 Subject: [PATCH 04/13] Add integration test for repeated includes --- integrationtests.py | 58 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/integrationtests.py b/integrationtests.py index a823941b..a2b9c1f8 100644 --- a/integrationtests.py +++ b/integrationtests.py @@ -497,6 +497,64 @@ def testAlternatingIncludeOrder(self): self.assertEqual(stats.numCacheMisses(), 2) self.assertEqual(stats.numCacheEntries(), 2) + def testRepeatedIncludes(self): + with cd(os.path.join(ASSETS_DIR, "hits-and-misses")), tempfile.TemporaryDirectory() as tempDir: + cache = clcache.Cache(tempDir) + customEnv = dict(os.environ, CLCACHE_DIR=tempDir) + baseCmd = CLCACHE_CMD + ["/nologo", "/EHsc", "/c"] + + with open('A.h', 'w') as header: + header.write('#define A 1\n') + with open('B.h', 'w') as header: + header.write('#define B 1\n') + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 0) + self.assertEqual(stats.numCacheMisses(), 0) + self.assertEqual(stats.numCacheEntries(), 0) + + # VERSION 1 + with open('stable-source-with-alternating-header.h', 'w') as f: + f.write('#include "A.h"\n') + f.write('#include "A.h"\n') + subprocess.check_call(baseCmd + ["stable-source-with-alternating-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 0) + self.assertEqual(stats.numCacheMisses(), 1) + self.assertEqual(stats.numCacheEntries(), 1) + + # VERSION 2 + with open('stable-source-with-alternating-header.h', 'w') as f: + f.write('#include "A.h"\n') + subprocess.check_call(baseCmd + ["stable-source-with-alternating-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 0) + self.assertEqual(stats.numCacheMisses(), 2) + self.assertEqual(stats.numCacheEntries(), 2) + + # VERSION 1 again + with open('stable-source-with-alternating-header.h', 'w') as f: + f.write('#include "A.h"\n') + f.write('#include "A.h"\n') + subprocess.check_call(baseCmd + ["stable-source-with-alternating-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 1) + self.assertEqual(stats.numCacheMisses(), 2) + self.assertEqual(stats.numCacheEntries(), 2) + + # VERSION 2 again + with open('stable-source-with-alternating-header.h', 'w') as f: + f.write('#include "A.h"\n') + subprocess.check_call(baseCmd + ["stable-source-with-alternating-header.cpp"], env=customEnv) + + with cache.statistics as stats: + self.assertEqual(stats.numCacheHits(), 2) + self.assertEqual(stats.numCacheMisses(), 2) + self.assertEqual(stats.numCacheEntries(), 2) + class TestPrecompiledHeaders(unittest.TestCase): def testSampleproject(self): From eb1e26fe8ac4816b22d67bd738af666edbf4a672 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela?= Date: Wed, 24 Aug 2016 19:51:34 +0200 Subject: [PATCH 05/13] Change Manifests to store a list of ManifestEntires. Always update manifests instead of creating new ones. A ManifestEntry contains: * A list of includePaths * A hash of the contents of the includes * A hash of the object file compiled with this set of includes --- clcache.py | 137 +++++++++++++++++++++++++++------------------------ unittests.py | 45 +++++++++-------- 2 files changed, 95 insertions(+), 87 deletions(-) diff --git a/clcache.py b/clcache.py index e2dc30bd..9e668b8a 100644 --- a/clcache.py +++ b/clcache.py @@ -58,11 +58,11 @@ # to use it as mark for relative path. BASEDIR_REPLACEMENT = '?' +# ManifestEntry: an entry in a manifest file # `includeFiles`: list of paths to include files, which this source file uses -# `includesContentToObjectMap`: dictionary -# key: cumulative hash of all include files' content in includeFiles -# value: key in the cache, under which the object file is stored -Manifest = namedtuple('Manifest', ['includeFiles', 'includesContentToObjectMap']) +# `includesContentsHash`: hash of the contents of the includeFiles +# `objectHash`: hash of the object in cache +ManifestEntry = namedtuple('ManifestEntry', ['includeFiles', 'includesContentHash', 'objectHash']) CompilerArtifacts = namedtuple('CompilerArtifacts', ['objectFilePath', 'stdout', 'stderr']) @@ -108,10 +108,6 @@ class IncludeNotFoundException(Exception): pass -class IncludeChangedException(Exception): - pass - - class CacheLockException(Exception): pass @@ -125,6 +121,17 @@ def __str__(self): return repr(self.message) +class Manifest(object): + def __init__(self, entries): + self._entries = entries + + def entries(self): + return self._entries + + def addEntry(self, entry): + self._entries.append(entry) + + class ManifestSection(object): def __init__(self, manifestSectionDir): self.manifestSectionDir = manifestSectionDir @@ -140,7 +147,9 @@ def setManifest(self, manifestHash, manifest): ensureDirectoryExists(self.manifestSectionDir) with open(self.manifestPath(manifestHash), 'w') as outFile: # Converting namedtuple to JSON via OrderedDict preserves key names and keys order - json.dump(manifest._asdict(), outFile, sort_keys=True, indent=2) + entries = [e._asdict() for e in manifest.entries()] + jsonobject = {'entries': entries} + json.dump(jsonobject, outFile, sort_keys=True, indent=2) def getManifest(self, manifestHash): fileName = self.manifestPath(manifestHash) @@ -149,7 +158,8 @@ def getManifest(self, manifestHash): try: with open(fileName, 'r') as inFile: doc = json.load(inFile) - return Manifest(doc['includeFiles'], doc['includesContentToObjectMap']) + return Manifest([ManifestEntry(e['includeFiles'], e['includesContentHash'], e['objectHash']) + for e in doc['entries']]) except IOError: return None @@ -172,7 +182,7 @@ class ManifestRepository(object): # invalidation, such that a manifest that was stored using the old format is not # interpreted using the new format. Instead the old file will not be touched # again due to a new manifest hash and is cleaned away after some time. - MANIFEST_FILE_FORMAT_VERSION = 4 + MANIFEST_FILE_FORMAT_VERSION = 5 def __init__(self, manifestsRootDir): self._manifestsRootDir = manifestsRootDir @@ -219,26 +229,19 @@ def getManifestHash(compilerBinary, commandLine, sourceFile): @staticmethod def getIncludesContentHashForFiles(includes): - listOfIncludesHashes = [] - includeMissing = False + listOfHashes = [] - for path in sorted(includes.keys()): + for path in includes: try: - fileHash = getFileHash(path) - if fileHash != includes[path]: - raise IncludeChangedException() - listOfIncludesHashes.append(fileHash) + listOfHashes.append(getFileHash(path)) except FileNotFoundError: - includeMissing = True + raise IncludeNotFoundException - if includeMissing: - raise IncludeNotFoundException() - - return ManifestRepository.getIncludesContentHashForHashes(listOfIncludesHashes) + return ManifestRepository.getIncludesContentHashForHashes(listOfHashes) @staticmethod - def getIncludesContentHashForHashes(listOfIncludesHashes): - return HashAlgorithm(','.join(listOfIncludesHashes).encode()).hexdigest() + def getIncludesContentHashForHashes(listOfHashes): + return HashAlgorithm(','.join(listOfHashes).encode()).hexdigest() class CacheLock(object): @@ -754,7 +757,8 @@ def getStringHash(dataString): return hasher.hexdigest() -def expandBasedirPlaceholder(path, baseDir): +def expandBasedirPlaceholder(path): + baseDir = normalizeBaseDir(os.environ.get('CLCACHE_BASEDIR')) if path.startswith(BASEDIR_REPLACEMENT): if not baseDir: raise LogicException('No CLCACHE_BASEDIR set, but found relative path ' + path) @@ -763,13 +767,17 @@ def expandBasedirPlaceholder(path, baseDir): return path -def collapseBasedirToPlaceholder(path, baseDir): - assert path == os.path.normcase(path) - assert baseDir == os.path.normcase(baseDir) - if path.startswith(baseDir): - return path.replace(baseDir, BASEDIR_REPLACEMENT, 1) - else: +def collapseBasedirToPlaceholder(path): + baseDir = normalizeBaseDir(os.environ.get('CLCACHE_BASEDIR')) + if baseDir is None: return path + else: + assert path == os.path.normcase(path) + assert baseDir == os.path.normcase(baseDir) + if path.startswith(baseDir): + return path.replace(baseDir, BASEDIR_REPLACEMENT, 1) + else: + return path def ensureDirectoryExists(path): @@ -1371,24 +1379,24 @@ def processCacheHit(cache, objectFile, cachekey): return 0, cachedArtifacts.stdout, cachedArtifacts.stderr, False -def createManifest(manifestHash, includePaths): - baseDir = normalizeBaseDir(os.environ.get('CLCACHE_BASEDIR')) - - includes = {path:getFileHash(path) for path in includePaths} - includesContentHash = ManifestRepository.getIncludesContentHashForFiles(includes) +def createManifestEntry(manifestHash, includePaths): + includesWithHash = [[path, getFileHash(path)] for path in includePaths] + includesContentHash = ManifestRepository.getIncludesContentHashForHashes( + [hash for include, hash in includesWithHash]) cachekey = CompilerArtifactsRepository.computeKeyDirect(manifestHash, includesContentHash) - # Create new manifest - if baseDir: - relocatableIncludePaths = { - collapseBasedirToPlaceholder(path, baseDir):contentHash - for path, contentHash in includes.items() - } - manifest = Manifest(relocatableIncludePaths, {}) - else: - manifest = Manifest(includes, {}) - manifest.includesContentToObjectMap[includesContentHash] = cachekey - return manifest, cachekey + safeIncludes = [collapseBasedirToPlaceholder(path) for path, contentHash in includesWithHash] + return ManifestEntry(safeIncludes, includesContentHash, cachekey) + + +def createOrUpdateManifest(manifestSection, manifestHash, includePaths): + entry = createManifestEntry(manifestHash, includePaths) + manifest = manifestSection.getManifest(manifestHash) + if manifest is None: + manifest = Manifest([]) + manifest.addEntry(entry) + manifestSection.setManifest(manifestHash, manifest) + return manifest, entry.objectHash def postprocessUnusableManifestMiss( @@ -1401,8 +1409,7 @@ def postprocessUnusableManifestMiss( returnCode, compilerOutput, compilerStderr = invokeRealCompiler(compiler, cmdLine, captureOutput=True) includePaths, compilerOutput = parseIncludesSet(compilerOutput, sourceFile, stripIncludes) - if returnCode == 0 and os.path.exists(objectFile): - manifest, cachekey = createManifest(manifestHash, includePaths) + manifest, cachekey = createOrUpdateManifest(manifestSection, manifestHash, includePaths) cleanupRequired = False section = cache.compilerArtifactsRepository.section(cachekey) @@ -1551,7 +1558,6 @@ def processCompileRequest(cache, compiler, args): def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): - baseDir = normalizeBaseDir(os.environ.get('CLCACHE_BASEDIR')) manifestHash = ManifestRepository.getManifestHash(compiler, cmdLine, sourceFile) manifestSection = cache.manifestRepository.section(manifestHash) with manifestSection.lock: @@ -1561,21 +1567,24 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine, Statistics.registerSourceChangedMiss) - # NOTE: command line options already included in hash for manifest name - try: - includesContentHash = ManifestRepository.getIncludesContentHashForFiles({ - expandBasedirPlaceholder(path, baseDir):contentHash - for path, contentHash in manifest.includeFiles.items() - }) - except IncludeChangedException: - return postprocessUnusableManifestMiss( - cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine, - Statistics.registerHeaderChangedMiss) + for entry in manifest.entries(): + # NOTE: command line options already included in hash for manifest name + try: + includesContentHash = ManifestRepository.getIncludesContentHashForFiles( + [expandBasedirPlaceholder(path) for path in entry.includeFiles]) + + if entry.includesContentHash == includesContentHash: + cachekey = entry.objectHash + assert cachekey is not None - cachekey = manifest.includesContentToObjectMap.get(includesContentHash) - assert cachekey is not None + return getOrSetArtifacts( + cache, cachekey, objectFile, compiler, cmdLine, Statistics.registerEvictedMiss) + except IncludeNotFoundException: + pass - return getOrSetArtifacts(cache, cachekey, objectFile, compiler, cmdLine, Statistics.registerEvictedMiss) + return postprocessUnusableManifestMiss( + cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine, + Statistics.registerHeaderChangedMiss) def processNoDirect(cache, objectFile, compiler, cmdLine, environment): diff --git a/unittests.py b/unittests.py index 3fecdc57..64b8d8d7 100644 --- a/unittests.py +++ b/unittests.py @@ -20,6 +20,7 @@ CompilerArtifactsRepository, Configuration, Manifest, + ManifestEntry, ManifestRepository, Statistics, ) @@ -208,6 +209,17 @@ def testHitCounts(self): class TestManifestRepository(unittest.TestCase): + entry1 = ManifestEntry([r'somepath\myinclude.h'], + "fdde59862785f9f0ad6e661b9b5746b7", + "a649723940dc975ebd17167d29a532f8") + entry2 = ManifestEntry([r'somepath\myinclude.h', r'moreincludes.h'], + "474e7fc26a592d84dfa7416c10f036c6", + "8771d7ebcf6c8bd57a3d6485f63e3a89") + # Size in (120, 240] bytes + manifest1 = Manifest([entry1]) + # Size in (120, 240] bytes + manifest2 = Manifest([entry2]) + def _getDirectorySize(self, dirPath): def filesize(path, filename): return os.stat(os.path.join(path, filename)).st_size @@ -269,28 +281,21 @@ def testStoreAndGetManifest(self): manifestsRootDir = os.path.join(ASSETS_DIR, "manifests") mm = ManifestRepository(manifestsRootDir) - manifest1 = Manifest([r'somepath\myinclude.h'], { - "fdde59862785f9f0ad6e661b9b5746b7": "a649723940dc975ebd17167d29a532f8" - }) - manifest2 = Manifest([r'somepath\myinclude.h', 'moreincludes.h'], { - "474e7fc26a592d84dfa7416c10f036c6": "8771d7ebcf6c8bd57a3d6485f63e3a89" - }) - ms1 = mm.section("8a33738d88be7edbacef48e262bbb5bc") ms2 = mm.section("0623305942d216c165970948424ae7d1") - ms1.setManifest("8a33738d88be7edbacef48e262bbb5bc", manifest1) - ms2.setManifest("0623305942d216c165970948424ae7d1", manifest2) + ms1.setManifest("8a33738d88be7edbacef48e262bbb5bc", TestManifestRepository.manifest1) + ms2.setManifest("0623305942d216c165970948424ae7d1", TestManifestRepository.manifest2) retrieved1 = ms1.getManifest("8a33738d88be7edbacef48e262bbb5bc") self.assertIsNotNone(retrieved1) - self.assertEqual(retrieved1.includesContentToObjectMap["fdde59862785f9f0ad6e661b9b5746b7"], - "a649723940dc975ebd17167d29a532f8") + retrieved1Entry = retrieved1.entries()[0] + self.assertEqual(retrieved1Entry, TestManifestRepository.entry1) retrieved2 = ms2.getManifest("0623305942d216c165970948424ae7d1") self.assertIsNotNone(retrieved2) - self.assertEqual(retrieved2.includesContentToObjectMap["474e7fc26a592d84dfa7416c10f036c6"], - "8771d7ebcf6c8bd57a3d6485f63e3a89") + retrieved2Entry = retrieved2.entries()[0] + self.assertEqual(retrieved2Entry, TestManifestRepository.entry2) def testNonExistingManifest(self): manifestsRootDir = os.path.join(ASSETS_DIR, "manifests") @@ -303,16 +308,10 @@ def testClean(self): manifestsRootDir = os.path.join(ASSETS_DIR, "manifests") mm = ManifestRepository(manifestsRootDir) - # Size in (120, 240] bytes - manifest1 = Manifest([r'somepath\myinclude.h'], { - "fdde59862785f9f0ad6e661b9b5746b7": "a649723940dc975ebd17167d29a532f8" - }) - # Size in (120, 240] bytes - manifest2 = Manifest([r'somepath\myinclude.h', 'moreincludes.h'], { - "474e7fc26a592d84dfa7416c10f036c6": "8771d7ebcf6c8bd57a3d6485f63e3a89" - }) - mm.section("8a33738d88be7edbacef48e262bbb5bc").setManifest("8a33738d88be7edbacef48e262bbb5bc", manifest1) - mm.section("0623305942d216c165970948424ae7d1").setManifest("0623305942d216c165970948424ae7d1", manifest2) + mm.section("8a33738d88be7edbacef48e262bbb5bc").setManifest("8a33738d88be7edbacef48e262bbb5bc", + TestManifestRepository.manifest1) + mm.section("0623305942d216c165970948424ae7d1").setManifest("0623305942d216c165970948424ae7d1", + TestManifestRepository.manifest2) cleaningResultSize = mm.clean(240) # Only one of those manifests can be left From 54bbbec1dc4ee5bbba431a1b64d40dc40226c19e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela?= Date: Fri, 26 Aug 2016 16:51:12 +0200 Subject: [PATCH 06/13] Add logging when writing manifest files --- clcache.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clcache.py b/clcache.py index 9e668b8a..fbf6d698 100644 --- a/clcache.py +++ b/clcache.py @@ -144,8 +144,10 @@ def manifestFiles(self): return filesBeneath(self.manifestSectionDir) def setManifest(self, manifestHash, manifest): + manifestPath = self.manifestPath(manifestHash) + printTraceStatement("Writing manifest with manifestHash = {} to {}".format(manifestHash, manifestPath)) ensureDirectoryExists(self.manifestSectionDir) - with open(self.manifestPath(manifestHash), 'w') as outFile: + with open(manifestPath, 'w') as outFile: # Converting namedtuple to JSON via OrderedDict preserves key names and keys order entries = [e._asdict() for e in manifest.entries()] jsonobject = {'entries': entries} From b7cac4c3cb74842a0d6f8644e9140fc0f45545ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela?= Date: Mon, 5 Sep 2016 16:14:08 +0200 Subject: [PATCH 07/13] Keep manifest entries ordered in least-recently-used order --- clcache.py | 14 ++++++++++++-- unittests.py | 23 +++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/clcache.py b/clcache.py index fbf6d698..933c8fd1 100644 --- a/clcache.py +++ b/clcache.py @@ -129,7 +129,14 @@ def entries(self): return self._entries def addEntry(self, entry): - self._entries.append(entry) + """Adds entry at the top of the entries""" + self._entries.insert(0, entry) + + def touchEntry(self, entryIndex): + """Moves entry in entryIndex position to the top of entries()""" + entry = self._entries[entryIndex] + self._entries.pop(entryIndex) + self._entries.insert(0, entry) class ManifestSection(object): @@ -1569,7 +1576,7 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine, Statistics.registerSourceChangedMiss) - for entry in manifest.entries(): + for entryIndex, entry in enumerate(manifest.entries()): # NOTE: command line options already included in hash for manifest name try: includesContentHash = ManifestRepository.getIncludesContentHashForFiles( @@ -1578,6 +1585,9 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): if entry.includesContentHash == includesContentHash: cachekey = entry.objectHash assert cachekey is not None + # Move manifest entry to the top of the entries in the manifest + manifest.touchEntry(entryIndex) + manifestSection.setManifest(manifestHash, manifest) return getOrSetArtifacts( cache, cachekey, objectFile, compiler, cmdLine, Statistics.registerEvictedMiss) diff --git a/unittests.py b/unittests.py index 64b8d8d7..8cdc4481 100644 --- a/unittests.py +++ b/unittests.py @@ -904,6 +904,29 @@ def testParseIncludesGerman(self): r'c:\program files (x86)\microsoft visual studio 12.0\vc\include\concurrencysal.h' in includesSet) self.assertTrue(r'' not in includesSet) +class TestManifest(unittest.TestCase): + entry1 = ManifestEntry([r'somepath\myinclude.h'], + "fdde59862785f9f0ad6e661b9b5746b7", + "a649723940dc975ebd17167d29a532f8") + entry2 = ManifestEntry([r'somepath\myinclude.h', r'moreincludes.h'], + "474e7fc26a592d84dfa7416c10f036c6", + "8771d7ebcf6c8bd57a3d6485f63e3a89") + entries = [entry1, entry2] + + def testCreation(self): + manifest = Manifest() + self.assertFalse(manifest.entries()) + + def testCreation(self): + manifest = Manifest(TestManifest.entries) + self.assertEqual(TestManifest.entries, manifest.entries()) + + def testAddEntry(self): + manifest = Manifest(TestManifest.entries) + self.assertEqual(TestManifest.entry1, manifest.entries()[0]) + manifest.touchEntry(1) + self.assertEqual(TestManifest.entry2, manifest.entries()[0]) + if __name__ == '__main__': unittest.TestCase.longMessage = True From 78045100fb81e160d05269c26b81f2844d0c0c30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela?= Date: Mon, 5 Sep 2016 17:22:13 +0200 Subject: [PATCH 08/13] Fix bug adding entries to the cache --- clcache.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/clcache.py b/clcache.py index 933c8fd1..6f20a276 100644 --- a/clcache.py +++ b/clcache.py @@ -1398,14 +1398,13 @@ def createManifestEntry(manifestHash, includePaths): return ManifestEntry(safeIncludes, includesContentHash, cachekey) -def createOrUpdateManifest(manifestSection, manifestHash, includePaths): - entry = createManifestEntry(manifestHash, includePaths) +def createOrUpdateManifest(manifestSection, manifestHash, entry): manifest = manifestSection.getManifest(manifestHash) if manifest is None: manifest = Manifest([]) manifest.addEntry(entry) manifestSection.setManifest(manifestHash, manifest) - return manifest, entry.objectHash + return manifest def postprocessUnusableManifestMiss( @@ -1418,7 +1417,8 @@ def postprocessUnusableManifestMiss( returnCode, compilerOutput, compilerStderr = invokeRealCompiler(compiler, cmdLine, captureOutput=True) includePaths, compilerOutput = parseIncludesSet(compilerOutput, sourceFile, stripIncludes) - manifest, cachekey = createOrUpdateManifest(manifestSection, manifestHash, includePaths) + entry = createManifestEntry(manifestHash, includePaths) + cachekey = entry.objectHash cleanupRequired = False section = cache.compilerArtifactsRepository.section(cachekey) @@ -1427,6 +1427,7 @@ def postprocessUnusableManifestMiss( if returnCode == 0 and os.path.exists(objectFile): artifacts = CompilerArtifacts(objectFile, compilerOutput, compilerStderr) cleanupRequired = addObjectToCache(stats, cache, section, cachekey, artifacts) + manifest = createOrUpdateManifest(manifestSection, manifestHash, entry) manifestSection.setManifest(manifestHash, manifest) return returnCode, compilerOutput, compilerStderr, cleanupRequired From 4699760d69832915430764fe903514bd3504c86d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela?= Date: Mon, 5 Sep 2016 17:28:46 +0200 Subject: [PATCH 09/13] Add empty constructor for manifest --- clcache.py | 6 ++++-- unittests.py | 23 +++++++++++++++++------ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/clcache.py b/clcache.py index 6f20a276..2153d65f 100644 --- a/clcache.py +++ b/clcache.py @@ -122,8 +122,10 @@ def __str__(self): class Manifest(object): - def __init__(self, entries): - self._entries = entries + def __init__(self, entries=None): + if entries is None: + entries = [] + self._entries = entries.copy() def entries(self): return self._entries diff --git a/unittests.py b/unittests.py index 8cdc4481..b704ea10 100644 --- a/unittests.py +++ b/unittests.py @@ -904,24 +904,35 @@ def testParseIncludesGerman(self): r'c:\program files (x86)\microsoft visual studio 12.0\vc\include\concurrencysal.h' in includesSet) self.assertTrue(r'' not in includesSet) + class TestManifest(unittest.TestCase): entry1 = ManifestEntry([r'somepath\myinclude.h'], - "fdde59862785f9f0ad6e661b9b5746b7", - "a649723940dc975ebd17167d29a532f8") + "fdde59862785f9f0ad6e661b9b5746b7", + "a649723940dc975ebd17167d29a532f8") entry2 = ManifestEntry([r'somepath\myinclude.h', r'moreincludes.h'], - "474e7fc26a592d84dfa7416c10f036c6", - "8771d7ebcf6c8bd57a3d6485f63e3a89") + "474e7fc26a592d84dfa7416c10f036c6", + "8771d7ebcf6c8bd57a3d6485f63e3a89") entries = [entry1, entry2] - def testCreation(self): + def testCreateEmpty(self): manifest = Manifest() self.assertFalse(manifest.entries()) - def testCreation(self): + def testCreateWithEntries(self): manifest = Manifest(TestManifest.entries) self.assertEqual(TestManifest.entries, manifest.entries()) + def testAddEntry(self): + manifest = Manifest(TestManifest.entries) + newEntry = ManifestEntry([r'somepath\myotherinclude.h'], + "474e7fc26a592d84dfa7416c10f036c6", + "8771d7ebcf6c8bd57a3d6485f63e3a89") + manifest.addEntry(newEntry) + self.assertEqual(newEntry, manifest.entries()[0]) + + + def testTouchEntry(self): manifest = Manifest(TestManifest.entries) self.assertEqual(TestManifest.entry1, manifest.entries()[0]) manifest.touchEntry(1) From 66fef2d6b6cb23720a42da68d629ad3972faafab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela?= Date: Mon, 5 Sep 2016 17:29:45 +0200 Subject: [PATCH 10/13] Shorter getManifest or new Manifest --- clcache.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clcache.py b/clcache.py index 2153d65f..a3685008 100644 --- a/clcache.py +++ b/clcache.py @@ -1401,9 +1401,7 @@ def createManifestEntry(manifestHash, includePaths): def createOrUpdateManifest(manifestSection, manifestHash, entry): - manifest = manifestSection.getManifest(manifestHash) - if manifest is None: - manifest = Manifest([]) + manifest = manifestSection.getManifest(manifestHash) or Manifest() manifest.addEntry(entry) manifestSection.setManifest(manifestHash, manifest) return manifest From c3ba9b2df92698d3c9ad816dc9328de0fadb070c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela?= Date: Mon, 5 Sep 2016 18:18:12 +0200 Subject: [PATCH 11/13] Simplify createManifestEntry --- clcache.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/clcache.py b/clcache.py index a3685008..e0e6e2e8 100644 --- a/clcache.py +++ b/clcache.py @@ -1391,12 +1391,11 @@ def processCacheHit(cache, objectFile, cachekey): def createManifestEntry(manifestHash, includePaths): - includesWithHash = [[path, getFileHash(path)] for path in includePaths] - includesContentHash = ManifestRepository.getIncludesContentHashForHashes( - [hash for include, hash in includesWithHash]) + includesWithHash = {path:getFileHash(path) for path in includePaths} + includesContentHash = ManifestRepository.getIncludesContentHashForHashes(includesWithHash.values()) cachekey = CompilerArtifactsRepository.computeKeyDirect(manifestHash, includesContentHash) - safeIncludes = [collapseBasedirToPlaceholder(path) for path, contentHash in includesWithHash] + safeIncludes = [collapseBasedirToPlaceholder(path) for path in includesWithHash.keys()] return ManifestEntry(safeIncludes, includesContentHash, cachekey) From 06c0e1b980bf81c985a1ecf3faea2693a26b8047 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela?= Date: Wed, 7 Sep 2016 17:45:56 +0200 Subject: [PATCH 12/13] Update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e31ec4cd..554c2e3a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,10 @@ clcache changelog `CLCACHE_OBJECT_CACHE_TIMEOUT_MS` environment variable. * Improvement: Greatly improved concurrency of clcache such that concurrent invocations of the tool no longer block each other. + * Improvement: Improve hit rate when alternating between two identical + versions of the same source file that transitively get different contents of + the included files (a common case when switching back and forth between + branches). ## clcache 3.2.0 (2016-07-28) From 66fa5e83099f7524bdf59f3d6a0b00799951cffa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela?= Date: Wed, 7 Sep 2016 17:46:09 +0200 Subject: [PATCH 13/13] Simplify touchEntry --- clcache.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clcache.py b/clcache.py index e0e6e2e8..fef54ef6 100644 --- a/clcache.py +++ b/clcache.py @@ -136,9 +136,7 @@ def addEntry(self, entry): def touchEntry(self, entryIndex): """Moves entry in entryIndex position to the top of entries()""" - entry = self._entries[entryIndex] - self._entries.pop(entryIndex) - self._entries.insert(0, entry) + self._entries.insert(0, self._entries.pop(entryIndex)) class ManifestSection(object):