From ca1a5801337f51df3c12519eea0e96223cec476e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela=20Pastor?= Date: Tue, 26 Sep 2017 14:13:52 -0700 Subject: [PATCH 1/7] Add atomicRename() and write json files to temporary files before rename According to documentation and some internet posts os.replace(...) is atomic (https://docs.python.org/3/library/os.html#os.replace). This commit changes the code writing the manifest and the statistics files to write to a temporary file and then use a new atomicRename() function to atomically replace the old file with then new contents. It should avoid corrupting the cache when the clcache process crashes or is killed during a write, partly addressing the root cause issue #263. Note that the object files are still not updated atomically. --- clcache.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/clcache.py b/clcache.py index 530fdb04..95b3d52e 100644 --- a/clcache.py +++ b/clcache.py @@ -116,6 +116,10 @@ def normalizeBaseDir(baseDir): return None +def atomicRename(tempFile, destFile): + os.replace(tempFile, destFile) + + class IncludeNotFoundException(Exception): pass @@ -177,11 +181,13 @@ def setManifest(self, manifestHash, manifest): manifestPath = self.manifestPath(manifestHash) printTraceStatement("Writing manifest with manifestHash = {} to {}".format(manifestHash, manifestPath)) ensureDirectoryExists(self.manifestSectionDir) - with open(manifestPath, 'w') as outFile: + tempManifest = manifestPath + ".new" + with open(tempManifest, 'w') as outFile: # Converting namedtuple to JSON via OrderedDict preserves key names and keys order entries = [e._asdict() for e in manifest.entries()] jsonobject = {'entries': entries} json.dump(jsonobject, outFile, sort_keys=True, indent=2) + atomicRename(tempManifest, manifestPath) def getManifest(self, manifestHash): fileName = self.manifestPath(manifestHash) @@ -637,8 +643,11 @@ def __init__(self, fileName): def save(self): if self._dirty: - with open(self._fileName, 'w') as f: + tempFileName = self._fileName + ".new" + with open(tempFileName, 'w') as f: json.dump(self._dict, f, sort_keys=True, indent=4) + atomicRename(tempFileName, self._fileName) + def __setitem__(self, key, value): self._dict[key] = value From fc01692de3f8db630a47e7a4e8a15c4914a9d4a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela=20Pastor?= Date: Tue, 26 Sep 2017 15:01:31 -0700 Subject: [PATCH 2/7] Use atomicRename() from copyOrLink Use atomicRename() to save and restore the object files from the cache. It should reduce problems with corrupted object files in the cache as reported in issue #263. While it fixes the most important part (corrupted object files) it still it does not completely fix the problem as the cache entry is composed of: * object file * stdout * stderr which should be replaced atomically. --- clcache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clcache.py b/clcache.py index 95b3d52e..b0385ef5 100644 --- a/clcache.py +++ b/clcache.py @@ -976,7 +976,7 @@ def copyOrLink(srcFilePath, dstFilePath): # lower the chances of corrupting it. tempDst = dstFilePath + '.tmp' copyfile(srcFilePath, tempDst) - os.rename(tempDst, dstFilePath) + atomicRename(tempDst, dstFilePath) def myExecutablePath(): From 62cbd0f50656485e5b6be7e8d7733b5076c34a1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela=20Pastor?= Date: Tue, 26 Sep 2017 14:14:39 -0700 Subject: [PATCH 3/7] Read manifests and statistics without locking If writing the json files is atomic, reading can be done safely without locks. --- clcache.py | 73 +++++++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/clcache.py b/clcache.py index b0385ef5..e5e255b7 100644 --- a/clcache.py +++ b/clcache.py @@ -1670,33 +1670,33 @@ def processSingleSource(compiler, cmdLine, sourceFile, objectFile, environment): def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): manifestHash = ManifestRepository.getManifestHash(compiler, cmdLine, sourceFile) manifestHit = None - with cache.manifestLockFor(manifestHash): - manifest = cache.getManifest(manifestHash) - if manifest: - for entryIndex, entry in enumerate(manifest.entries()): - # NOTE: command line options already included in hash for manifest name - try: - includesContentHash = ManifestRepository.getIncludesContentHashForFiles( - [expandBasedirPlaceholder(path) for path in entry.includeFiles]) - - if entry.includesContentHash == includesContentHash: - cachekey = entry.objectHash - assert cachekey is not None - # Move manifest entry to the top of the entries in the manifest - manifest.touchEntry(entryIndex) - cache.setManifest(manifestHash, manifest) - - manifestHit = True - with cache.lockFor(cachekey): - if cache.hasEntry(cachekey): - return processCacheHit(cache, objectFile, cachekey) - - except IncludeNotFoundException: - pass - - unusableManifestMissReason = Statistics.registerHeaderChangedMiss - else: - unusableManifestMissReason = Statistics.registerSourceChangedMiss + manifest = cache.getManifest(manifestHash) + if manifest: + for entryIndex, entry in enumerate(manifest.entries()): + # NOTE: command line options already included in hash for manifest name + try: + includesContentHash = ManifestRepository.getIncludesContentHashForFiles( + [expandBasedirPlaceholder(path) for path in entry.includeFiles]) + + if entry.includesContentHash == includesContentHash: + cachekey = entry.objectHash + assert cachekey is not None + # Move manifest entry to the top of the entries in the manifest + #manifest.touchEntry(entryIndex) + #with cache.manifestLockFor(manifestHash): + #cache.setManifest(manifestHash, manifest) + + manifestHit = True + with cache.lockFor(cachekey): + if cache.hasEntry(cachekey): + return processCacheHit(cache, objectFile, cachekey) + + except IncludeNotFoundException: + pass + + unusableManifestMissReason = Statistics.registerHeaderChangedMiss + else: + unusableManifestMissReason = Statistics.registerSourceChangedMiss if manifestHit is None: stripIncludes = False @@ -1709,22 +1709,21 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): includePaths, compilerOutput = parseIncludesSet(compilerResult[1], sourceFile, stripIncludes) compilerResult = (compilerResult[0], compilerOutput, compilerResult[2]) - with cache.manifestLockFor(manifestHash): - if manifestHit is not None: - return ensureArtifactsExist(cache, cachekey, unusableManifestMissReason, - objectFile, compilerResult) - - entry = createManifestEntry(manifestHash, includePaths) - cachekey = entry.objectHash + if manifestHit is not None: + return ensureArtifactsExist(cache, cachekey, unusableManifestMissReason, + objectFile, compilerResult) - def addManifest(): + entry = createManifestEntry(manifestHash, includePaths) + cachekey = entry.objectHash + def addManifest(): + with cache.manifestLockFor(manifestHash): manifest = cache.getManifest(manifestHash) or Manifest() manifest.addEntry(entry) cache.setManifest(manifestHash, manifest) - return ensureArtifactsExist(cache, cachekey, unusableManifestMissReason, - objectFile, compilerResult, addManifest) + return ensureArtifactsExist(cache, cachekey, unusableManifestMissReason, + objectFile, compilerResult, addManifest) def processNoDirect(cache, objectFile, compiler, cmdLine, environment): From cf63867f2d0ef231d0d153f429eb4fb22dec65a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela=20Pastor?= Date: Tue, 26 Sep 2017 14:36:01 -0700 Subject: [PATCH 4/7] Re-add touchEntry Since the manifest is not locked for reading before calling touchEntry it may have changed on disk. It is needed to lock, read the manifest, touch and write it atomically. touchEntry(...) also cannot rely on the index to be valid after read, so it was re-implemented to use the objectKey to find the correct entry. --- clcache.py | 17 ++++++++++++----- unittests.py | 2 +- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/clcache.py b/clcache.py index e5e255b7..9fc07f41 100644 --- a/clcache.py +++ b/clcache.py @@ -161,8 +161,13 @@ def addEntry(self, entry): """Adds entry at the top of the entries""" self._entries.insert(0, entry) - def touchEntry(self, entryIndex): + def touchEntry(self, objectHash): """Moves entry in entryIndex position to the top of entries()""" + entryIndex = 0 + for i, e in enumerate(self.entries()): + if e.objectHash == objectHash: + entryIndex = i + break self._entries.insert(0, self._entries.pop(entryIndex)) @@ -1681,10 +1686,12 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile): if entry.includesContentHash == includesContentHash: cachekey = entry.objectHash assert cachekey is not None - # Move manifest entry to the top of the entries in the manifest - #manifest.touchEntry(entryIndex) - #with cache.manifestLockFor(manifestHash): - #cache.setManifest(manifestHash, manifest) + if entryIndex > 0: + # Move manifest entry to the top of the entries in the manifest + with cache.manifestLockFor(manifestHash): + manifest = cache.getManifest(manifestHash) or Manifest() + manifest.touchEntry(cachekey) + cache.setManifest(manifestHash, manifest) manifestHit = True with cache.lockFor(cachekey): diff --git a/unittests.py b/unittests.py index 05190df1..1dcc5448 100644 --- a/unittests.py +++ b/unittests.py @@ -973,7 +973,7 @@ def testAddEntry(self): def testTouchEntry(self): manifest = Manifest(TestManifest.entries) self.assertEqual(TestManifest.entry1, manifest.entries()[0]) - manifest.touchEntry(1) + manifest.touchEntry("8771d7ebcf6c8bd57a3d6485f63e3a89") self.assertEqual(TestManifest.entry2, manifest.entries()[0]) From 41be5a774e5569048fcd2bc2f7ce092efff7ac48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela=20Pastor?= Date: Tue, 26 Sep 2017 14:52:24 -0700 Subject: [PATCH 5/7] Move cache lock inside clearCache, cleanCache and printStatistics Hide locking inside these functions to have more fine-grained control. Since writing to the statistics file is now atomic, locking is not needed. --- clcache.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/clcache.py b/clcache.py index 9fc07f41..dee8cbe0 100644 --- a/clcache.py +++ b/clcache.py @@ -1406,17 +1406,17 @@ def printStatistics(cache): def resetStatistics(cache): - with cache.statistics as stats: + with cache.statistics.lock, cache.statistics as stats: stats.resetCounters() def cleanCache(cache): - with cache.statistics as stats, cache.configuration as cfg: + with cache.lock, cache.statistics as stats, cache.configuration as cfg: cache.clean(stats, cfg.maximumCacheSize()) def clearCache(cache): - with cache.statistics as stats: + with cache.lock, cache.statistics as stats: cache.clean(stats, 0) @@ -1522,25 +1522,21 @@ def main(): cache = Cache() if len(sys.argv) == 2 and sys.argv[1] == "-s": - with cache.lock: - printStatistics(cache) + printStatistics(cache) return 0 if len(sys.argv) == 2 and sys.argv[1] == "-c": - with cache.lock: - cleanCache(cache) + cleanCache(cache) print('Cache cleaned') return 0 if len(sys.argv) == 2 and sys.argv[1] == "-C": - with cache.lock: - clearCache(cache) + clearCache(cache) print('Cache cleared') return 0 if len(sys.argv) == 2 and sys.argv[1] == "-z": - with cache.lock: - resetStatistics(cache) + resetStatistics(cache) print('Statistics reset') return 0 @@ -1652,8 +1648,7 @@ def scheduleJobs(cache, compiler, cmdLine, environment, sourceFiles, objectFiles break if cleanupRequired: - with cache.lock: - cleanCache(cache) + cleanCache(cache) return exitCode From 3631edb3aecd8e715962be21b67a800df49b9208 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela?= Date: Tue, 10 Oct 2017 15:07:02 +0200 Subject: [PATCH 6/7] Simplify search expression in Manifest.touchEntry(...) --- clcache.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/clcache.py b/clcache.py index dee8cbe0..e3b87623 100644 --- a/clcache.py +++ b/clcache.py @@ -163,11 +163,7 @@ def addEntry(self, entry): def touchEntry(self, objectHash): """Moves entry in entryIndex position to the top of entries()""" - entryIndex = 0 - for i, e in enumerate(self.entries()): - if e.objectHash == objectHash: - entryIndex = i - break + entryIndex = next((i for i, e in enumerate(self.entries()) if e.objectHash == objectHash), 0) self._entries.insert(0, self._entries.pop(entryIndex)) From 5ca3a141857c752048c97208973af9736a3aed1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Si=C3=B1uela?= Date: Tue, 10 Oct 2017 15:18:26 +0200 Subject: [PATCH 7/7] Reduce duplication with atomicWrite context manager --- clcache.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/clcache.py b/clcache.py index e3b87623..9d898658 100644 --- a/clcache.py +++ b/clcache.py @@ -116,8 +116,12 @@ def normalizeBaseDir(baseDir): return None -def atomicRename(tempFile, destFile): - os.replace(tempFile, destFile) +@contextlib.contextmanager +def atomicWrite(fileName): + tempFileName = fileName + '.new' + with open(tempFileName, 'w') as f: + yield f + os.replace(tempFileName, fileName) class IncludeNotFoundException(Exception): @@ -182,13 +186,11 @@ def setManifest(self, manifestHash, manifest): manifestPath = self.manifestPath(manifestHash) printTraceStatement("Writing manifest with manifestHash = {} to {}".format(manifestHash, manifestPath)) ensureDirectoryExists(self.manifestSectionDir) - tempManifest = manifestPath + ".new" - with open(tempManifest, 'w') as outFile: + with atomicWrite(manifestPath) as outFile: # Converting namedtuple to JSON via OrderedDict preserves key names and keys order entries = [e._asdict() for e in manifest.entries()] jsonobject = {'entries': entries} json.dump(jsonobject, outFile, sort_keys=True, indent=2) - atomicRename(tempManifest, manifestPath) def getManifest(self, manifestHash): fileName = self.manifestPath(manifestHash) @@ -644,11 +646,8 @@ def __init__(self, fileName): def save(self): if self._dirty: - tempFileName = self._fileName + ".new" - with open(tempFileName, 'w') as f: + with atomicWrite(self._fileName) as f: json.dump(self._dict, f, sort_keys=True, indent=4) - atomicRename(tempFileName, self._fileName) - def __setitem__(self, key, value): self._dict[key] = value @@ -977,7 +976,7 @@ def copyOrLink(srcFilePath, dstFilePath): # lower the chances of corrupting it. tempDst = dstFilePath + '.tmp' copyfile(srcFilePath, tempDst) - atomicRename(tempDst, dstFilePath) + os.replace(tempDst, dstFilePath) def myExecutablePath():