Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 60 additions & 26 deletions processinghistory/history.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,15 @@
value being a list of keys of the parents of that file. This dictionary stores
all the ancestry relationships for the whole lineage.

History in VRT files
--------------------
A GDAL VRT file is handled as a somewhat special case. The component files
of the VRT are treated as parents of the VRT (and there can be no other parents),
and the history of those files is read directly from them, rather than being
copied into the VRT. This is handled transparently, so that when history
is read from the VRT, it appears to have all come from there. This allows the
history of the components to be as dynamic as the data itself.

"""
import sys
import os
Expand All @@ -54,6 +63,7 @@
PARENTS_BY_KEY = "parentsByKey"
AUTOENVVARSLIST_NAME = "HISTORY_ENVVARS_TO_AUTOINCLUDE"
NO_TIMESTAMP = "UnknownTimestamp"
TIMESTAMP = "timestamp"

# These GDAL drivers are known to have limits on the size of metadata which
# can be stored, and so we need to keep below these, or we lose everything.
Expand All @@ -70,9 +80,36 @@ def __init__(self):
self.metadataByKey = {}
self.parentsByKey = {}

def addParentHistory(self, parentfile):
    """
    Add history from parent file to self

    The history of parentfile is read using readHistoryFromFile(). If
    the file has a history, its "current file" metadata and parentage are
    re-keyed under (basename, timestamp), and all of its ancestor entries
    are copied into self. If it has no history, it is keyed as
    (basename, NO_TIMESTAMP). In either case, that key is appended to the
    list of parents of the current file.

    The parents list for the current file is created if it does not
    already exist, so this can be called on a history object which has
    not yet had that list initialised.
    """
    parentHist = readHistoryFromFile(filename=parentfile)
    parentBasename = os.path.basename(parentfile)

    if parentHist is None:
        # No history available in the parent, so it has no known timestamp
        key = (parentBasename, NO_TIMESTAMP)
    else:
        # Detach the parent's "currentfile" entries before touching self,
        # so that a malformed parent history cannot leave self half-updated
        parentMetadata = parentHist.metadataByKey.pop(CURRENTFILE_KEY)
        parentParents = parentHist.parentsByKey.pop(CURRENTFILE_KEY)
        key = (parentBasename, parentMetadata[TIMESTAMP])

        # Convert parent's "currentfile" metadata and parentage to normal
        # key entries
        self.metadataByKey[key] = parentMetadata
        self.parentsByKey[key] = parentParents

        # Copy over all the other ancestor metadata and parentage
        self.metadataByKey.update(parentHist.metadataByKey)
        self.parentsByKey.update(parentHist.parentsByKey)

    # Add this parent as parent of current file. Use setdefault() so that
    # a missing parents list is created, rather than raising KeyError
    self.parentsByKey.setdefault(CURRENTFILE_KEY, []).append(key)

def toJSON(self):
"""
Return a JSON representation of the given ProcessingHistory
Return a JSON representation of the current ProcessingHistory
"""
d = {
METADATA_BY_KEY: {},
Expand Down Expand Up @@ -131,7 +168,7 @@ def makeAutomaticFields():
dictn = {}

# Time stamp formatted as per ISO 8601 standard, including time zone offset
dictn['timestamp'] = time.strftime("%Y-%m-%d %H:%M:%S%z", time.localtime())
dictn[TIMESTAMP] = time.strftime("%Y-%m-%d %H:%M:%S%z", time.localtime())

dictn['login'] = getpass.getuser()

Expand Down Expand Up @@ -239,8 +276,6 @@ def writeHistoryToFile(userDict={}, parents=[], *, filename=None, gdalDS=None):
File can be specified as either a filename string or an open GDAL Dataset

"""
procHist = makeProcessingHistory(userDict, parents)

if filename is not None:
ds = gdal.Open(filename, gdal.GA_Update)
else:
Expand All @@ -250,6 +285,12 @@ def writeHistoryToFile(userDict={}, parents=[], *, filename=None, gdalDS=None):
raise ProcessingHistoryError("Must supply either filename or gdalDS")

drvrName = ds.GetDriver().ShortName
isVRT = (drvrName == "VRT")
if isVRT and len(parents) > 0:
msg = "History for VRT files should not have parents"
raise ProcessingHistoryError(msg)

procHist = makeProcessingHistory(userDict, parents)

# Convert to JSON
procHistJSON = procHist.toJSON()
Expand Down Expand Up @@ -295,28 +336,7 @@ def makeProcessingHistory(userDict, parents):
# Now add history from each parent file
procHist.parentsByKey[CURRENTFILE_KEY] = []
for parentfile in parents:
parentHist = readHistoryFromFile(filename=parentfile)

if parentHist is not None:
key = (os.path.basename(parentfile),
parentHist.metadataByKey[CURRENTFILE_KEY]['timestamp'])

# Convert parent's "currentfile" metadata and parentage to normal key entries
procHist.metadataByKey[key] = parentHist.metadataByKey[CURRENTFILE_KEY]
procHist.parentsByKey[key] = parentHist.parentsByKey[CURRENTFILE_KEY]

# Remove those from parentHist
parentHist.metadataByKey.pop(CURRENTFILE_KEY)
parentHist.parentsByKey.pop(CURRENTFILE_KEY)

# Copy over all the other ancestor metadata and parentage
procHist.metadataByKey.update(parentHist.metadataByKey)
procHist.parentsByKey.update(parentHist.parentsByKey)
else:
key = (os.path.basename(parentfile), NO_TIMESTAMP)

# Add this parent as parent of current file
procHist.parentsByKey[CURRENTFILE_KEY].append(key)
procHist.addParentHistory(parentfile)

return procHist

Expand All @@ -342,6 +362,20 @@ def readHistoryFromFile(filename=None, gdalDS=None):

if procHistJSON is not None:
procHist = ProcessingHistory.fromJSON(procHistJSON)

# If this is a VRT, then read the component files as though they were
# parent files
isVRT = (ds.GetDriver().ShortName == "VRT")
if isVRT:
vrtFile = ds.GetDescription()
componentList = [fn for fn in ds.GetFileList() if fn != vrtFile]
for componentFile in componentList:
if not os.path.exists(componentFile):
msg = (f"VRT file '{vrtFile}' missing component " +
f"'{componentFile}'")
raise ProcessingHistoryError(msg)

procHist.addParentHistory(componentFile)
else:
procHist = None

Expand Down
Loading