From bd6bc1408ae7b84420b3b4a00180a41116f6255d Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Thu, 25 Jun 2020 16:23:18 +0100
Subject: [PATCH 01/65] First cut at a start of a fast_dp with DIALS script

---
 command_line/fast_derpee.py | 109 ++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)
 create mode 100644 command_line/fast_derpee.py

diff --git a/command_line/fast_derpee.py b/command_line/fast_derpee.py
new file mode 100644
index 00000000..f3f2542b
--- /dev/null
+++ b/command_line/fast_derpee.py
@@ -0,0 +1,109 @@
+import procrunner
+import sys
+import os
+import glob
+import copy
+
+from dxtbx.model.experiment_list import ExperimentList, ExperimentListFactory
+
+
+class derpee:
+    def __init__(self, filenames):
+        self._experiment = ExperimentListFactory.from_filenames(filenames)
+        self._crystal = None
+        self._root = os.getcwd()
+
+        # quick checks...
+        scan = self._experiment[0].scan
+        osc = scan.get_oscillation()
+        rng = scan.get_image_range()
+
+        wedge = osc[1] * (rng[1] - rng[0] + 1)
+
+        assert wedge >= 95
+
+        self._osc = osc[1]
+        self._rng = rng
+
+    def index(self):
+        work = os.path.join(self._root, "index")
+        if os.path.exists(work):
+            indexed = ExperimentList.from_file(os.path.join(work, "indexed.expt"))
+
+            self._experiment[0].crystal = indexed[0].crystal
+            return
+
+        os.mkdir(work)
+
+        # index from 0-5, 45-50 degree blocks - hard code on 0.1 degree frames
+        # to start, do properly later
+        self._experiment.as_file(os.path.join(work, "input.expt"))
+
+        blocks = [
+            (start + 1, start + int(round(5 / self._osc)))
+            for start in (0, int(round(45 / self._osc)), int(round(90 / self._osc)))
+        ]
+
+        result = procrunner.run(
+            ["dials.find_spots", "input.expt", "nproc=8"]
+            + ["scan_range=%d,%d" % block for block in blocks],
+            working_directory=work,
+        )
+
+        # let's just assume that was fine - so index
+
+        result = procrunner.run(
+            ["dials.index", "input.expt", "strong.refl"], working_directory=work
+        )
+
+        indexed = ExperimentList.from_file(os.path.join(work, "indexed.expt"))
+
+        self._experiment[0].crystal = indexed[0].crystal
+
+    def integrate(self):
+        size = int(round(5 / self._osc))
+        # count chunks in computer numbers
+        for j, start in enumerate(range(self._rng[0] - 1, self._rng[1], size)):
+            self.integrate_chunk(j, (start, start + size))
+
+    def integrate_chunk(self, no, chunk):
+        # need to figure out how to spin this off to something running on a
+        # cluster node...
+        work = os.path.join(self._root, "integrate%02d" % no)
+        if not os.path.exists(work):
+            os.mkdir(work)
+
+        expt = copy.deepcopy(self._experiment)
+
+        # fix up the scan to correspond to input chunk
+        scan = expt[0].scan
+        epochs = scan.get_epochs()[chunk[0] : chunk[1]]
+        exp_times = scan.get_exposure_times()[chunk[0] : chunk[1]]
+        scan.set_image_range((chunk[0] + 1, chunk[1]))
+        scan.set_epochs(epochs)
+        scan.set_exposure_times(exp_times)
+        expt[0].scan = scan
+
+        expt.as_file(os.path.join(work, "input.expt"))
+        result = procrunner.run(
+            ["dials.find_spots", "input.expt", "nproc=8"], working_directory=work
+        )
+        result = procrunner.run(
+            ["dials.index", "input.expt", "strong.refl"], working_directory=work
+        )
+        result = procrunner.run(
+            ["dials.refine", "indexed.expt", "indexed.refl"], working_directory=work
+        )
+
+        result = procrunner.run(
+            ["dials.integrate", "refined.expt", "refined.refl", "nproc=8"],
+            working_directory=work,
+        )
+
+
+if __name__ == "__main__":
+    filenames = sum(map(glob.glob, sys.argv[1:]), [])
+
+    derp = derpee(filenames)
+    derp.index()
+    derp.integrate()

From 5256407b1d52b917356897d3569b1f82ebb68d54 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Thu, 25 Jun 2020 16:29:28 +0100
Subject: [PATCH 02/65] Forgot critical parameter

---
 command_line/fast_derpee.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/command_line/fast_derpee.py b/command_line/fast_derpee.py
index f3f2542b..7e20f2bb 100644
--- a/command_line/fast_derpee.py
+++ b/command_line/fast_derpee.py
@@ -89,7 +89,13 @@ def integrate_chunk(self, no, chunk):
             ["dials.find_spots", "input.expt", "nproc=8"], working_directory=work
         )
         result = procrunner.run(
-            ["dials.index", "input.expt", "strong.refl"], working_directory=work
+            [
+                "dials.index",
+                "input.expt",
+                "strong.refl",
+                "index_assignment.method=local",
+            ],
+            working_directory=work,
         )
         result = procrunner.run(
             ["dials.refine", "indexed.expt", "indexed.refl"], working_directory=work

From 486cae16627469fe72b3aebb27537616fddd5893 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Fri, 26 Jun 2020 06:47:17 +0100
Subject: [PATCH 03/65] Reassign oscillation range

Critical to get the spots to match up correctly in reciprocal space;
once again shows that we have some broken data models here.

Though could I have used scan slicing for this?
---
 command_line/fast_derpee.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/command_line/fast_derpee.py b/command_line/fast_derpee.py
index 7e20f2bb..c1c42afc 100644
--- a/command_line/fast_derpee.py
+++ b/command_line/fast_derpee.py
@@ -22,7 +22,7 @@ def __init__(self, filenames):
 
         assert wedge >= 95
 
-        self._osc = osc[1]
+        self._osc = osc
         self._rng = rng
 
     def index(self):
@@ -40,8 +40,12 @@ def index(self):
         self._experiment.as_file(os.path.join(work, "input.expt"))
 
         blocks = [
-            (start + 1, start + int(round(5 / self._osc)))
-            for start in (0, int(round(45 / self._osc)), int(round(90 / self._osc)))
+            (start + 1, start + int(round(5 / self._osc[1])))
+            for start in (
+                0,
+                int(round(45 / self._osc[1])),
+                int(round(90 / self._osc[1])),
+            )
         ]
 
         result = procrunner.run(
@@ -61,7 +65,7 @@ def index(self):
         self._experiment[0].crystal = indexed[0].crystal
 
     def integrate(self):
-        size = int(round(5 / self._osc))
+        size = int(round(5 / self._osc[1]))
         # count chunks in computer numbers
         for j, start in enumerate(range(self._rng[0] - 1, self._rng[1], size)):
             self.integrate_chunk(j, (start, start + size))
@@ -80,6 +84,9 @@ def integrate_chunk(self, no, chunk):
         epochs = scan.get_epochs()[chunk[0] : chunk[1]]
         exp_times = scan.get_exposure_times()[chunk[0] : chunk[1]]
         scan.set_image_range((chunk[0] + 1, chunk[1]))
+        scan.set_oscillation(
+            (self._osc[0] + (chunk[0] - self._rng[0]) * self._osc[1], self._osc[1])
+        )
         scan.set_epochs(epochs)
         scan.set_exposure_times(exp_times)
         expt[0].scan = scan

From 327e0ebee2374880b2887f9cf7690d92a9bf6d42 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Fri, 26 Jun 2020 07:01:34 +0100
Subject: [PATCH 04/65] Commentary

---
 command_line/fast_derpee.py | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/command_line/fast_derpee.py b/command_line/fast_derpee.py
index c1c42afc..079108c7 100644
--- a/command_line/fast_derpee.py
+++ b/command_line/fast_derpee.py
@@ -26,6 +26,9 @@ def __init__(self, filenames):
         self._rng = rng
 
     def index(self):
+        """Initial find spots and indexing: if it has been run already will
+        just reload the results from the previous run."""
+
         work = os.path.join(self._root, "index")
         if os.path.exists(work):
             indexed = ExperimentList.from_file(os.path.join(work, "indexed.expt"))
@@ -65,14 +68,29 @@ def index(self):
         self._experiment[0].crystal = indexed[0].crystal
 
     def integrate(self):
+        """Integration of the complete scan: will split the data into 5 deg
+        chunks and spin the integration of each chunk off separately"""
+
         size = int(round(5 / self._osc[1]))
+
+        # need to figure out how to spin this off to something running on a
+        # cluster node... ideally want this called on many chunks at once
+
         # count chunks in computer numbers
         for j, start in enumerate(range(self._rng[0] - 1, self._rng[1], size)):
             self.integrate_chunk(j, (start, start + size))
 
     def integrate_chunk(self, no, chunk):
-        # need to figure out how to spin this off to something running on a
-        # cluster node...
+        """Integrate a chunk of data: performs -
+        - spot finding
+        - indexing by using the UB matrix determined above (quick)
+        - scan varying refinement
+        - integration
+        And works in the usual way for DIALS of using spots from every
+        image for modelling. This is designed to be data-local e.g. could
+        somehow stash the data as read for spot finding and not need to
+        read more times in the integration."""
+
         work = os.path.join(self._root, "integrate%02d" % no)
         if not os.path.exists(work):
             os.mkdir(work)
@@ -113,6 +131,13 @@ def integrate_chunk(self, no, chunk):
             working_directory=work,
         )
 
+    def symmetry_scale(self):
+        """Collect together the data so far integrated, use e.g. multiplex to
+        combine, determine the symmetry and scale, or combine experiments
+        followed by dials.symmetry and dials.scale #TODO. Since the UB matrix
+        is in principle the same for each scan should be fine."""
+        pass
+
 
 if __name__ == "__main__":
     filenames = sum(map(glob.glob, sys.argv[1:]), [])

From 3ea2ebeca9f0b7b9559c38a5c2f3d52f2c9af989 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Fri, 26 Jun 2020 11:29:26 +0100
Subject: [PATCH 05/65] Incorporate symmetry, resolution estimation and scaling

---
 command_line/fast_derpee.py | 149 +++++++++++++++++++++++++++++++++---
 1 file changed, 140 insertions(+), 9 deletions(-)

diff --git a/command_line/fast_derpee.py b/command_line/fast_derpee.py
index 079108c7..aaf75665 100644
--- a/command_line/fast_derpee.py
+++ b/command_line/fast_derpee.py
@@ -25,6 +25,11 @@ def __init__(self, filenames):
         self._osc = osc
         self._rng = rng
 
+        self._integrated = []
+        self._combined = None
+        self._symmetry = None
+        self._scaled = None
+
     def index(self):
         """Initial find spots and indexing: if it has been run already will
         just reload the results from the previous run."""
@@ -36,7 +41,8 @@ def index(self):
             self._experiment[0].crystal = indexed[0].crystal
             return
 
-        os.mkdir(work)
+        if not os.path.exists(work):
+            os.mkdir(work)
 
         # index from 0-5, 45-50 degree blocks - hard code on 0.1 degree frames
         # to start, do properly later
         self._experiment.as_file(os.path.join(work, "input.expt"))
 
         blocks = [
@@ -78,7 +84,7 @@ def integrate(self):
 
         # count chunks in computer numbers
         for j, start in enumerate(range(self._rng[0] - 1, self._rng[1], size)):
-            self.integrate_chunk(j, (start, start + size))
+            self._integrated.append(self.integrate_chunk(j, (start, start + size)))
 
     def integrate_chunk(self, no, chunk):
         """Integrate a chunk of data: performs -
@@ -91,7 +97,14 @@ def integrate_chunk(self, no, chunk):
         somehow stash the data as read for spot finding and not need to
         read more times in the integration."""
 
-        work = os.path.join(self._root, "integrate%02d" % no)
+        work = os.path.join(self._root, "integrate%03d" % no)
+        if os.path.exists(work):
+            if all(
+                os.path.exists(os.path.join(work, f"integrated.{exten}"))
+                for exten in ["refl", "expt"]
+            ):
+                return work
+
         if not os.path.exists(work):
             os.mkdir(work)
 
@@ -131,12 +144,125 @@ def integrate_chunk(self, no, chunk):
             working_directory=work,
         )
 
-    def symmetry_scale(self):
-        """Collect together the data so far integrated, use e.g. multiplex to
-        combine, determine the symmetry and scale, or combine experiments
-        followed by dials.symmetry and dials.scale #TODO. Since the UB matrix
-        is in principle the same for each scan should be fine."""
-        pass
+        return work
+
+    def combine(self):
+        """Collect together the data so far integrated."""
+
+        work = os.path.join(self._root, "combine")
+
+        if os.path.exists(work):
+            if all(
+                os.path.exists(os.path.join(work, f"combined.{exten}"))
+                for exten in ["refl", "expt"]
+            ):
+                self._combined = work
+                return
+
+        if not os.path.exists(work):
+            os.mkdir(work)
+
+        assert self._integrated is not []
+
+        integrated = sum(
+            [
+                [
+                    os.path.join(directory, f"integrated.{exten}")
+                    for exten in ["refl", "expt"]
+                ]
+                for directory in sorted(self._integrated)
+            ],
+            [],
+        )
+
+        result = procrunner.run(
+            ["dials.combine_experiments"] + integrated, working_directory=work
+        )
+
+        self._combined = work
+
+    def symmetry(self):
+        """Take the combined data, determine the symmetry."""
+
+        assert self._combined is not None
+
+        work = os.path.join(self._root, "symmetry")
+
+        if os.path.exists(work):
+            if all(
+                os.path.exists(os.path.join(work, f"symmetrized.{exten}"))
+                for exten in ["refl", "expt"]
+            ):
+                self._symmetry = work
+                return work
+
+        if not os.path.exists(work):
+            os.mkdir(work)
+
+        result = procrunner.run(
+            ["dials.symmetry"]
+            + [
+                os.path.join(self._combined, f"combined.{exten}")
+                for exten in ["refl", "expt"]
+            ],
+            working_directory=work,
+        )
+
+        self._symmetry = work
+
+    def scale(self, d_min=None):
+        """Scale the data, to the resolution input or everything if unspecified.
+        If the data have been previously scaled, start from the scaled data
+        for sake of efficiency."""
+
+        work = os.path.join(self._root, "scale")
+
+        if os.path.exists(work):
+            if d_min is None and all(
+                os.path.exists(os.path.join(work, f"scaled.{exten}"))
+                for exten in ["refl", "expt"]
+            ):
+                self._scaled = work
+                return work
+
+        if not os.path.exists(work):
+            os.mkdir(work)
+
+        if self._scaled is not None:
+            source = os.path.join(self._scaled, "scaled")
+        else:
+            source = os.path.join(self._symmetry, "symmetrized")
+
+        command = ["dials.scale"]
+        if d_min is not None:
+            command.append(f"d_min={d_min}")
+
+        result = procrunner.run(
+            command + [f"{source}.{exten}" for exten in ["refl", "expt"]],
+            working_directory=work,
+        )
+
+        self._scaled = work
+
+    def resolution(self):
+        """Determine the resolution of the data after scaling."""
+
+        assert self._scaled
+
+        source = os.path.join(self._scaled, "scaled")
+
+        result = procrunner.run(
+            ["dials.resolutionizer"]
+            + [f"{source}.{exten}" for exten in ["refl", "expt"]],
+            working_directory=self._scaled,
+        )
+
+        d_min = None
+        for record in result["stdout"].split(b"\n"):
+            if record.startswith(b"Resolution cc_half:"):
+                d_min = float(record.split()[-1])
+
+        return d_min
 
 
 if __name__ == "__main__":
     filenames = sum(map(glob.glob, sys.argv[1:]), [])
 
     derp = derpee(filenames)
     derp.index()
     derp.integrate()
+    derp.combine()
+    derp.symmetry()
+    derp.scale()
+    d_min = derp.resolution()
+    derp.scale(d_min=d_min)

From cfdd7212b1208b1675a61a72f048a9f5862437c1 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Mon, 29 Jun 2020 07:53:39 +0100
Subject: [PATCH 06/65] Rename to fp3 - faster parallel processing pipeline

---
 command_line/{fast_derpee.py => fp3.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename command_line/{fast_derpee.py => fp3.py} (100%)

diff --git a/command_line/fast_derpee.py b/command_line/fp3.py
similarity index 100%
rename from command_line/fast_derpee.py
rename to command_line/fp3.py

From 700711d2d2061ff7af48bc153386a7ca4c9d96e8 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Mon, 29 Jun 2020 15:17:14 +0100
Subject: [PATCH 07/65] First cut at tool docs

---
 documentation/fp3/fp3.rst | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 documentation/fp3/fp3.rst

diff --git a/documentation/fp3/fp3.rst b/documentation/fp3/fp3.rst
new file mode 100644
index 00000000..ee8e9908
--- /dev/null
+++ b/documentation/fp3/fp3.rst
@@ -0,0 +1,27 @@
+FP3: Faster Parallel Processing Pipeline
+========================================
+
+Introduction
+------------
+
+``dials.fp3`` is a very simple processing pipeline following the behaviour of ``fast_dp`` - except here using ``dials`` tools for the processing. The workflow is to:
+
+1. Import data
+2. Find spots on three 5° wedges of data starting at 0°, 45°, 90° or
+   as close as can be reached to there
+3. Index these spots, without making any effort to determine the
+   symmetry
+4. Split the data into 5° blocks, and for each block:
+   i. slice down the input experiment to just the block, copy the
+      crystal model from step 3. into the imported experiment
+   ii. find spots on the block
+   iii. index the data against the known UB matrix
+   iv. refine (scan static + scan varying)
+   v. integrate
+5. Combine the data from all integrated runs
+6. Determine the symmetry
+7. Scale (no resolution limit)
+8. Determine high resolution limit
+9. Scale output of step 7 with resolution from step 8
+
+

From 710b9ccfa4ee9819a860a971b2505c8dcfa8d889 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Mon, 29 Jun 2020 15:19:09 +0100
Subject: [PATCH 08/65] Rewrite

---
 documentation/fp3/fp3.rst | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/documentation/fp3/fp3.rst b/documentation/fp3/fp3.rst
index ee8e9908..e9b93be8 100644
--- a/documentation/fp3/fp3.rst
+++ b/documentation/fp3/fp3.rst
@@ -12,8 +12,7 @@ Introduction
 3. Index these spots, without making any effort to determine the
    symmetry
 4. Split the data into 5° blocks, and for each block:
-   i. slice down the input experiment to just the block, copy the
-      crystal model from step 3. into the imported experiment
+   i. slice down the input experiment to just the block, copy the crystal model from step 3. into the imported experiment
    ii. find spots on the block
    iii. index the data against the known UB matrix
   iv. refine (scan static + scan varying)

From cff1a0c7d22952b11f25bebce8cb784820504aa5 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Tue, 30 Jun 2020 09:08:12 +0100
Subject: [PATCH 09/65] Derpee -> fp3

---
 command_line/fp3.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index aaf75665..98e11ce0 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -7,7 +7,7 @@
 from dxtbx.model.experiment_list import ExperimentList, ExperimentListFactory
 
 
-class derpee:
+class fp3:
     def __init__(self, filenames):
         self._experiment = ExperimentListFactory.from_filenames(filenames)
         self._crystal = None
@@ -268,7 +268,7 @@ def resolution(self):
 if __name__ == "__main__":
     filenames = sum(map(glob.glob, sys.argv[1:]), [])
 
-    derp = derpee(filenames)
+    derp = fp3(filenames)
     derp.index()
     derp.integrate()
     derp.combine()

From 3d38dc10dd0ef280aae628ecbd00e3f2ad3cc6a1 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Tue, 30 Jun 2020 09:23:34 +0100
Subject: [PATCH 10/65] Logic: if image[0] != 1

---
 command_line/fp3.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index 98e11ce0..e1cfa645 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -48,12 +48,15 @@ def index(self):
         self._experiment.as_file(os.path.join(work, "input.expt"))
 
+        five = int(round(5 / self._osc[1]))
+        i0, i1 = self._rng
+
         blocks = [
-            (start + 1, start + int(round(5 / self._osc[1])))
+            (start, start + five - 1)
             for start in (
-                0,
-                int(round(45 / self._osc[1])),
-                int(round(90 / self._osc[1])),
+                i0,
+                i0 + int(round(45 / self._osc[1])),
+                i0 + int(round(90 / self._osc[1])),
             )
         ]

From ce6c55074bf1b768f63ae8e5a8ca4ce9c411f040 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Tue, 30 Jun 2020 10:07:56 +0100
Subject: [PATCH 11/65] Start factoring out
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Also fix a bug where total range does not divide nicely into 5° chunks
(e.g. 0.15° images)
---
 command_line/fp3.py | 16 +++++++++-------
 fp3/__init__.py     |  3 +++
 fp3/test_util.py    |  9 +++++++++
 fp3/util.py         | 13 +++++++++++++
 4 files changed, 34 insertions(+), 7 deletions(-)
 create mode 100644 fp3/__init__.py
 create mode 100644 fp3/test_util.py
 create mode 100644 fp3/util.py

diff --git a/command_line/fp3.py b/command_line/fp3.py
index e1cfa645..a98c87a2 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -5,7 +5,7 @@
 import copy
 
 from dxtbx.model.experiment_list import ExperimentList, ExperimentListFactory
-
+from dials_scratch.fp3 import even_blocks
 
 class fp3:
     def __init__(self, filenames):
@@ -80,14 +80,16 @@ def integrate(self):
         """Integration of the complete scan: will split the data into 5 deg
         chunks and spin the integration of each chunk off separately"""
 
-        size = int(round(5 / self._osc[1]))
-
+        rng = self._rng
+
+        nblocks = int(round(self._osc[1] * (rng[1] - rng[0] + 1) / 5.0))
+        blocks = even_blocks(rng[0] - 1, rng[1], nblocks)
+
         # need to figure out how to spin this off to something running on a
         # cluster node... ideally want this called on many blocks at once
 
-        # count chunks in computer numbers
-        for j, start in enumerate(range(self._rng[0] - 1, self._rng[1], size)):
-            self.integrate_chunk(j, (start, start + size))
+        for j, block in enumerate(blocks):
+            self._integrated.append(self.integrate_chunk(j, block))
 
     def integrate_chunk(self, no, chunk):
         """Integrate a chunk of data: performs -
diff --git a/fp3/__init__.py b/fp3/__init__.py
new file mode 100644
index 00000000..2459b8fd
--- /dev/null
+++ b/fp3/__init__.py
@@ -0,0 +1,3 @@
+from .util import even_blocks
+
+__all__ = ["even_blocks"]
diff --git a/fp3/test_util.py b/fp3/test_util.py
new file mode 100644
index 00000000..d931297d
--- /dev/null
+++ b/fp3/test_util.py
@@ -0,0 +1,9 @@
+from .util import even_blocks
+
+
+def test_even_blocks():
+    for b in even_blocks(0, 100, 10):
+        assert b[1] - b[0] == 10
+
+    for b in even_blocks(0, 2400, 72):
+        assert b[1] - b[0] in (33, 34)
diff --git a/fp3/util.py b/fp3/util.py
new file mode 100644
index 00000000..9d31bb65
--- /dev/null
+++ b/fp3/util.py
@@ -0,0 +1,13 @@
+def even_blocks(n0, n1, m):
+    """Split the range(n0, n1) into m evenly sized chunks, yield start, end
+    for each chunk."""
+
+    r = m
+    n = n1 - n0
+
+    while r > 0:
+        s = int(round(n / r))
+        yield n0, n0 + s
+        n -= s
+        n0 += s
+        r -= 1

From b10ed7a31fcad5b350790d40e6d93d1ead64bdea Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Tue, 30 Jun 2020 10:43:08 +0100
Subject: [PATCH 12/65] Images for indexing

---
 fp3/__init__.py  |  4 ++--
 fp3/test_util.py |  7 ++++++-
 fp3/util.py      | 23 +++++++++++++++++++++++
 3 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/fp3/__init__.py b/fp3/__init__.py
index 2459b8fd..c1237e27 100644
--- a/fp3/__init__.py
+++ b/fp3/__init__.py
@@ -1,3 +1,3 @@
-from .util import even_blocks
+from .util import even_blocks, index_blocks
 
-__all__ = ["even_blocks"]
+__all__ = ["even_blocks", "index_blocks"]
diff --git a/fp3/test_util.py b/fp3/test_util.py
index d931297d..b0f63d33 100644
--- a/fp3/test_util.py
+++ b/fp3/test_util.py
@@ -1,4 +1,4 @@
-from .util import even_blocks
+from .util import even_blocks, index_blocks
 
 
 def test_even_blocks():
@@ -7,3 +7,8 @@ def test_even_blocks():
 
     for b in even_blocks(0, 2400, 72):
         assert b[1] - b[0] in (33, 34)
+
+def test_index_blocks():
+    assert index_blocks(0, 150, 0.1) == [(0, 150)]
+    assert index_blocks(0, 3600, 0.1) == [(0, 50), (450, 500), (900, 950)]
+    assert index_blocks(0, 900, 0.1) == [(0, 50), (425, 475), (850, 900)]
diff --git a/fp3/util.py b/fp3/util.py
index 9d31bb65..b6c17a1e 100644
--- a/fp3/util.py
+++ b/fp3/util.py
@@ -11,3 +11,26 @@ def even_blocks(n0, n1, m):
         n -= s
         n0 += s
         r -= 1
+
+def index_blocks(n0, n1, osc):
+    """Give back a list of blocks in the range n0, n1 of around 5°, spaced at
+    the start, start + 45°, start + 90° if possible. If <= 15° of data in
+    total, use all, if < 95° get as close as possible to 45° wedge spacing."""
+
+    n = n1 - n0
+    five = int(round(5 / osc))
+
+    if n * osc <= 15:
+        return [(n0, n1)]
+
+    elif n * osc > 95:
+        n45 = int(round(45 / osc))
+        n90 = int(round(90 / osc))
+        return [(n0, n0 + five), (n45, n45 + five), (n90, n90 + five)]
+
+    else:
+        half = n0 + (n - five) // 2
+        return [(n0, n0 + five), (half, half + five), (n1 - five, n1)]
+
+

From c79390f10b2818398c6363bb1425c5b0b294f613 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Tue, 30 Jun 2020 10:51:10 +0100
Subject: [PATCH 13/65] Use index_blocks

Does involve some shuffling around between computer numbers and people
numbers
---
 command_line/fp3.py | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index a98c87a2..3a247909 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -5,7 +5,7 @@
 import copy
 
 from dxtbx.model.experiment_list import ExperimentList, ExperimentListFactory
-from dials_scratch.fp3 import even_blocks
+from dials_scratch.fp3 import even_blocks, index_blocks
 
 class fp3:
     def __init__(self, filenames):
@@ -18,10 +18,6 @@ def __init__(self, filenames):
         osc = scan.get_oscillation()
         rng = scan.get_image_range()
 
-        wedge = osc[1] * (rng[1] - rng[0] + 1)
-
-        assert wedge >= 95
-
         self._osc = osc
         self._rng = rng
 
@@ -44,21 +40,11 @@ def index(self):
         if not os.path.exists(work):
             os.mkdir(work)
 
-        # index from 0-5, 45-50 degree blocks - hard code on 0.1 degree frames
-        # to start, do properly later
         self._experiment.as_file(os.path.join(work, "input.expt"))
 
         five = int(round(5 / self._osc[1]))
         i0, i1 = self._rng
-
-        blocks = [
-            (start, start + five - 1)
-            for start in (
-                i0,
-                i0 + int(round(45 / self._osc[1])),
-                i0 + int(round(90 / self._osc[1])),
-            )
-        ]
+        blocks = [(b[0]+1, b[1]) for b in index_blocks(i0-1, i1, self._osc[1])]
 
         result = procrunner.run(
             ["dials.find_spots", "input.expt", "nproc=8"]
             + ["scan_range=%d,%d" % block for block in blocks],

From cafa70756827f49b4a1287711a7451be2817e05a Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Tue, 30 Jun 2020 11:23:20 +0100
Subject: [PATCH 14/65] fp3 -> FP3 because class

---
 command_line/fp3.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index 3a247909..e9a05bcb 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -7,7 +7,7 @@
 from dxtbx.model.experiment_list import ExperimentList, ExperimentListFactory
 from dials_scratch.fp3 import even_blocks, index_blocks
 
-class fp3:
+class FP3:
     def __init__(self, filenames):
         self._experiment = ExperimentListFactory.from_filenames(filenames)
         self._crystal = None
@@ -259,11 +259,11 @@ def resolution(self):
 if __name__ == "__main__":
     filenames = sum(map(glob.glob, sys.argv[1:]), [])
 
-    derp = fp3(filenames)
-    derp.index()
-    derp.integrate()
-    derp.combine()
-    derp.symmetry()
-    derp.scale()
-    d_min = derp.resolution()
-    derp.scale(d_min=d_min)
+    fp3 = FP3(filenames)
+    fp3.index()
+    fp3.integrate()
+    fp3.combine()
+    fp3.symmetry()
+    fp3.scale()
+    d_min = fp3.resolution()
+    fp3.scale(d_min=d_min)

From ba96cde3883fa2a8d7075ce0e664b0b4520367a6 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Tue, 30 Jun 2020 14:42:25 +0100
Subject: [PATCH 15/65] format_phil_include: workaround

Tool to extract a subset of a phil scope which has been parsed back to
text
---
 fp3/__init__.py |  4 ++--
 fp3/util.py     | 30 +++++++++++++++++++++++++++---
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/fp3/__init__.py b/fp3/__init__.py
index c1237e27..877719e7 100644
--- a/fp3/__init__.py
+++ b/fp3/__init__.py
@@ -1,3 +1,3 @@
-from .util import even_blocks, index_blocks
+from .util import even_blocks, index_blocks, format_phil_include, nproc
 
-__all__ = ["even_blocks", "index_blocks"]
+__all__ = ["even_blocks", "index_blocks", "format_phil_include", "nproc"]
diff --git a/fp3/util.py b/fp3/util.py
index b6c17a1e..97c3e694 100644
--- a/fp3/util.py
+++ b/fp3/util.py
@@ -1,3 +1,8 @@
+import os
+
+from libtbx.introspection import number_of_processors
+
+
 def even_blocks(n0, n1, m):
     """Split the range(n0, n1) into m evenly sized chunks, yield start, end
     for each chunk."""
@@ -12,6 +17,7 @@ def even_blocks(n0, n1, m):
         n0 += s
         r -= 1
 
+
 def index_blocks(n0, n1, osc):
     """Give back a list of blocks in the range n0, n1 of around 5°, spaced at
     the start, start + 45°, start + 90° if possible. If <= 15° of data in
@@ -22,7 +28,7 @@ def index_blocks(n0, n1, osc):
 
     if n * osc <= 15:
         return [(n0, n1)]
-
+
     elif n * osc > 95:
         n45 = int(round(45 / osc))
         n90 = int(round(90 / osc))
         return [(n0, n0 + five), (n45, n45 + five), (n90, n90 + five)]
@@ -32,5 +38,23 @@ def index_blocks(n0, n1, osc):
     else:
         half = n0 + (n - five) // 2
         return [(n0, n0 + five), (half, half + five), (n1 - five, n1)]
-
-
+
+def format_phil_include(scope, working, include):
+    """Provide a diff-phil for one element from given scope and working
+    values: warning horrible workaround."""
+
+    diff = scope.fetch_diff(working.get(include)).as_str()
+    if diff:
+        assert diff.startswith(include)
+        return "\n".join(line[2:] for line in diff.split("\n")[1:-1])
+    else:
+        return diff
+
+
+def nproc():
+    try:
+        return int(os.environ.get("NSLOTS"))
+    except (ValueError, TypeError):
+        pass
+
+    return number_of_processors(return_value_if_unknown=-1)

From c21a0b01ec968a9a14e19e1a03ec69b8d3b54 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Tue, 30 Jun 2020 14:47:02 +0100
Subject: [PATCH 16/65] Allow passing PHIL overrides

Includes scopes for every DIALS program which is used. For each step
will extract relevant PHIL parameters to a file and pass on to the
program in question if not empty.
---
 command_line/fp3.py | 101 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 87 insertions(+), 14 deletions(-)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index e9a05bcb..8b8305fd 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -4,14 +4,52 @@
 import glob
 import copy
 
+from iotbx import phil
 from dxtbx.model.experiment_list import ExperimentList, ExperimentListFactory
-from dials_scratch.fp3 import even_blocks, index_blocks
+from dials_scratch.fp3 import even_blocks, index_blocks, format_phil_include
+from dials_scratch.fp3 import nproc
+
+scope = phil.parse(
+    """
+find_spots {
+  include scope dials.command_line.find_spots.phil_scope
+}
+index {
+  include scope dials.command_line.index.phil_scope
+}
+refine {
+  include scope dials.command_line.refine.phil_scope
+}
+integrate {
+  include scope dials.command_line.integrate.phil_scope
+}
+symmetry {
+  include scope dials.command_line.symmetry.phil_scope
+}
+scale {
+  include scope dials.command_line.scale.phil_scope
+}
+resolution {
+  include scope dials.command_line.resolutionizer.phil_scope
+}
+""",
+    process_includes=True,
+)
+
 
 class FP3:
-    def __init__(self, filenames):
+    def __init__(self, filenames, params):
         self._experiment = ExperimentListFactory.from_filenames(filenames)
+
+        # parse PHIL parameters
+        clai = scope.command_line_argument_interpreter()
+        self._working = scope.fetch(clai.process_and_fetch(params))
+
+        print(scope.fetch_diff(self._working).as_str())
+
         self._crystal = None
         self._root = os.getcwd()
+        self._n = nproc()
 
         # quick checks...
         scan = self._experiment[0].scan
@@ -26,6 +64,20 @@ def __init__(self, filenames):
         self._symmetry = None
         self._scaled = None
 
+    def _write_phil(self, what, where):
+        """helper function: extract (what) from global scope and write as
+        (where)/(what).phil iff it contains anything. If file written,
+        returns [filename] else [] to allow straight command-line adding."""
+
+        phil = format_phil_include(scope, self._working, what).strip()
+        if not phil:
+            return []
+
+        out = os.path.join(where, f"{what}.phil")
+        with open(out, "w") as f:
+            f.write(phil)
+        return [out]
+
     def index(self):
         """Initial find spots and indexing: if it has been run already will
         just reload the results from the previous run."""
@@ -44,18 +96,21 @@ def index(self):
 
         five = int(round(5 / self._osc[1]))
         i0, i1 = self._rng
-        blocks = [(b[0]+1, b[1]) for b in index_blocks(i0-1, i1, self._osc[1])]
+        blocks = [(b[0] + 1, b[1]) for b in index_blocks(i0 - 1, i1, self._osc[1])]
 
+        phil = self._write_phil("find_spots", work)
         result = procrunner.run(
-            ["dials.find_spots", "input.expt", "nproc=8"]
+            ["dials.find_spots", "input.expt", "nproc=%d" % self._n]
+            + phil
             + ["scan_range=%d,%d" % block for block in blocks],
             working_directory=work,
         )
 
         # let's just assume that was fine - so index
 
+        phil = self._write_phil("index", work)
         result = procrunner.run(
-            ["dials.index", "input.expt", "strong.refl"], working_directory=work
+            ["dials.index", "input.expt", "strong.refl"] + phil, working_directory=work
         )
 
         indexed = ExperimentList.from_file(os.path.join(work, "indexed.expt"))
@@ -67,10 +122,10 @@ def integrate(self):
         chunks and spin the integration of each chunk off separately"""
 
         rng = self._rng
-
+
         nblocks = int(round(self._osc[1] * (rng[1] - rng[0] + 1) / 5.0))
         blocks = even_blocks(rng[0] - 1, rng[1], nblocks)
-
+
         # need to figure out how to spin this off to something running on a
         # cluster node... ideally want this called on many blocks at once
 
         for j, block in enumerate(blocks):
             self._integrated.append(self.integrate_chunk(j, block))
@@ -169,24 +169,35 @@ def integrate_chunk(self, no, chunk):
         expt[0].scan = scan
 
         expt.as_file(os.path.join(work, "input.expt"))
+
+        phil = self._write_phil("find_spots", work)
         result = procrunner.run(
-            ["dials.find_spots", "input.expt", "nproc=8"], working_directory=work
+            ["dials.find_spots", "input.expt", "nproc=%d" % self._n] + phil,
+            working_directory=work,
         )
+
+        phil = self._write_phil("index", work)
         result = procrunner.run(
             [
                 "dials.index",
                 "input.expt",
                 "strong.refl",
                 "index_assignment.method=local",
-            ],
+            ]
+            + phil,
             working_directory=work,
         )
+
+        phil = self._write_phil("refine", work)
         result = procrunner.run(
-            ["dials.refine", "indexed.expt", "indexed.refl"], working_directory=work
+            ["dials.refine", "indexed.expt", "indexed.refl"] + phil,
+            working_directory=work,
         )
 
+        phil = self._write_phil("integrate", work)
         result = procrunner.run(
-            ["dials.integrate", "refined.expt", "refined.refl", "nproc=%d" % self._n]
-            + phil,
+            ["dials.integrate", "refined.expt", "refined.refl", "nproc=%d" % self._n]
+            + phil,
             working_directory=work,
         )
@@ -256,8 +256,10 @@ def symmetry(self):
         if not os.path.exists(work):
             os.mkdir(work)
 
+        phil = self._write_phil("symmetry", work)
         result = procrunner.run(
             ["dials.symmetry"]
+            + phil
             + [
                 os.path.join(self._combined, f"combined.{exten}")
                 for exten in ["refl", "expt"]
@@ -224,7 +292,8 @@ def scale(self, d_min=None):
         else:
             source = os.path.join(self._symmetry, "symmetrized")
 
-        command = ["dials.scale"]
+        phil = self._write_phil("scale", work)
+        command = ["dials.scale"] + phil
         if d_min is not None:
             command.append(f"d_min={d_min}")
 
@@ -242,8 +311,10 @@ def resolution(self):
 
         source = os.path.join(self._scaled, "scaled")
 
+        phil = self._write_phil("resolution", work)
         result = procrunner.run(
             ["dials.resolutionizer"]
+            + phil
             + [f"{source}.{exten}" for exten in ["refl", "expt"]],
             working_directory=self._scaled,
         )
@@ -257,9 +328,11 @@ def resolution(self):
 
 
 if __name__ == "__main__":
-    filenames = sum(map(glob.glob, sys.argv[1:]), [])
+    args = [arg for arg in sys.argv[1:] if not "=" in arg]
+    params = [arg for arg in sys.argv[1:] if "=" in arg]
+    filenames = sum(map(glob.glob, args), [])
 
-    fp3 = FP3(filenames)
+    fp3 = FP3(filenames, params)
     fp3.index()
     fp3.integrate()
     fp3.combine()

From 124b2a8752e6c71410c6754b5cb831c53e3ea378 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Tue, 30 Jun 2020 14:52:07 +0100
Subject: [PATCH 17/65] Define work, duh

---
 command_line/fp3.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index 8b8305fd..3eba39a1 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -311,7 +311,9 @@ def resolution(self):
 
         source = os.path.join(self._scaled, "scaled")
 
+        work = os.path.join(self._root, "scale")
         phil = self._write_phil("resolution", work)
+
         result = procrunner.run(
             ["dials.resolutionizer"]
             + phil

From 10bc77311e11b85aadbbe72db31654bb26a17ee3 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Tue, 30 Jun 2020 17:04:45 +0100
Subject: [PATCH 18/65] New implementation of combine

Merge reflection files and experiments manually, with local
implementation
---
 fp3/__init__.py |  1 +
 fp3/tools.py    | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)
 create mode 100644 fp3/tools.py

diff --git a/fp3/__init__.py b/fp3/__init__.py
index 877719e7..2955780e 100644
--- a/fp3/__init__.py
+++ b/fp3/__init__.py
@@ -1,3 +1,4 @@
 from .util import even_blocks, index_blocks, format_phil_include, nproc
+from .tools import combine_reflections, combine_experiments
 
 __all__ = ["even_blocks", "index_blocks", "format_phil_include", "nproc"]
diff --git a/fp3/tools.py b/fp3/tools.py
new file mode 100644
index 00000000..5b796a76
--- /dev/null
+++ b/fp3/tools.py
@@ -0,0 +1,38 @@
+from dials.array_family import flex
+from dxtbx.model.experiment_list import ExperimentList
+
+
+def combine_reflections(fins, fout):
+    """Combine reflection files from fins into a single output file fout.
+    Makes assumptions that the data are consistent from one to the next, no
+    particular tests are performed at the moment."""
+
+    d0 = flex.reflection_table.from_file(fins[0])
+    for f in fins[1:]:
+        d1 = flex.reflection_table.from_file(f)
+
+        d1.experiment_identifiers()[0] = d0.experiment_identifiers()[0]
+        d0.extend(d1)
+
+    d0.as_file(fout)
+
+
+def combine_experiments(fins, fout):
+    """Combine experiment files from fins to a single output file fout.
+    Makes assumptions that the crystal model is consistent from one experiment
+    to the next, that the detectors are all similar."""
+
+    e0 = ExperimentList.from_file(fins[0])
+    c0 = e0[0].crystal
+    s0 = e0[0].scan
+    mats = [c0.get_A_at_scan_point(i) for i in range(c0.num_scan_points)]
+    for f in fins[1:]:
+        e1 = ExperimentList.from_file(f)
+        c1 = e1[0].crystal
+        s1 = e1[0].scan
+        s0 += s1
+        mats.extend(
+            [c1.get_A_at_scan_point(i + 1) for i in range(c1.num_scan_points - 1)]
+        )
+    c0.set_A_at_scan_points(mats)
+    e0.as_file(fout)

From 6714c3c63c235746dce9cc7e3659348e2e65d665 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Tue, 30 Jun 2020 17:05:02 +0100
Subject: [PATCH 19/65] Use local implementation of combine

---
 command_line/fp3.py | 21 +++++++--------------
 fp3/test_util.py    |  1 +
 2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index 3eba39a1..f5408747 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -7,7 +7,7 @@
 from iotbx import phil
 from dxtbx.model.experiment_list import ExperimentList, ExperimentListFactory
 from dials_scratch.fp3 import even_blocks, index_blocks, format_phil_include
-from dials_scratch.fp3 import nproc
+from dials_scratch.fp3 import nproc, combine_reflections, combine_experiments
 
 scope = phil.parse(
     """
@@ -221,20 +221,13 @@ def combine(self):
 
         assert self._integrated is not []
 
-        integrated = sum(
-            [
-                [
-                    os.path.join(directory, f"integrated.{exten}")
-                    for exten in ["refl", "expt"]
-                ]
-                for directory in sorted(self._integrated)
-            ],
-            [],
-        )
+        integrated_refl = [os.path.join(directory, "integrated.refl")
+                           for directory in sorted(self._integrated)]
+        integrated_expt = [os.path.join(directory, "integrated.expt")
+                           for directory in sorted(self._integrated)]
 
-        result = procrunner.run(
-            ["dials.combine_experiments"] + integrated, working_directory=work
-        )
+        combine_reflections(integrated_refl, os.path.join(work, "combined.refl"))
+        combine_experiments(integrated_expt, os.path.join(work, "combined.expt"))
 
         self._combined = work
diff --git a/fp3/test_util.py b/fp3/test_util.py
index b0f63d33..74f62d72 100644
--- a/fp3/test_util.py
+++ b/fp3/test_util.py
@@ -8,6 +8,7 @@ def test_even_blocks():
     for b in even_blocks(0, 2400, 72):
         assert b[1] - b[0] in (33, 34)
 
+
 def test_index_blocks():
     assert index_blocks(0, 150, 0.1) == [(0, 150)]
     assert index_blocks(0, 3600, 0.1) == [(0, 50), (450, 500), (900, 950)]

From e7320874b614076db15e324f6758c85471437913 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Mon, 6 Jul 2020 14:47:31 +0100
Subject: [PATCH 20/65] Fix off-by-one; use scan slicing

---
 command_line/fp3.py | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index f5408747..4a7daba8 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -154,19 +154,9 @@ def integrate_chunk(self, no, chunk):
         if not os.path.exists(work):
             os.mkdir(work)
 
-        expt = copy.deepcopy(self._experiment)
-
         # fix up the scan to correspond to input chunk
-        scan = expt[0].scan
-        epochs = scan.get_epochs()[chunk[0] : chunk[1]]
-        exp_times = scan.get_exposure_times()[chunk[0] : chunk[1]]
-        scan.set_image_range((chunk[0] + 1, chunk[1]))
-        scan.set_oscillation(
-            (self._osc[0] + (chunk[0] - self._rng[0]) * self._osc[1], self._osc[1])
-        )
-        scan.set_epochs(epochs)
-        scan.set_exposure_times(exp_times)
-        expt[0].scan = scan
+        expt = copy.deepcopy(self._experiment)
+        expt[0].scan = expt[0].scan[chunk[0] : chunk[1]]
 
         expt.as_file(os.path.join(work, "input.expt"))
@@ -221,10 +211,14 @@ def combine(self):
 
         assert self._integrated is not []
 
-        integrated_refl = [os.path.join(directory, "integrated.refl")
-                           for directory in sorted(self._integrated)]
-        integrated_expt = [os.path.join(directory, "integrated.expt")
-                           for directory in sorted(self._integrated)]
+        integrated_refl = [
+            os.path.join(directory, "integrated.refl")
+            for directory in sorted(self._integrated)
+        ]
+        integrated_expt = [
+            os.path.join(directory, "integrated.expt")
+            for directory in sorted(self._integrated)
+        ]
 
         combine_reflections(integrated_refl, os.path.join(work, "combined.refl"))
         combine_experiments(integrated_expt, os.path.join(work, "combined.expt"))

From 51b5081e1171c6420bcdb8c0b44bdff21b399828 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Fri, 4 Sep 2020 15:50:50 +0100
Subject: [PATCH 21/65] += boost_adaptbx.

---
 __init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/__init__.py b/__init__.py
index fccc5140..1b6f60d5 100644
--- a/__init__.py
+++ b/__init__.py
@@ -1,11 +1,11 @@
 from __future__ import division
 
 try:
-    import boost.python
+    import boost_adaptbx.boost.python
 except Exception:
     ext = None
 else:
-    ext = boost.python.import_ext("dials_scratch_ext", optional=True)
+    ext = boost_adaptbx.boost.python.import_ext("dials_scratch_ext", optional=True)
 
 if not ext is None:
     from dials_scratch_ext import *

From f6f299a805f764dccc5235bcebd90a245840469d Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Fri, 4 Sep 2020 15:51:07 +0100
Subject: [PATCH 22/65] dials.resolutionizer -> dials.estimate_resolution

---
 command_line/fp3.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index 4a7daba8..4e4f16ec 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -30,7 +30,7 @@
   include scope dials.command_line.scale.phil_scope
 }
 resolution {
-  include scope dials.command_line.resolutionizer.phil_scope
+  include scope dials.command_line.estimate_resolution.phil_scope
 }
 """,
     process_includes=True,
@@ -302,7 +302,7 @@ def resolution(self):
         phil = self._write_phil("resolution", work)
 
         result = procrunner.run(
-            ["dials.resolutionizer"]
+            ["dials.estimate_resolution"]
             + phil
             + [f"{source}.{exten}" for exten in ["refl", "expt"]],
             working_directory=self._scaled,

From 897d978a9ab9863bb3ce91e9dd8c5df9cc5594f1 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Fri, 4 Sep 2020 16:28:31 +0100
Subject: [PATCH 23/65] Parallelism

Add future parallels wrapper to run each processing task on a given
number of cores for a given number of workers - needs cluster addition
but gives a framework for parallel operation
---
 command_line/fp3.py | 43 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 8 deletions(-)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index 4e4f16ec..f41ea60f 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -3,6 +3,7 @@
 import os
 import glob
 import copy
+import concurrent.futures
 
 from iotbx import phil
 from dxtbx.model.experiment_list import ExperimentList, ExperimentListFactory
@@ -32,6 +33,14 @@
 resolution {
   include scope dials.command_line.estimate_resolution.phil_scope
 }
+parallelism = *process drmaa
+  .type = choice
+max_workers = 1
+  .type = int
+  .help = "Maximum number of concurrent workers"
+worker_nproc = 1
+  .type = int
+  .help = "Number of processes for each worker"
 """,
     process_includes=True,
 )
@@ -44,6 +53,7 @@ def __init__(self, filenames, params):
         # parse PHIL parameters
         clai = scope.command_line_argument_interpreter()
         self._working = scope.fetch(clai.process_and_fetch(params))
+        self._params = self._working.extract()
 
         print(scope.fetch_diff(self._working).as_str())
 
@@ -126,11 +136,14 @@ def integrate(self):
         nblocks = int(round(self._osc[1] * (rng[1] - rng[0] + 1) / 5.0))
         blocks = even_blocks(rng[0] - 1, rng[1], nblocks)
 
-        # need to figure out how to spin this off to something running on a
-        # cluster node... ideally want this called on many blocks at once
-
-        for j, block in enumerate(blocks):
-            self._integrated.append(self.integrate_chunk(j, block))
+        with concurrent.futures.ProcessPoolExecutor(
+            max_workers=self._params.max_workers
+        ) as pool:
+            jobs = []
+            for j, block in enumerate(blocks):
+                jobs.append(pool.submit(self.integrate_chunk, j, block))
+            for job in concurrent.futures.as_completed(jobs):
+                self._integrated.append(job.result())
 
     def integrate_chunk(self, no, chunk):
         """Integrate a chunk of data: performs -
@@ -143,6 +156,9 @@ def integrate_chunk(self, no, chunk):
         somehow stash the data as read for spot finding and not need to
         read more times in the integration."""
 
+        # FIXME this should probably be logging
+        print(f"Processing block {no} for images {chunk[0]} to {chunk[1]}")
+
         work = os.path.join(self._root, "integrate%03d" % no)
         if os.path.exists(work):
             if all(
@@ -161,9 +177,14 @@ def integrate_chunk(self, no, chunk):
         expt.as_file(os.path.join(work, "input.expt"))
 
         phil = self._write_phil("find_spots", work)
+        # FIXME nproc here depends on max_workers and parallelism mode
+        np = self._params.worker_nproc
+
         result = procrunner.run(
-            ["dials.find_spots", "input.expt", "nproc=%d" % self._n] + phil,
+            ["dials.find_spots", "input.expt", f"nproc={np}"] + phil,
             working_directory=work,
+            print_stdout=False,
+            print_stderr=False,
         )
 
         phil = self._write_phil("index", work)
@@ -176,19 +197,25 @@ def integrate_chunk(self, no, chunk):
             ]
             + phil,
             working_directory=work,
+            print_stdout=False,
+            print_stderr=False,
         )
 
         phil = self._write_phil("refine", work)
         result = procrunner.run(
             ["dials.refine", "indexed.expt", "indexed.refl"] + phil,
             working_directory=work,
+            print_stdout=False,
+            print_stderr=False,
         )
 
         phil = self._write_phil("integrate", work)
+        # FIXME nproc here depends on max_workers and parallelism mode
         result = procrunner.run(
-            ["dials.integrate", "refined.expt", "refined.refl", "nproc=%d" % self._n]
-            + phil,
+            ["dials.integrate", "refined.expt", "refined.refl", f"nproc={np}"] + phil,
             working_directory=work,
+            print_stdout=False,
+            print_stderr=False,
         )
 
         return work

From 0775262aee1e4097ea4106c50cd5db5106d39ff7 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Mon, 7 Sep 2020 07:01:27 +0100
Subject: [PATCH 24/65] WIP: create script to perform the processing for drmaa

---
 command_line/fp3.py | 81 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index f41ea60f..345fe9d0 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -156,6 +156,8 @@ def integrate_chunk(self, no, chunk):
         somehow stash the data as read for spot finding and not need to
         read more times in the integration."""
 
+        self.integrate_chunk_script(no, chunk)
+
         # FIXME this should probably be logging
         print(f"Processing block {no} for images {chunk[0]} to {chunk[1]}")
 
@@ -220,6 +222,85 @@ def integrate_chunk(self, no, chunk):
 
         return work
 
+    def integrate_chunk_script(self, no, chunk):
+        """Integrate a chunk of data: performs -
+        - spot finding
+        - indexing by using the UB matrix determined above (quick)
+        - scan varying refinement
+        - integration
+        And works in the usual way for DIALS of using spots from every
+        image for modelling. This is designed to be data-local e.g. could
+        somehow stash the data as read for spot finding and not need to
+        read more times in the integration. This writes one script for
+        submission to a cluster to do the processing."""
+
+        # FIXME this should probably be logging
+        print(f"Writing script {no} for images {chunk[0]} to {chunk[1]}")
+
+        # first check if there is nothing to be done here
+        work = os.path.join(self._root, "integrate%03d" % no)
+        if os.path.exists(work):
+            if all(
+                os.path.exists(os.path.join(work, f"integrated.{exten}"))
+                for exten in ["refl", "expt"]
+            ):
+                return work
+
+        if not os.path.exists(work):
+            os.mkdir(work)
+
+        # fix up the scan to correspond to input chunk, save to working area
+        expt = copy.deepcopy(self._experiment)
+        expt[0].scan = expt[0].scan[chunk[0] : chunk[1]]
+
+        expt.as_file(os.path.join(work, "input.expt"))
+
+        # this is used to (i) write script and (ii) submit to cluster
+        np = self._params.worker_nproc
+
+        fout = open(os.path.join(work, "integrate.sh"), "w")
+
+        # dump key environment: PATH should be enough?
+
+        fout.write(
+            "\n".join(["#!/bin/bash", "export PATH=%s" % os.environ["PATH"], "", ""])
+        )
+
+        phil = self._write_phil("find_spots", work)
+        fout.write(f"dials.find_spots input.expt nproc={np}" + " ".join(phil) + "\n")
+
+        phil = self._write_phil("index", work)
+        fout.write(
+            " ".join(
+                [
+                    "dials.index",
+                    "input.expt",
+                    "strong.refl",
+                    "index_assignment.method=local",
+                ]
+            )
+            + " ".join(phil)
+            + "\n"
+        )
+
+        phil = self._write_phil("refine", work)
+        fout.write(
+            " ".join(["dials.refine", "indexed.expt", "indexed.refl"])
+            + " ".join(phil)
+            + "\n"
+        )
+
+        phil = self._write_phil("integrate", work)
+        fout.write(
+            " ".join(["dials.integrate", "refined.expt", "refined.refl", f"nproc={np}"])
+            + " ".join(phil)
+            + "\n"
+        )
+
+        # FIXME submit the script for processing
+
+        return work
+
     def combine(self):
         """Collect together the data so far integrated."""

From 92ff391057fcc4e2652fa1adca3e5d8c7dc69589 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Mon, 7 Sep 2020 07:02:09 +0100
Subject: [PATCH 25/65] unlink

---
 command_line/fp3.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index 345fe9d0..409464b3 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -156,8 +156,6 @@ def integrate_chunk(self, no, chunk):
         somehow stash the data as read for spot finding and not need to
         read more times in the integration."""
 
-        self.integrate_chunk_script(no, chunk)
-
         # FIXME this should probably be logging
         print(f"Processing block {no} for images {chunk[0]} to {chunk[1]}")

From 6a2bf252ac55a726991c27aa59a675211edb3781 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Mon, 7 Sep 2020 08:32:55 +0100
Subject: [PATCH 26/65] Run shell scripts - useful step along the way to
 executing on cluster

---
 command_line/fp3.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index 409464b3..a2a494fb 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -141,7 +141,10 @@ def integrate(self):
         ) as pool:
             jobs = []
             for j, block in enumerate(blocks):
-                jobs.append(pool.submit(self.integrate_chunk, j, block))
+                if self._params.parallelism == "process":
+                    jobs.append(pool.submit(self.integrate_chunk, j, block))
+                else:
+                    jobs.append(pool.submit(self.integrate_chunk_script, j, block))
             for job in concurrent.futures.as_completed(jobs):
                 self._integrated.append(job.result())
@@ -266,6 +269,7 @@ def integrate_chunk_script(self, no, chunk):
 
         phil = self._write_phil("find_spots", work)
         fout.write(f"dials.find_spots input.expt nproc={np}" + " ".join(phil) + "\n")
+        fout.write("if [ $? -ne 0 ] ; then exit $? ; fi\n")
 
         phil = self._write_phil("index", work)
         fout.write(
@@ -280,6 +284,7 @@ def integrate_chunk_script(self, no, chunk):
             + " ".join(phil)
             + "\n"
         )
+        fout.write("if [ $? -ne 0 ] ; then exit $? ; fi\n")
 
         phil = self._write_phil("refine", work)
         fout.write(
@@ -287,6 +292,7 @@ def integrate_chunk_script(self, no, chunk):
             + " ".join(phil)
             + "\n"
         )
+        fout.write("if [ $? -ne 0 ] ; then exit $? ; fi\n")
 
         phil = self._write_phil("integrate", work)
         fout.write(
@@ -294,8 +300,18 @@ def integrate_chunk_script(self, no, chunk):
             + " ".join(phil)
             + "\n"
         )
+        fout.write("if [ $? -ne 0 ] ; then exit $? ; fi\n")
+
+        fout.close()
 
         # FIXME submit the script for processing
+        print(f"Executing script {no} for images {chunk[0]} to {chunk[1]}")
+        result = procrunner.run(
+            ["bash", "integrate.sh"],
+            working_directory=work,
+            print_stdout=False,
+            print_stderr=False,
+        )
 
         return work

From e4c835f2e8c53c89bd841b5bb3a11194547d2e8c Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Mon, 7 Sep 2020 08:51:53 +0100
Subject: [PATCH 27/65] Look for the environment script

---
 command_line/fp3.py |  7 ++-----
 fp3/__init__.py     |  4 ++--
 fp3/util.py         | 18 ++++++++++++++++++
 3 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index a2a494fb..1cb34303 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -9,6 +9,7 @@
 from dxtbx.model.experiment_list import ExperimentList, ExperimentListFactory
 from dials_scratch.fp3 import even_blocks, index_blocks, format_phil_include
 from dials_scratch.fp3 import nproc, combine_reflections, combine_experiments
+from dials_scratch.fp3 import find_setup_script
 
 scope = phil.parse(
     """
@@ -261,11 +262,7 @@ def integrate_chunk_script(self, no, chunk):
 
         fout = open(os.path.join(work, "integrate.sh"), "w")
 
-        # dump key environment: PATH should be enough?
-
-        fout.write(
-            "\n".join(["#!/bin/bash", "export PATH=%s" % os.environ["PATH"], "", ""])
-        )
+        fout.write("\n".join(["#!/bin/bash", f". {find_setup_script()}", "", ""]))
 
         phil = self._write_phil("find_spots", work)
         fout.write(f"dials.find_spots input.expt nproc={np}" + " ".join(phil) + "\n")
diff --git a/fp3/__init__.py b/fp3/__init__.py
index 2955780e..006b3157 100644
--- a/fp3/__init__.py
+++ b/fp3/__init__.py
@@ -1,4 +1,4 @@
-from .util import even_blocks, index_blocks, format_phil_include, nproc
+from .util import even_blocks, index_blocks, format_phil_include, nproc, find_setup_script
 from .tools import combine_reflections, combine_experiments
 
-__all__ = ["even_blocks", "index_blocks", "format_phil_include", "nproc"]
+__all__ = ["even_blocks", "index_blocks", "format_phil_include", "nproc", "find_setup_script"]
diff --git a/fp3/util.py b/fp3/util.py
index 97c3e694..2a92da4f 100644
--- a/fp3/util.py
+++ b/fp3/util.py
@@ -1,8 +1,26 @@
 import os
 
+import libtbx.load_env
 from libtbx.introspection import number_of_processors
 
 
+def find_setup_script():
+    """Look at the libtbx environment and look for the environment setup script
+    in a finite number of enumerated places."""
+
+    # old style setpaths.sh
+    script = libtbx.env.under_build("setpaths.sh")
+    if os.path.exists(script):
+        return script
+
+    # new dials way
+    script = libtbx.env.under_root("dials")
+    if os.path.exists(script):
+        return script
+
+    raise RuntimeError("cannot locate setup script")
+
+
 def even_blocks(n0, n1, m):
     """Split the range(n0, n1) into m evenly sized chunks, yield start, end
     for each chunk."""

From 5e648f28f9ad914ec5d8890ecf1cacad78f379a9 Mon Sep 17 00:00:00 2001
From: Irakli Sikharulidze
Date: Thu, 24 Sep 2020 23:23:21 +0100
Subject: [PATCH 28/65] Implement integrate task execution via drmaa array

---
 command_line/fp3.py | 71 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 51 insertions(+), 20 deletions(-)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index 1cb34303..5cedcef5 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -137,17 +137,17 @@ def integrate(self):
         nblocks = int(round(self._osc[1] * (rng[1] - rng[0] + 1) / 5.0))
         blocks = even_blocks(rng[0] - 1, rng[1], nblocks)
 
-        with concurrent.futures.ProcessPoolExecutor(
-            max_workers=self._params.max_workers
-        ) as pool:
-            jobs = []
-            for j, block in enumerate(blocks):
-                if self._params.parallelism == "process":
+        if self._params.parallelism == "process":
+            with concurrent.futures.ProcessPoolExecutor(
+                max_workers=self._params.max_workers
+            ) as pool:
+                jobs = []
+                for j, block in enumerate(blocks):
                     jobs.append(pool.submit(self.integrate_chunk, j, block))
-                else:
-                    jobs.append(pool.submit(self.integrate_chunk_script, j, block))
-            for job in concurrent.futures.as_completed(jobs):
-                self._integrated.append(job.result())
+                for job in concurrent.futures.as_completed(jobs):
+                    self._integrated.append(job.result())
+        else:
+            self.integrate_drmaa_array(blocks)
 
     def integrate_chunk(self, no, chunk):
         """Integrate a chunk of data: performs -
@@ -240,7 +240,7 @@ def integrate_chunk_script(self, no, chunk):
         print(f"Writing script {no} for images {chunk[0]} to {chunk[1]}")
 
         # first check if there is nothing to be done here
-        work = os.path.join(self._root, "integrate%03d" % no)
+        work = os.path.join(self._root, f"integrate{no:03}")
         if os.path.exists(work):
             if all(
                 os.path.exists(os.path.join(work, f"integrated.{exten}"))
@@ -301,17 +301,48 @@ def integrate_chunk_script(self, no, chunk):
 
         fout.close()
 
-        # FIXME submit the script for processing
-        print(f"Executing script {no} for images {chunk[0]} to {chunk[1]}")
-        result = procrunner.run(
-            ["bash", "integrate.sh"],
-            working_directory=work,
-            print_stdout=False,
-            print_stderr=False,
-        )
-
         return work
 
+    def integrate_drmaa_array(self, blocks):
+        script_file = os.path.join(self._root, 'run_integrate_array.sh')
+        with open(script_file, 'w') as script:
+
+            script.write('#!/bin/bash\n')
+            nblocks = 0
+            for idx, chunk in enumerate(blocks, start=1):
+
+                working_dir = self.integrate_chunk_script(idx, chunk)
+                script.write(f'WORKING_DIR_{idx}={working_dir}\n')
+                self._integrated.append(working_dir)
+                nblocks += 1
+
+            script.write('TASK_WORKING_DIR=WORKING_DIR_${SGE_TASK_ID}\n')
+            script.write('cd ${!TASK_WORKING_DIR}\n')
+            script.write('sh ./integrate.sh > ${!TASK_WORKING_DIR}/integrate.out 2> ${!TASK_WORKING_DIR}/integrate.err')
+
+        import drmaa
+        with drmaa.Session() as session:
+            job = session.createJobTemplate()
+            job.jobName = 'fp3_integrate'
+            job.workingDirectory = self._root
+            job.remoteCommand = 'sh'
+            args = [script_file,]
+            job.args = args
+            job.jobCategory = 'medium'
+            if self._params.worker_nproc > 1:
+                smp = f"-pe smp {self._params.worker_nproc}"
+            else:
+                smp = ""
+            #if sge_project:
+            #    proj = f"-P {sge_project}"
+            #else:
+            #    proj = ''
+            job.nativeSpecification = f"-V {smp} -l h_rt=12:00:00 -l mfree=4G -tc {self._params.max_workers} -o /dev/null -e /dev/null"
+
+            job_ids = session.runBulkJobs(job, 1, nblocks, 1)
+            session.synchronize(job_ids, drmaa.Session.TIMEOUT_WAIT_FOREVER, True)
+            session.deleteJobTemplate(job)
+
     def combine(self):
         """Collect together the data so far integrated."""

From 74c57e2de91e3534f2b2f67553b7bc60c7f4bdc7 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Mon, 28 Sep 2020 14:24:50 +0100
Subject: [PATCH 29/65] A regular copy is enough, and much quicker

---
 command_line/fp3.py | 43 ++++++++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index 5cedcef5..a1491da4 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -175,7 +175,7 @@ def integrate_chunk(self, no, chunk):
             os.mkdir(work)
 
         # fix up the scan to correspond to input chunk
-        expt = copy.deepcopy(self._experiment)
+        expt = copy.copy(self._experiment)
         expt[0].scan = expt[0].scan[chunk[0] : chunk[1]]
 
         expt.as_file(os.path.join(work, "input.expt"))
@@ -304,41 +304,46 @@ def integrate_chunk_script(self, no, chunk):
         return work
 
     def integrate_drmaa_array(self, blocks):
-        script_file = os.path.join(self._root, 'run_integrate_array.sh')
-        with open(script_file, 'w') as script:
-
-            script.write('#!/bin/bash\n')
+        script_file = os.path.join(self._root, "run_integrate_array.sh")
+        with open(script_file, "w") as script:
+
+            script.write("#!/bin/bash\n")
             nblocks = 0
             for idx, chunk in enumerate(blocks, start=1):
-
+
                 working_dir = self.integrate_chunk_script(idx, chunk)
-                script.write(f'WORKING_DIR_{idx}={working_dir}\n')
+                script.write(f"WORKING_DIR_{idx}={working_dir}\n")
                 self._integrated.append(working_dir)
                 nblocks += 1
-
-            script.write('TASK_WORKING_DIR=WORKING_DIR_${SGE_TASK_ID}\n')
-            script.write('cd ${!TASK_WORKING_DIR}\n')
-            script.write('sh ./integrate.sh > ${!TASK_WORKING_DIR}/integrate.out 2> ${!TASK_WORKING_DIR}/integrate.err')
-
+
+            script.write("TASK_WORKING_DIR=WORKING_DIR_${SGE_TASK_ID}\n")
+            script.write("cd ${!TASK_WORKING_DIR}\n")
+            script.write(
+                "sh ./integrate.sh > ${!TASK_WORKING_DIR}/integrate.out 2> ${!TASK_WORKING_DIR}/integrate.err"
+            )
+
         import drmaa
+
         with drmaa.Session() as session:
             job = session.createJobTemplate()
-            job.jobName = 'fp3_integrate'
+            job.jobName = "fp3_integrate"
             job.workingDirectory = self._root
-            job.remoteCommand = 'sh'
-            args = [script_file,]
+            job.remoteCommand = "sh"
+            args = [
+                script_file,
+            ]
             job.args = args
-            job.jobCategory = 'medium'
+            job.jobCategory = "medium"
             if self._params.worker_nproc > 1:
                 smp = f"-pe smp {self._params.worker_nproc}"
             else:
                 smp = ""
-            #if sge_project:
+            # if sge_project:
             #    proj = f"-P {sge_project}"
-            #else:
+            # else:
             #    proj = ''
             job.nativeSpecification = f"-V {smp} -l h_rt=12:00:00 -l mfree=4G -tc {self._params.max_workers} -o /dev/null -e /dev/null"
-
+
             job_ids = session.runBulkJobs(job, 1, nblocks, 1)
             session.synchronize(job_ids, drmaa.Session.TIMEOUT_WAIT_FOREVER, True)
             session.deleteJobTemplate(job)

From 39dca4a83180a3b28b1621a0fcbc4902e9b4f907 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Mon, 28 Sep 2020 15:05:44 +0100
Subject: [PATCH 30/65] Added elementary logging

---
 command_line/fp3.py | 50 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 36 insertions(+), 14 deletions(-)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index a1491da4..be913d9e 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -4,6 +4,9 @@
 import glob
 import copy
 import concurrent.futures
+import logging
+
+logger = logging.getLogger("fp3")
 
 from iotbx import phil
 from dxtbx.model.experiment_list import ExperimentList, ExperimentListFactory
@@ -42,6 +45,8 @@
 worker_nproc = 1
   .type = int
   .help = "Number of processes for each worker"
+log_level = CRITICAL ERROR WARNING *INFO DEBUG
+  .type = choice
 """,
     process_includes=True,
 )
@@ -55,8 +60,13 @@ def __init__(self, filenames, params):
         clai = scope.command_line_argument_interpreter()
         self._working = scope.fetch(clai.process_and_fetch(params))
         self._params = self._working.extract()
+        self._debug = self._params.log_level == "DEBUG"
 
-        print(scope.fetch_diff(self._working).as_str())
+        # configure logging
+        logging.basicConfig(filename="dials.fp3.log", format="%(message)s")
+        logger.addHandler(logging.StreamHandler(sys.stdout))
+        logger.setLevel(getattr(logging, self._params.log_level))
+        logger.info(scope.fetch_diff(self._working).as_str())
 
         self._crystal = None
         self._root = os.getcwd()
@@ -98,6 +108,8 @@ def index(self):
         indexed =
ExperimentList.from_file(os.path.join(work, "indexed.expt")) self._experiment[0].crystal = indexed[0].crystal + logger.info("Picked up pre-exising crystal:") + logger.info(indexed[0].crystal) return if not os.path.exists(work): @@ -109,23 +121,30 @@ def index(self): i0, i1 = self._rng blocks = [(b[0] + 1, b[1]) for b in index_blocks(i0 - 1, i1, self._osc[1])] + logger.info("Finding spots...") phil = self._write_phil("find_spots", work) result = procrunner.run( ["dials.find_spots", "input.expt", "nproc=%d" % self._n] + phil + ["scan_range=%d,%d" % block for block in blocks], working_directory=work, + print_stdout=self._debug, + print_stderr=self._debug, ) + logger.info("Indexing...") # let's just assume that was fine - so index phil = self._write_phil("index", work) result = procrunner.run( - ["dials.index", "input.expt", "strong.refl"] + phil, working_directory=work + ["dials.index", "input.expt", "strong.refl"] + phil, + working_directory=work, + print_stdout=self._debug, + print_stderr=self._debug, ) indexed = ExperimentList.from_file(os.path.join(work, "indexed.expt")) - + logger.info(indexed[0].crystal) self._experiment[0].crystal = indexed[0].crystal def integrate(self): @@ -137,6 +156,8 @@ def integrate(self): nblocks = int(round(self._osc[1] * (rng[1] - rng[0] + 1) / 5.0)) blocks = even_blocks(rng[0] - 1, rng[1], nblocks) + logger.info("Integrating...") + if self._params.parallelism == "process": with concurrent.futures.ProcessPoolExecutor( max_workers=self._params.max_workers @@ -160,8 +181,7 @@ def integrate_chunk(self, no, chunk): somehow stash the data as read for spot finding and not need to read more times in the integration.""" - # FIXME this should probably be logging - print(f"Processing block {no} for images {chunk[0]} to {chunk[1]}") + logger.debug(f"Processing block {no} for images {chunk[0]} to {chunk[1]}") work = os.path.join(self._root, "integrate%03d" % no) if os.path.exists(work): @@ -236,8 +256,7 @@ def integrate_chunk_script(self, no, chunk): read more times in the integration. 
This writes one script for submission to a cluster to do the processing.""" - # FIXME this should probably be logging - print(f"Writing script {no} for images {chunk[0]} to {chunk[1]}") + logger.debug(f"Writing script {no} for images {chunk[0]} to {chunk[1]}") # first check if there is nothing to be done here work = os.path.join(self._root, f"integrate{no:03}") @@ -329,19 +348,13 @@ def integrate_drmaa_array(self, blocks): job.jobName = "fp3_integrate" job.workingDirectory = self._root job.remoteCommand = "sh" - args = [ - script_file, - ] + args = [script_file] job.args = args job.jobCategory = "medium" if self._params.worker_nproc > 1: smp = f"-pe smp {self._params.worker_nproc}" else: smp = "" - # if sge_project: - # proj = f"-P {sge_project}" - # else: - # proj = '' job.nativeSpecification = f"-V {smp} -l h_rt=12:00:00 -l mfree=4G -tc {self._params.max_workers} -o /dev/null -e /dev/null" job_ids = session.runBulkJobs(job, 1, nblocks, 1) @@ -351,6 +364,7 @@ def integrate_drmaa_array(self, blocks): def combine(self): """Collect together the data so far integrated.""" + logger.info("Combining...") work = os.path.join(self._root, "combine") if os.path.exists(work): @@ -385,6 +399,7 @@ def symmetry(self): assert self._combined is not None + logger.info("Determining symmetry...") work = os.path.join(self._root, "symmetry") if os.path.exists(work): @@ -407,6 +422,8 @@ def symmetry(self): for exten in ["refl", "expt"] ], working_directory=work, + print_stdout=self._debug, + print_stderr=self._debug, ) self._symmetry = work @@ -416,6 +433,7 @@ def scale(self, d_min=None): If the data have been previously scaled, start from the scaled data for sake of efficiency.""" + logger.info("Scaling...") work = os.path.join(self._root, "scale") if os.path.exists(work): @@ -442,6 +460,8 @@ def scale(self, d_min=None): result = procrunner.run( command + [f"{source}.{exten}" for exten in ["refl", "expt"]], working_directory=work, + print_stdout=self._debug, + print_stderr=self._debug, ) self._scaled = work @@ -461,6 +481,8 @@ def resolution(self): + phil + [f"{source}.{exten}" for exten in ["refl", "expt"]], working_directory=self._scaled, + print_stdout=self._debug, + print_stderr=self._debug, ) d_min = None From 7507c0fddf4a806b90ae3edad10a86c94bf32c39 Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Mon, 28 Sep 2020 15:41:05 +0100 Subject: [PATCH 31/65] Show final crystal --- command_line/fp3.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/command_line/fp3.py b/command_line/fp3.py index be913d9e..ef49973e 100644 --- a/command_line/fp3.py +++ b/command_line/fp3.py @@ -407,6 +407,8 @@ def symmetry(self): os.path.exists(os.path.join(work, f"symmetrized.{exten}")) for exten in ["refl", "expt"] ): + symm = ExperimentList.from_file(os.path.join(work, "symmetrized.expt")) + logger.info(symm[0].crystal) self._symmetry = work return work @@ -426,6 +428,8 @@ def symmetry(self): print_stderr=self._debug, ) + symm = ExperimentList.from_file(os.path.join(work, "symmetrized.expt")) + logger.info(symm[0].crystal) self._symmetry = work def scale(self, d_min=None): @@ -471,6 +475,7 @@ def resolution(self): assert self._scaled + logger.info("Estimating resolution...") source = os.path.join(self._scaled, "scaled") work = os.path.join(self._root, "scale") @@ -490,6 +495,10 @@ def resolution(self): if record.startswith(b"Resolution cc_half:"): d_min = float(record.split()[-1]) + if d_min is not None: + logger.info(f"Resolution determined as {d_min:.2f}") + else: + logger.info("Recommendation 
is use all data") return d_min @@ -505,4 +514,5 @@ def resolution(self): fp3.symmetry() fp3.scale() d_min = fp3.resolution() - fp3.scale(d_min=d_min) + if d_min: + fp3.scale(d_min=d_min) From 3240040580b6b3df56d136015b9355d440ba424b Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Mon, 28 Sep 2020 15:49:59 +0100 Subject: [PATCH 32/65] New file not append mode --- command_line/fp3.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/command_line/fp3.py b/command_line/fp3.py index ef49973e..618c0b0a 100644 --- a/command_line/fp3.py +++ b/command_line/fp3.py @@ -63,7 +63,9 @@ def __init__(self, filenames, params): self._debug = self._params.log_level == "DEBUG" # configure logging - logging.basicConfig(filename="dials.fp3.log", format="%(message)s") + logging.basicConfig( + filename="dials.fp3.log", filemode="w", format="%(message)s" + ) logger.addHandler(logging.StreamHandler(sys.stdout)) logger.setLevel(getattr(logging, self._params.log_level)) logger.info(scope.fetch_diff(self._working).as_str()) From a55d74d1208073535368c8cc46092337e0d84788 Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Mon, 28 Sep 2020 15:50:13 +0100 Subject: [PATCH 33/65] Print merging stats --- command_line/fp3.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/command_line/fp3.py b/command_line/fp3.py index 618c0b0a..98e737ea 100644 --- a/command_line/fp3.py +++ b/command_line/fp3.py @@ -470,6 +470,14 @@ def scale(self, d_min=None): print_stderr=self._debug, ) + _ = result["stdout"].split(b"--Summary of merging statistics--").strip() + self._stats = [] + for line in _.split(b"\n"): + if not line.strip(): + break + self._stats.append(line) + + logger.info("\n".join(self._stats)) self._scaled = work def resolution(self): From ec60ca1e73df9e0cfb02cae0dd89681c69b97fb8 Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Mon, 28 Sep 2020 16:29:43 +0100 Subject: [PATCH 34/65] typos --- command_line/fp3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/command_line/fp3.py b/command_line/fp3.py index 98e737ea..600f6699 100644 --- a/command_line/fp3.py +++ b/command_line/fp3.py @@ -470,12 +470,12 @@ def scale(self, d_min=None): print_stderr=self._debug, ) - _ = result["stdout"].split(b"--Summary of merging statistics--").strip() + _ = result["stdout"].split(b"--Summary of merging statistics--")[-1].strip() self._stats = [] for line in _.split(b"\n"): if not line.strip(): break - self._stats.append(line) + self._stats.append(str(line)) logger.info("\n".join(self._stats)) self._scaled = work From 4377b9cd6e893d22f9cc633689f33eac2eb58206 Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Tue, 29 Sep 2020 05:48:40 +0100 Subject: [PATCH 35/65] Processing times --- command_line/fp3.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/command_line/fp3.py b/command_line/fp3.py index 600f6699..9e1bb802 100644 --- a/command_line/fp3.py +++ b/command_line/fp3.py @@ -5,6 +5,7 @@ import copy import concurrent.futures import logging +import time logger = logging.getLogger("fp3") @@ -518,11 +519,20 @@ def resolution(self): filenames = sum(map(glob.glob, args), []) fp3 = FP3(filenames, params) + t0 = time.time() fp3.index() + t1 = time.time() fp3.integrate() + t2 = time.time() fp3.combine() + t3 = time.time() fp3.symmetry() + t4 = time.time() fp3.scale() + t5 = time.time() d_min = fp3.resolution() if d_min: fp3.scale(d_min=d_min) + t6 = time.time() + logger.info("\n".join(fp3._stats)) + logger.info(f"Total processing time: {t6 - t0:0.2f}s") From 
bfbd3a23383551f727f498dd78e5ffa73b4abd12 Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Tue, 29 Sep 2020 05:48:53 +0100 Subject: [PATCH 36/65] Correctly get scaling output --- command_line/fp3.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/command_line/fp3.py b/command_line/fp3.py index 9e1bb802..a5add52a 100644 --- a/command_line/fp3.py +++ b/command_line/fp3.py @@ -449,6 +449,24 @@ def scale(self, d_min=None): for exten in ["refl", "expt"] ): self._scaled = work + stdout = ( + open(os.path.join(work, "dials.scale.log"), "rb") + .read() + .split(b"\n") + ) + + # find summary table + for j, line in enumerate(stdout): + if b"--Summary of merging statistics--" in line: + break + + # save table of values + self._stats = [] + for line in stdout[j + 2 :]: + if not line.strip(): + break + self._stats.append(line.decode()) + return work if not os.path.exists(work): @@ -471,14 +489,20 @@ def scale(self, d_min=None): print_stderr=self._debug, ) - _ = result["stdout"].split(b"--Summary of merging statistics--")[-1].strip() + stdout = result["stdout"].split(b"\n") + + # find summary table + for j, line in enumerate(stdout): + if b"----------Summary of merging statistics-----------" in line: + break + + # save table of values self._stats = [] - for line in _.split(b"\n"): + for line in stdout[j + 2 :]: if not line.strip(): break - self._stats.append(str(line)) + self._stats.append(line.decode()) - logger.info("\n".join(self._stats)) self._scaled = work def resolution(self): From 4b41a5dab9b234e023b46d6263ac0a8ebf2efd86 Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Tue, 29 Sep 2020 05:52:56 +0100 Subject: [PATCH 37/65] TODO --- command_line/fp3.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/command_line/fp3.py b/command_line/fp3.py index a5add52a..366e68bb 100644 --- a/command_line/fp3.py +++ b/command_line/fp3.py @@ -346,6 +346,9 @@ def integrate_drmaa_array(self, blocks): import drmaa + # TODO in here check if processing already performed before submission + # of task... + with drmaa.Session() as session: job = session.createJobTemplate() job.jobName = "fp3_integrate" From 048debd8469cceb3e4aed234961f28131b9a3d20 Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Tue, 29 Sep 2020 08:53:47 +0100 Subject: [PATCH 38/65] Initial documentation --- fp3/fp3.rst | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 fp3/fp3.rst diff --git a/fp3/fp3.rst b/fp3/fp3.rst new file mode 100644 index 00000000..442ca949 --- /dev/null +++ b/fp3/fp3.rst @@ -0,0 +1,91 @@ +=== +FP3 +=== + +----------------------------------- +Faster Parallel Processing Pipeline +----------------------------------- + +Introduction +============ + +The usual DIALS pipeline is to: + + - import data + - find spots on all images + - index + - (optionally) determine Bravais lattice + - refine + - integrate + - (optionally) pre-scale the data + - determine symmetry + - scale + +This uses all of the data for every stage in the process, to give a good global model of the experiment, and some parallelism is employed throughout the process. While it is effective it is not necessarily _quick_, and processing can take a large amount of time for more substantial data sets. The aim of the ``fp3`` pipeline is to streamline some of the analysis without significantly compromising the quality of the results, in the same spirit as the ``fast_dp`` pipeline, which uses ``XDS``. 
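+
+For orientation, this conventional route corresponds roughly to the following command sequence (an illustrative sketch - the filenames are the DIALS defaults and every step takes many more options than shown)::
+
+    dials.import ../data_master.h5
+    dials.find_spots imported.expt
+    dials.index imported.expt strong.refl
+    dials.refine indexed.expt indexed.refl
+    dials.integrate refined.expt refined.refl
+    dials.symmetry integrated.expt integrated.refl
+    dials.scale symmetrized.expt symmetrized.refl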
+ +Workflow +======== + +The workflow of ``fp3`` is very similar to that of ``fast_dp`` in that spots are found in three blocks at the start of the data set, indexing performed in P1 on this block, then the matrix used to integrate the entire data set in small blocks. This was found during the development of ``xia2`` to give an accurate matrix with minimal computational expense. ``fp3`` however works differently to ``fast_dp`` in the detail as the DIALS programs have a different set of assumptions. The workflow is therefore: + + - import the data + - find spots on 0-5°, 45-50° and 90-95° or as close as can be reached + - use this for indexing, determine and refine a UB matrix + - for each 5° block of data: + * find spots + * re-index with the known UB matrix + * refine + * integrate + - combine all integrate data + - determine symmetry + - scale + - decide resolution limit + - (as necessary) re-scale to determined limit + +This is logically _inefficient_ since the spot finding on some of the data is performed twice, however it is effective as every 5° block of data may be treated independently however is guaranteed to come out with consistently indexed data (provided that the sample has not slipped excessively). As such, these steps may be performed in parallel across multiple nodes on a cluster, or across multiple cores in a many-core computer. Within each step multiple cores may also be used (e.g. spot finding and integration) allowing for a substantial amount of compute to be applied to the problem. For finely sliced data the spot finding and integration are the most computationally expensive steps, and here these are mitigated in the first instance by limiting the range of data used for determination of the orientation matrix, and in the second by application of substantial parallelism. + +Usage +===== + +The basic program usage is:: + + dials.fp3 \ + max_workers=288 \ + parallelism=drmaa \ + worker_nproc=4 \ + ../Insulin15/Insulin15_3_master.h5 + +Here we have allowed up to 288 5° blocks to be processed in parallel, each using up to 4 cores (so a total of up to 1152 cores) using the ``drmaa`` mechanism for cluster job submission. Alternatively within a single machine the following may be used:: + + dials.fp3 max_workers=8 worker_nproc=2 ../data_master.h5 + +which will use up to 16 cores for the processing. For every stage in the processing (spot finding, indexing, refinement, integration &c.) the usual PHIL processing parameters may be passed in as every PHIL scope in DIALS is included in the ``fp3`` scope - for example assigning the unit cell and symmetry in indexing is as easy as setting e.g.:: + + known_symmetry.unit_cell=57,57,150,90,90,90 \ + known_symmetry.space_group=P41212 + +for example, for thaumatin. Clearly including the full PHIL scope for every step provides a *lot* of options - some of which could of course be mutually inconsistent, however in the most general case no options should be necessary. + +In the event that e.g. you want to rerun the scaling step with different parameters all that is necessary is to remove the ``scale`` directory and rerun - the results recorded to this point are re-loaded so the processing will pick up at the point of scaling. + +Acknowledgements +================ + +Clearly ``fp3`` would be impossible without the DIALS toolkit. The work has been supported by Diamond Light Source as part of ongoing operations. + +License +======= + +Copyright (c) 2012-2020 Diamond Light Source. +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + - Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. From c7f9cebac9acf8f8500d5334363de32cdf99bcea Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Tue, 29 Sep 2020 09:00:25 +0100 Subject: [PATCH 39/65] += authors --- fp3/fp3.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fp3/fp3.rst b/fp3/fp3.rst index 442ca949..5fdd3593 100644 --- a/fp3/fp3.rst +++ b/fp3/fp3.rst @@ -89,3 +89,9 @@ contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +Authors +======= + + - Graeme Winter + - Irakli Sikharulidze From 282d992cabf204b939a0adcbf732708db331d15f Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Tue, 29 Sep 2020 09:06:43 +0100 Subject: [PATCH 40/65] Fix indentation --- fp3/fp3.rst | 54 ++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/fp3/fp3.rst b/fp3/fp3.rst index 5fdd3593..956d99b6 100644 --- a/fp3/fp3.rst +++ b/fp3/fp3.rst @@ -11,15 +11,15 @@ Introduction The usual DIALS pipeline is to: - - import data - - find spots on all images - - index - - (optionally) determine Bravais lattice - - refine - - integrate - - (optionally) pre-scale the data - - determine symmetry - - scale +- import data +- find spots on all images +- index +- (optionally) determine Bravais lattice +- refine +- integrate +- (optionally) pre-scale the data +- determine symmetry +- scale This uses all of the data for every stage in the process, to give a good global model of the experiment, and some parallelism is employed throughout the process. While it is effective it is not necessarily _quick_, and processing can take a large amount of time for more substantial data sets. The aim of the ``fp3`` pipeline is to streamline some of the analysis without significantly compromising the quality of the results, in the same spirit as the ``fast_dp`` pipeline, which uses ``XDS``. @@ -28,19 +28,19 @@ Workflow The workflow of ``fp3`` is very similar to that of ``fast_dp`` in that spots are found in three blocks at the start of the data set, indexing performed in P1 on this block, then the matrix used to integrate the entire data set in small blocks. This was found during the development of ``xia2`` to give an accurate matrix with minimal computational expense. ``fp3`` however works differently to ``fast_dp`` in the detail as the DIALS programs have a different set of assumptions. The workflow is therefore: - - import the data - - find spots on 0-5°, 45-50° and 90-95° or as close as can be reached - - use this for indexing, determine and refine a UB matrix - - for each 5° block of data: - * find spots - * re-index with the known UB matrix - * refine - * integrate - - combine all integrate data - - determine symmetry - - scale - - decide resolution limit - - (as necessary) re-scale to determined limit +- import the data +- find spots on 0-5°, 45-50° and 90-95° or as close as can be reached +- use this for indexing, determine and refine a UB matrix +- for each 5° block of data: + * find spots + * re-index with the known UB matrix + * refine + * integrate +- combine all integrate data +- determine symmetry +- scale +- decide resolution limit +- (as necessary) re-scale to determined limit This is logically _inefficient_ since the spot finding on some of the data is performed twice, however it is effective as every 5° block of data may be treated independently however is guaranteed to come out with consistently indexed data (provided that the sample has not slipped excessively). As such, these steps may be performed in parallel across multiple nodes on a cluster, or across multiple cores in a many-core computer. Within each step multiple cores may also be used (e.g. spot finding and integration) allowing for a substantial amount of compute to be applied to the problem. 
For finely sliced data the spot finding and integration are the most computationally expensive steps, and here these are mitigated in the first instance by limiting the range of data used for determination of the orientation matrix, and in the second by application of substantial parallelism. @@ -82,9 +82,9 @@ All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - - Neither the name of the copyright holder nor the names of its +- Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +- Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -93,5 +93,5 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND Authors ======= - - Graeme Winter - - Irakli Sikharulidze +- Graeme Winter +- Irakli Sikharulidze From 51326105d4f45f40d32eab49bdc25427a0a3f489 Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Tue, 29 Sep 2020 09:08:06 +0100 Subject: [PATCH 41/65] Fix indentation --- fp3/fp3.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fp3/fp3.rst b/fp3/fp3.rst index 956d99b6..fe1127c4 100644 --- a/fp3/fp3.rst +++ b/fp3/fp3.rst @@ -82,11 +82,15 @@ All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -- Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +- Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. - Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

From 2ce59f6ecb96f8d4026836e69e2a4ed0ef623877 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Tue, 29 Sep 2020 09:09:46 +0100
Subject: [PATCH 42/65] Fix indentation

---
 fp3/fp3.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fp3/fp3.rst b/fp3/fp3.rst
index fe1127c4..736d2df9 100644
--- a/fp3/fp3.rst
+++ b/fp3/fp3.rst
@@ -32,10 +32,12 @@ The workflow of ``fp3`` is very similar to that of ``fast_dp`` in that spots are
 - find spots on 0-5°, 45-50° and 90-95° or as close as can be reached
 - use this for indexing, determine and refine a UB matrix
 - for each 5° block of data:
+
   * find spots
   * re-index with the known UB matrix
   * refine
   * integrate
+
 - combine all integrate data
 - determine symmetry
 - scale
From c2bad9bd81b83e04fb90f5b66df4afca0994ce93 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Tue, 29 Sep 2020 09:10:42 +0100
Subject: [PATCH 43/65] italic

---
 fp3/fp3.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fp3/fp3.rst b/fp3/fp3.rst
index 736d2df9..f8923179 100644
--- a/fp3/fp3.rst
+++ b/fp3/fp3.rst
@@ -44,7 +44,7 @@ The workflow of ``fp3`` is very similar to that of ``fast_dp`` in that spots are
 - decide resolution limit
 - (as necessary) re-scale to determined limit
 
-This is logically _inefficient_ since the spot finding on some of the data is performed twice, however it is effective as every 5° block of data may be treated independently however is guaranteed to come out with consistently indexed data (provided that the sample has not slipped excessively). As such, these steps may be performed in parallel across multiple nodes on a cluster, or across multiple cores in a many-core computer. Within each step multiple cores may also be used (e.g. spot finding and integration) allowing for a substantial amount of compute to be applied to the problem. For finely sliced data the spot finding and integration are the most computationally expensive steps, and here these are mitigated in the first instance by limiting the range of data used for determination of the orientation matrix, and in the second by application of substantial parallelism.
+This is logically *inefficient* since the spot finding on some of the data is performed twice; however, it is effective, as every 5° block of data may be treated independently yet is guaranteed to come out with consistently indexed data (provided that the sample has not slipped excessively). As such, these steps may be performed in parallel across multiple nodes on a cluster, or across multiple cores in a many-core computer. Within each step multiple cores may also be used (e.g. spot finding and integration) allowing for a substantial amount of compute to be applied to the problem. For finely sliced data the spot finding and integration are the most computationally expensive steps, and here these are mitigated in the first instance by limiting the range of data used for determination of the orientation matrix, and in the second by application of substantial parallelism.
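+
+In outline the work performed in each ``integrate`` working directory is simply the standard DIALS command sequence, approximately (an illustrative sketch - the real ``nproc`` values and PHIL includes are filled in by ``fp3`` itself)::
+
+    dials.find_spots input.expt nproc=4
+    dials.index input.expt strong.refl index_assignment.method=local
+    dials.refine indexed.expt indexed.refl
+    dials.integrate refined.expt refined.refl nproc=4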
Usage ===== From 930d6acb97fdc62062f4d5437a8a0400bc810d3c Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Tue, 29 Sep 2020 10:41:36 +0100 Subject: [PATCH 44/65] If debug; print logs --- command_line/fp3.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/command_line/fp3.py b/command_line/fp3.py index 366e68bb..0a17c8d2 100644 --- a/command_line/fp3.py +++ b/command_line/fp3.py @@ -173,6 +173,19 @@ def integrate(self): else: self.integrate_drmaa_array(blocks) + # if debug, print the log files from each of the integration stages + if self._debug: + for j, block in enumerate(blocks): + work = os.path.join(self._root, "integrate%03d" % no) + for log in ( + "dials.find_spots.log", + "dials.index.log", + "dials.refine.log", + "dials.integrate.log", + ): + for record in open(os.path.join(work, log)): + logger.debug(record[:-1]) + def integrate_chunk(self, no, chunk): """Integrate a chunk of data: performs - - spot finding From c2a32c2bdf9a113e8a3b8ce984932a1e5c363771 Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Fri, 2 Oct 2020 09:46:21 +0100 Subject: [PATCH 45/65] Typo Co-authored-by: David Waterman --- fp3/tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fp3/tools.py b/fp3/tools.py index 5b796a76..896fb207 100644 --- a/fp3/tools.py +++ b/fp3/tools.py @@ -18,7 +18,7 @@ def combine_reflections(fins, fout): def combine_experiments(fins, fout): - """Combine experiment files from find to a single output file fout. + """Combine experiment files from fins to a single output file fout. Makes assumptions that the crystal model is consistent from one experiment to the next, that the detectors are all similar.""" From 5d67b4c8c7e0ee46c128541bd72e50ba21648845 Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Fri, 2 Oct 2020 09:46:34 +0100 Subject: [PATCH 46/65] Typo Co-authored-by: David Waterman --- fp3/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fp3/util.py b/fp3/util.py index 2a92da4f..02faf411 100644 --- a/fp3/util.py +++ b/fp3/util.py @@ -22,7 +22,7 @@ def find_setup_script(): def even_blocks(n0, n1, m): - """Split the range(n0, n1) into m evenly sized chunks, yeild start, end + """Split the range(n0, n1) into m evenly sized chunks, yield start, end for each chunk.""" r = m From 1c6bb8a7bfbdded23a2e03234d8a92a65cff9eed Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Fri, 2 Oct 2020 09:46:49 +0100 Subject: [PATCH 47/65] Typo Co-authored-by: David Waterman --- command_line/fp3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command_line/fp3.py b/command_line/fp3.py index 0a17c8d2..efc6df73 100644 --- a/command_line/fp3.py +++ b/command_line/fp3.py @@ -111,7 +111,7 @@ def index(self): indexed = ExperimentList.from_file(os.path.join(work, "indexed.expt")) self._experiment[0].crystal = indexed[0].crystal - logger.info("Picked up pre-exising crystal:") + logger.info("Picked up pre-existing crystal:") logger.info(indexed[0].crystal) return From e4693988e65d359d696e6b6fa03437c749a36574 Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Fri, 2 Oct 2020 09:51:25 +0100 Subject: [PATCH 48/65] Check file present Co-authored-by: David Waterman --- command_line/fp3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command_line/fp3.py b/command_line/fp3.py index efc6df73..9772e0e7 100644 --- a/command_line/fp3.py +++ b/command_line/fp3.py @@ -107,7 +107,7 @@ def index(self): just reload the results from the previous run.""" work = os.path.join(self._root, "index") - if os.path.exists(work): + 
if os.path.exists(os.path.join(work, "indexed.expt")): indexed = ExperimentList.from_file(os.path.join(work, "indexed.expt")) self._experiment[0].crystal = indexed[0].crystal From 57ca4ff25ec6fb7ecabe4d9d30adaeb394a4b11c Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Fri, 2 Oct 2020 09:55:01 +0100 Subject: [PATCH 49/65] Disambiguate rng --- command_line/fp3.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/command_line/fp3.py b/command_line/fp3.py index 9772e0e7..3fb1a5cd 100644 --- a/command_line/fp3.py +++ b/command_line/fp3.py @@ -78,10 +78,9 @@ def __init__(self, filenames, params): # quick checks... scan = self._experiment[0].scan osc = scan.get_oscillation() - rng = scan.get_image_range() self._osc = osc - self._rng = rng + self._image_range = scan.get_image_range() self._integrated = [] self._combined = None @@ -121,7 +120,7 @@ def index(self): self._experiment.as_file(os.path.join(work, "input.expt")) five = int(round(5 / self._osc[1])) - i0, i1 = self._rng + i0, i1 = self._image_range blocks = [(b[0] + 1, b[1]) for b in index_blocks(i0 - 1, i1, self._osc[1])] logger.info("Finding spots...") @@ -154,10 +153,10 @@ def integrate(self): """Integration of the complete scan: will split the data into 5 deg chunks and spin the integration of each chunk off separately""" - rng = self._rng + irange = self._image_range - nblocks = int(round(self._osc[1] * (rng[1] - rng[0] + 1) / 5.0)) - blocks = even_blocks(rng[0] - 1, rng[1], nblocks) + nblocks = int(round(self._osc[1] * (irange[1] - irange[0] + 1) / 5.0)) + blocks = even_blocks(irange[0] - 1, irange[1], nblocks) logger.info("Integrating...") From a151b3e9a18a68ecb45562c3a6b5e5f60283bc5b Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Fri, 2 Oct 2020 10:36:48 +0100 Subject: [PATCH 50/65] Prime factor code for sharing out cores --- fp3/__init__.py | 20 ++++++++++++++++++-- fp3/test_util.py | 16 +++++++++++++++- fp3/util.py | 16 +++++++++++++++- 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/fp3/__init__.py b/fp3/__init__.py index 006b3157..7ebcc510 100644 --- a/fp3/__init__.py +++ b/fp3/__init__.py @@ -1,4 +1,20 @@ -from .util import even_blocks, index_blocks, format_phil_include, nproc, find_setup_script +from .util import ( + even_blocks, + index_blocks, + format_phil_include, + nproc, + find_setup_script, + prime_factors, +) from .tools import combine_reflections, combine_experiments -__all__ = ["even_blocks", "index_blocks", "format_phil_include", "nproc", "find_setup_script"] +__all__ = [ + "even_blocks", + "index_blocks", + "format_phil_include", + "nproc", + "find_setup_script", + "prime_factors", + "combine_reflections", + "combine_experiments", +] diff --git a/fp3/test_util.py b/fp3/test_util.py index 74f62d72..71cc54f9 100644 --- a/fp3/test_util.py +++ b/fp3/test_util.py @@ -1,4 +1,5 @@ -from .util import even_blocks, index_blocks +import functools +from .util import even_blocks, index_blocks, prime_factors def test_even_blocks(): @@ -13,3 +14,16 @@ def test_index_blocks(): assert index_blocks(0, 150, 0.1) == [(0, 150)] assert index_blocks(0, 3600, 0.1) == [(0, 50), (450, 500), (900, 950)] assert index_blocks(0, 900, 0.1) == [(0, 50), (425, 475), (850, 900)] + + +def test_prime_factors(): + assert prime_factors(12) == [2, 2, 3] + assert prime_factors(2 ** 8) == [2] * 8 + assert prime_factors(2 * 3 * 4 * 5 * 6) == [2, 2, 2, 2, 3, 3, 5] + + # FIXME should probably move this to it's own function + f = prime_factors(12) + a = functools.reduce((lambda x, y: x * y), f[0::2]) + b = 
functools.reduce((lambda x, y: x * y), f[1::2]) + assert a == 6 + assert b == 2 diff --git a/fp3/util.py b/fp3/util.py index 02faf411..d95d17b2 100644 --- a/fp3/util.py +++ b/fp3/util.py @@ -37,7 +37,7 @@ def even_blocks(n0, n1, m): def index_blocks(n0, n1, osc): - """Give back a list of blocks in the range n0, n1 of around 5°, spaced at + """Give back a list of blocks in the range n0, n1 of around 5°, spaced at the start, start + 45°, start + 90° if possible. If <= 15° of data in total, use all, if < 95° get as close as possible to 45° wedge spacing.""" @@ -76,3 +76,17 @@ def nproc(): pass return number_of_processors(return_value_if_unknown=-1) + + +def prime_factors(n): + i = 2 + factors = [] + while i * i <= n: + if n % i: + i += 1 + else: + n //= i + factors.append(i) + if n > 1: + factors.append(n) + return factors From b219fc050e14e781f74c76f8b33cdb5f80ac643a Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Fri, 2 Oct 2020 10:38:29 +0100 Subject: [PATCH 51/65] If nproc and workers == 1 guess sensible division --- command_line/fp3.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/command_line/fp3.py b/command_line/fp3.py index 3fb1a5cd..cc8f5d12 100644 --- a/command_line/fp3.py +++ b/command_line/fp3.py @@ -6,6 +6,7 @@ import concurrent.futures import logging import time +import functools logger = logging.getLogger("fp3") @@ -13,7 +14,7 @@ from dxtbx.model.experiment_list import ExperimentList, ExperimentListFactory from dials_scratch.fp3 import even_blocks, index_blocks, format_phil_include from dials_scratch.fp3 import nproc, combine_reflections, combine_experiments -from dials_scratch.fp3 import find_setup_script +from dials_scratch.fp3 import find_setup_script, prime_factors scope = phil.parse( """ @@ -82,6 +83,24 @@ def __init__(self, filenames, params): self._osc = osc self._image_range = scan.get_image_range() + # default nproc here depends on max_workers and parallelism mode - + # if both are set to 1 have a guess for something reasonable - factor + # nproc to primes then share these out between workers and cores... 
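+        # (illustrative worked example: on a 12-core machine prime_factors(12)
+        # == [2, 2, 3], so the even-indexed factors give 2 * 3 = 6 workers and
+        # the odd-indexed factors give 2 cores per worker - compare the
+        # assertions in fp3/test_util.py)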
+
+        np = self._params.worker_nproc
+        nw = self._params.max_workers
+        if np == 1 and nw == 1:
+            f = prime_factors(self._n)
+            nw = functools.reduce((lambda x, y: x * y), f[0::2])
+            np = functools.reduce((lambda x, y: x * y), f[1::2])
+            self._params.worker_nproc = np
+            self._params.max_workers = nw
+
+        logger.info(f"Using {self._n} processors for images {self._image_range}")
+        logger.info(
+            f"with up to {self._params.max_workers} x {self._params.worker_nproc} core workers"
+        )
+
         self._integrated = []
         self._combined = None
         self._symmetry = None

@@ -216,7 +235,7 @@ def integrate_chunk(self, no, chunk):
         expt.as_file(os.path.join(work, "input.expt"))

         phil = self._write_phil("find_spots", work)
-        # FIXME nproc here depends on max_workers and parallelism mode
+
         np = self._params.worker_nproc

         result = procrunner.run(
From 43ae020bf76ad92d2e572f97422ed1212930ec0e Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Sat, 3 Oct 2020 10:11:50 +0100
Subject: [PATCH 52/65] =?UTF-8?q?Cope=20with=20data=20sets=20>=205000?=
 =?UTF-8?q?=C2=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 command_line/fp3.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index cc8f5d12..51f8ba8c 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -194,7 +194,7 @@ def integrate(self):
         # if debug, print the log files from each of the integration stages
         if self._debug:
             for j, block in enumerate(blocks):
-                work = os.path.join(self._root, "integrate%03d" % no)
+                work = os.path.join(self._root, "integrate%04d" % j)
                 for log in (
                     "dials.find_spots.log",
                     "dials.index.log",
@@ -217,7 +217,7 @@ def integrate_chunk(self, no, chunk):

         logger.debug(f"Processing block {no} for images {chunk[0]} to {chunk[1]}")

-        work = os.path.join(self._root, "integrate%03d" % no)
+        work = os.path.join(self._root, "integrate%04d" % no)
         if os.path.exists(work):
             if all(
                 os.path.exists(os.path.join(work, f"integrated.{exten}"))
@@ -293,7 +293,7 @@ def integrate_chunk_script(self, no, chunk):
         logger.debug(f"Writing script {no} for images {chunk[0]} to {chunk[1]}")

         # first check if there is nothing to be done here
-        work = os.path.join(self._root, f"integrate{no:03}")
+        work = os.path.join(self._root, f"integrate{no:04}")
         if os.path.exists(work):
             if all(
                 os.path.exists(os.path.join(work, f"integrated.{exten}"))
From 299896cfca761be78ba0141a389fda12fe6a5617 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Sat, 3 Oct 2020 15:11:40 +0100
Subject: [PATCH 53/65] Add to repo

---
 gw/nexus6.py | 213 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 213 insertions(+)
 create mode 100644 gw/nexus6.py

diff --git a/gw/nexus6.py b/gw/nexus6.py
new file mode 100644
index 00000000..6956283c
--- /dev/null
+++ b/gw/nexus6.py
@@ -0,0 +1,213 @@
+import sys
+from collections import namedtuple
+
+import h5py
+import numpy
+
+from scitbx import matrix
+
+
+def axis_rt(axis, setting=None):
+    if axis.type == b"rotation":
+        if setting:
+            if hasattr(setting.positions, "__iter__"):
+                a = setting.positions[0]
+            else:
+                a = setting.positions
+        else:
+            a = 0.0
+        v = matrix.col(axis.vector)
+        r = v.axis_and_angle_as_r3_rotation_matrix(a, deg=True)
+        t = matrix.col(axis.offset)
+    elif axis.type == b"translation":
+        if setting:
+            if hasattr(setting.positions, "__iter__"):
+                t = setting.positions[0]
+            else:
+                t = setting.positions
+        else:
+            t = 0.0
+        v = matrix.col(axis.vector)
+        r = matrix.sqr((1, 0, 0, 0, 1, 0, 0, 0, 1))
+        o = 
matrix.col(axis.offset) + t = v * t + o + + rt = matrix.rt((r, t)) + return rt + + +def validate_nxmx(f): + """Checks that this is an NXmx file, raises appropriate exception if not.""" + + if not b"/entry/definition" in f: + raise ValueError("/entry/definition missing") + + definition = f[b"/entry/definition"][()] + if definition != numpy.string_("NXmx"): + raise ValueError("definition not NXmx: %s" % str(definition)) + + +def mm_distance_or_deg_angle(d): + """Return the distance here in mm""" + + if not b"transformation_type" in d.attrs: + raise ValueError("dataset has no transformation_type") + + if not b"units" in d.attrs: + raise ValueError("units missing from dataset") + + if d.attrs[b"transformation_type"] == numpy.string_("translation"): + mm_scale = {numpy.string_("m"): 1000.0, numpy.string_("mm"): 1.0}.get( + d.attrs[b"units"] + ) + + return mm_scale * d[()] + elif d.attrs[b"transformation_type"] == numpy.string_("rotation"): + if d.attrs[b"units"] != numpy.string_("deg"): + raise ValueError("only degrees supported as rotation unit") + return d[()] + else: + raise ValueError("transformation_type not translation or rotation") + + +def detector_fast_slow_origin(f): + """Construct a depends_on tree for the detector, assumed to be at + /entry/instrument/detector with either attribute or dataset depends_on""" + + detector = f[b"/entry/instrument/detector"] + + # Commentary: + # + # In theory the detector dependency hierarchy can be derived from the + # properties of what the detector depends on however this is not true + # in real life, as the detector depends on only one axis (currently) and + # has no offsets so is incorrectly defined. The fast and slow pixel + # directions _are_ however robustly defined -> use these to define the + # hierarchy then... + + depends_on = None + + if b"depends_on" in detector: + depends_on = detector[b"depends_on"][()] + elif b"depends_on" in detector.attrs: + depends_on = detector.atrs[b"depends_on"] + else: + raise ValueError("no depends_on found in /entry/instrument/detector") + + # fast and slow directions, work on the basis that these have the dependency + # attributes and take no prisoners if this is false + + fast = detector[b"module/fast_pixel_direction"] + slow = detector[b"module/slow_pixel_direction"] + + # assert for the moment that these _do not_ have independent offsets + nil = numpy.array((0.0, 0.0, 0.0)) + + foff = fast.attrs.get(b"offset", nil) + if not numpy.array_equal(foff, nil): + raise ValueError("fast axis has offset") + + soff = slow.attrs.get(b"offset", nil) + if not numpy.array_equal(soff, nil): + raise ValueError("slow axis has offset") + + axis = namedtuple( + "axis", ("id", "type", "equipment", "depends_on", "vector", "offset") + ) + + setting = namedtuple("setting", ("id", "positions")) + + axes = [] + settings = [] + + # I don't know if this is really a constraint or not, quite possibly + # one could depend on the other... 
+ if fast.attrs[b"depends_on"] != slow.attrs[b"depends_on"]: + raise ValueError("fast and slow axes depend on different axes") + + depends_on = fast.attrs[b"depends_on"] + + while depends_on != numpy.string_("."): + element = f[depends_on] + + # believe all offsets in m - want mm so + offset = 1000 * element.attrs.get(b"offset", nil) + vector = element.attrs[b"vector"] + values = mm_distance_or_deg_angle(element) + + axes.append( + axis( + depends_on, + element.attrs[b"transformation_type"], + b"detector", + element.attrs[b"depends_on"], + vector, + offset, + ) + ) + settings.append(setting(depends_on, values)) + + depends_on = element.attrs[b"depends_on"] + + # construct a tree from these + tree = {} + ht = {} + for a in axes: + ht[a.id] = a + tree[a.id] = a.depends_on + + # convenient lookup for axis settings - means we have an indexed table + sets = {} + for s in settings: + sets[s.id] = s + + # build up a handy dandy hash table of rt matrices + rts = {} + for a in axes: + if not a.equipment in (b"goniometer", b"detector"): + continue + + rts[a.id] = axis_rt(a, sets.get(a.id, None)) + + origin = matrix.col(nil) + fast_vector = list(fast.attrs[b"vector"]) + slow_vector = list(slow.attrs[b"vector"]) + depends = fast.attrs[b"depends_on"] + + while depends != b".": + origin = rts[depends] * origin + fast_vector = rts[depends].r * fast_vector + slow_vector = rts[depends].r * slow_vector + depends = ht[depends].depends_on + + return matrix.col(fast_vector), matrix.col(slow_vector), matrix.col(origin) + + +def mcstas_to_imgcif(v): + """Convert vector v to imgCIF frame from mcstas""" + + rot = matrix.sqr((-1, 0, 0, 0, 1, 0, 0, 0, -1)) + + return rot * v + + +def draw(filename): + + with h5py.File(filename, "r") as f: + # belt + braces here + validate_nxmx(f) + + # construct the tree for the detector + f, s, o = detector_fast_slow_origin(f) + + f = mcstas_to_imgcif(f) + s = mcstas_to_imgcif(s) + o = mcstas_to_imgcif(o) + + print(f.elems) + print(s.elems) + print(o.elems) + + +if __name__ == "__main__": + draw(sys.argv[1]) From 8368437bb6f221cdf9a1ca184dabd6c2f0726f68 Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Mon, 5 Oct 2020 06:33:41 +0100 Subject: [PATCH 54/65] Revert "Add to repo" This reverts commit 299896cfca761be78ba0141a389fda12fe6a5617. 
Meant to put on master :-/ --- gw/nexus6.py | 213 --------------------------------------------------- 1 file changed, 213 deletions(-) delete mode 100644 gw/nexus6.py diff --git a/gw/nexus6.py b/gw/nexus6.py deleted file mode 100644 index 6956283c..00000000 --- a/gw/nexus6.py +++ /dev/null @@ -1,213 +0,0 @@ -import sys -from collections import namedtuple - -import h5py -import numpy - -from scitbx import matrix - - -def axis_rt(axis, setting=None): - if axis.type == b"rotation": - if setting: - if hasattr(setting.positions, "__iter__"): - a = setting.positions[0] - else: - a = setting.positions - else: - a = 0.0 - v = matrix.col(axis.vector) - r = v.axis_and_angle_as_r3_rotation_matrix(a, deg=True) - t = matrix.col(axis.offset) - elif axis.type == b"translation": - if setting: - if hasattr(setting.positions, "__iter__"): - t = setting.positions[0] - else: - t = setting.positions - else: - t = 0.0 - v = matrix.col(axis.vector) - r = matrix.sqr((1, 0, 0, 0, 1, 0, 0, 0, 1)) - o = matrix.col(axis.offset) - t = v * t + o - - rt = matrix.rt((r, t)) - return rt - - -def validate_nxmx(f): - """Checks that this is an NXmx file, raises appropriate exception if not.""" - - if not b"/entry/definition" in f: - raise ValueError("/entry/definition missing") - - definition = f[b"/entry/definition"][()] - if definition != numpy.string_("NXmx"): - raise ValueError("definition not NXmx: %s" % str(definition)) - - -def mm_distance_or_deg_angle(d): - """Return the distance here in mm""" - - if not b"transformation_type" in d.attrs: - raise ValueError("dataset has no transformation_type") - - if not b"units" in d.attrs: - raise ValueError("units missing from dataset") - - if d.attrs[b"transformation_type"] == numpy.string_("translation"): - mm_scale = {numpy.string_("m"): 1000.0, numpy.string_("mm"): 1.0}.get( - d.attrs[b"units"] - ) - - return mm_scale * d[()] - elif d.attrs[b"transformation_type"] == numpy.string_("rotation"): - if d.attrs[b"units"] != numpy.string_("deg"): - raise ValueError("only degrees supported as rotation unit") - return d[()] - else: - raise ValueError("transformation_type not translation or rotation") - - -def detector_fast_slow_origin(f): - """Construct a depends_on tree for the detector, assumed to be at - /entry/instrument/detector with either attribute or dataset depends_on""" - - detector = f[b"/entry/instrument/detector"] - - # Commentary: - # - # In theory the detector dependency hierarchy can be derived from the - # properties of what the detector depends on however this is not true - # in real life, as the detector depends on only one axis (currently) and - # has no offsets so is incorrectly defined. The fast and slow pixel - # directions _are_ however robustly defined -> use these to define the - # hierarchy then... 
- - depends_on = None - - if b"depends_on" in detector: - depends_on = detector[b"depends_on"][()] - elif b"depends_on" in detector.attrs: - depends_on = detector.atrs[b"depends_on"] - else: - raise ValueError("no depends_on found in /entry/instrument/detector") - - # fast and slow directions, work on the basis that these have the dependency - # attributes and take no prisoners if this is false - - fast = detector[b"module/fast_pixel_direction"] - slow = detector[b"module/slow_pixel_direction"] - - # assert for the moment that these _do not_ have independent offsets - nil = numpy.array((0.0, 0.0, 0.0)) - - foff = fast.attrs.get(b"offset", nil) - if not numpy.array_equal(foff, nil): - raise ValueError("fast axis has offset") - - soff = slow.attrs.get(b"offset", nil) - if not numpy.array_equal(soff, nil): - raise ValueError("slow axis has offset") - - axis = namedtuple( - "axis", ("id", "type", "equipment", "depends_on", "vector", "offset") - ) - - setting = namedtuple("setting", ("id", "positions")) - - axes = [] - settings = [] - - # I don't know if this is really a constraint or not, quite possibly - # one could depend on the other... - if fast.attrs[b"depends_on"] != slow.attrs[b"depends_on"]: - raise ValueError("fast and slow axes depend on different axes") - - depends_on = fast.attrs[b"depends_on"] - - while depends_on != numpy.string_("."): - element = f[depends_on] - - # believe all offsets in m - want mm so - offset = 1000 * element.attrs.get(b"offset", nil) - vector = element.attrs[b"vector"] - values = mm_distance_or_deg_angle(element) - - axes.append( - axis( - depends_on, - element.attrs[b"transformation_type"], - b"detector", - element.attrs[b"depends_on"], - vector, - offset, - ) - ) - settings.append(setting(depends_on, values)) - - depends_on = element.attrs[b"depends_on"] - - # construct a tree from these - tree = {} - ht = {} - for a in axes: - ht[a.id] = a - tree[a.id] = a.depends_on - - # convenient lookup for axis settings - means we have an indexed table - sets = {} - for s in settings: - sets[s.id] = s - - # build up a handy dandy hash table of rt matrices - rts = {} - for a in axes: - if not a.equipment in (b"goniometer", b"detector"): - continue - - rts[a.id] = axis_rt(a, sets.get(a.id, None)) - - origin = matrix.col(nil) - fast_vector = list(fast.attrs[b"vector"]) - slow_vector = list(slow.attrs[b"vector"]) - depends = fast.attrs[b"depends_on"] - - while depends != b".": - origin = rts[depends] * origin - fast_vector = rts[depends].r * fast_vector - slow_vector = rts[depends].r * slow_vector - depends = ht[depends].depends_on - - return matrix.col(fast_vector), matrix.col(slow_vector), matrix.col(origin) - - -def mcstas_to_imgcif(v): - """Convert vector v to imgCIF frame from mcstas""" - - rot = matrix.sqr((-1, 0, 0, 0, 1, 0, 0, 0, -1)) - - return rot * v - - -def draw(filename): - - with h5py.File(filename, "r") as f: - # belt + braces here - validate_nxmx(f) - - # construct the tree for the detector - f, s, o = detector_fast_slow_origin(f) - - f = mcstas_to_imgcif(f) - s = mcstas_to_imgcif(s) - o = mcstas_to_imgcif(o) - - print(f.elems) - print(s.elems) - print(o.elems) - - -if __name__ == "__main__": - draw(sys.argv[1]) From 4a75fbaa57f710ee5e36d279ac208d583059882f Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Tue, 6 Oct 2020 08:27:10 +0100 Subject: [PATCH 55/65] Name it: dials.fp3 --- command_line/fp3.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/command_line/fp3.py b/command_line/fp3.py index 51f8ba8c..47125e28 100644 --- a/command_line/fp3.py 
+++ b/command_line/fp3.py @@ -1,3 +1,5 @@ +# LIBTBX_SET_DISPATCHER_NAME dials.fp3 + import procrunner import sys import os From ef4b08d4b32d32be42924de6bc3657ce8c6ff0bc Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Tue, 6 Oct 2020 08:43:38 +0100 Subject: [PATCH 56/65] Procrunner API change --- command_line/fp3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/command_line/fp3.py b/command_line/fp3.py index 47125e28..a00d570f 100644 --- a/command_line/fp3.py +++ b/command_line/fp3.py @@ -525,7 +525,7 @@ def scale(self, d_min=None): print_stderr=self._debug, ) - stdout = result["stdout"].split(b"\n") + stdout = result.stdout.split(b"\n") # find summary table for j, line in enumerate(stdout): @@ -562,7 +562,7 @@ def resolution(self): ) d_min = None - for record in result["stdout"].split(b"\n"): + for record in result.stdout.split(b"\n"): if record.startswith(b"Resolution cc_half:"): d_min = float(record.split()[-1]) From 3fa3fc27aed28256026124734982985e4096a35e Mon Sep 17 00:00:00 2001 From: Graeme Winter Date: Tue, 6 Oct 2020 08:43:56 +0100 Subject: [PATCH 57/65] More than one experiment --- command_line/fp3.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/command_line/fp3.py b/command_line/fp3.py index a00d570f..d6f11caf 100644 --- a/command_line/fp3.py +++ b/command_line/fp3.py @@ -58,7 +58,7 @@ class FP3: def __init__(self, filenames, params): - self._experiment = ExperimentListFactory.from_filenames(filenames) + self._experiments = ExperimentListFactory.from_filenames(filenames) # parse PHIL parameters clai = scope.command_line_argument_interpreter() @@ -74,12 +74,14 @@ def __init__(self, filenames, params): logger.setLevel(getattr(logging, self._params.log_level)) logger.info(scope.fetch_diff(self._working).as_str()) + logger.info(f"Found {len(self._experiments)} scans to process") + self._crystal = None self._root = os.getcwd() self._n = nproc() # quick checks... 
From 3fa3fc27aed28256026124734982985e4096a35e Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Tue, 6 Oct 2020 08:43:56 +0100
Subject: [PATCH 57/65] More than one experiment

---
 command_line/fp3.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index a00d570f..d6f11caf 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -58,7 +58,7 @@ class FP3:
     def __init__(self, filenames, params):

-        self._experiment = ExperimentListFactory.from_filenames(filenames)
+        self._experiments = ExperimentListFactory.from_filenames(filenames)

         # parse PHIL parameters
         clai = scope.command_line_argument_interpreter()
@@ -74,12 +74,14 @@ def __init__(self, filenames, params):
         logger.setLevel(getattr(logging, self._params.log_level))
         logger.info(scope.fetch_diff(self._working).as_str())

+        logger.info(f"Found {len(self._experiments)} scans to process")
+
         self._crystal = None
         self._root = os.getcwd()
         self._n = nproc()

         # quick checks...
-        scan = self._experiment[0].scan
+        scan = self._experiments[0].scan

         osc = scan.get_oscillation()
         self._osc = osc
@@ -130,7 +132,7 @@ def index(self):
         if os.path.exists(os.path.join(work, "indexed.expt")):
             indexed = ExperimentList.from_file(os.path.join(work, "indexed.expt"))
-            self._experiment[0].crystal = indexed[0].crystal
+            self._experiments[0].crystal = indexed[0].crystal
             logger.info("Picked up pre-existing crystal:")
             logger.info(indexed[0].crystal)
             return
@@ -138,7 +140,7 @@ def index(self):
         if not os.path.exists(work):
             os.mkdir(work)

-        self._experiment.as_file(os.path.join(work, "input.expt"))
+        self._experiments.as_file(os.path.join(work, "input.expt"))

         five = int(round(5 / self._osc[1]))
         i0, i1 = self._image_range
@@ -168,7 +170,7 @@ def index(self):

         indexed = ExperimentList.from_file(os.path.join(work, "indexed.expt"))
         logger.info(indexed[0].crystal)
-        self._experiment[0].crystal = indexed[0].crystal
+        self._experiments[0].crystal = indexed[0].crystal

     def integrate(self):
         """Integration of the complete scan: will split the data into 5 deg
@@ -231,7 +233,7 @@ def integrate_chunk(self, no, chunk):
         os.mkdir(work)

         # fix up the scan to correspond to input chunk
-        expt = copy.copy(self._experiment)
+        expt = copy.copy(self._experiments)
         expt[0].scan = expt[0].scan[chunk[0] : chunk[1]]

         expt.as_file(os.path.join(work, "input.expt"))
@@ -307,7 +309,7 @@ def integrate_chunk_script(self, no, chunk):
         os.mkdir(work)

         # fix up the scan to correspond to input chunk, save to working area
-        expt = copy.deepcopy(self._experiment)
+        expt = copy.deepcopy(self._experiments)
         expt[0].scan = expt[0].scan[chunk[0] : chunk[1]]

         expt.as_file(os.path.join(work, "input.expt"))

From e0165af4056f65ac45b356f5a66b7573876a042f Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Tue, 6 Oct 2020 09:24:49 +0100
Subject: [PATCH 58/65] Multi-sweeps...

Add a check at the start that only one sweep has been passed as input
and a collection of FIXME for coping with > 1 sweep / lattice in the
longer term
---
 command_line/fp3.py | 54 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/command_line/fp3.py b/command_line/fp3.py
index d6f11caf..2cd4a270 100644
--- a/command_line/fp3.py
+++ b/command_line/fp3.py
@@ -18,6 +18,8 @@
 from dials_scratch.fp3 import nproc, combine_reflections, combine_experiments
 from dials_scratch.fp3 import find_setup_script, prime_factors

+# FIXME add block size for processing to the input parameters
+
 scope = phil.parse(
     """
 find_spots {
@@ -76,11 +78,15 @@ def __init__(self, filenames, params):

         logger.info(f"Found {len(self._experiments)} scans to process")

+        if len(self._experiments) > 1:
+            sys.exit("Multi-sweep data not supported at this time")
+
         self._crystal = None
         self._root = os.getcwd()
         self._n = nproc()

         # quick checks...
+        # FIXME MULTI-SWEEP will require attention
         scan = self._experiments[0].scan

         osc = scan.get_oscillation()
         self._osc = osc
@@ -142,6 +148,8 @@ def index(self):

         self._experiments.as_file(os.path.join(work, "input.expt"))

+        # FIXME MULTI-SWEEP will require attention - for 5° blocks for each
+        # sweep will need to ensure that we compute the right blocks for each
         five = int(round(5 / self._osc[1]))
         i0, i1 = self._image_range
         blocks = [(b[0] + 1, b[1]) for b in index_blocks(i0 - 1, i1, self._osc[1])]
@@ -168,6 +176,10 @@ def index(self):
             print_stderr=self._debug,
         )

+        # FIXME MULTI-LATTICE quite possible at this stage that we have more
+        # than one lattice, on more than one experiment, so probably want to
+        # collate the unique crystals at this stage
+
         indexed = ExperimentList.from_file(os.path.join(work, "indexed.expt"))
         logger.info(indexed[0].crystal)
         self._experiments[0].crystal = indexed[0].crystal
@@ -176,6 +188,12 @@ def integrate(self):
         """Integration of the complete scan: will split the data into 5 deg
         chunks and spin the integration of each chunk off separately"""

+        # FIXME MULTI-SWEEP - in here if we have multiple sweeps (i.e. more than
+        # one imageset) need to integrate these separately, with the appropriate
+        # block sizes set. Probably want to form a list of tasks which includes
+        # all blocks for all image sets for parallel processing rather than
+        # iterating through the imagesets doing parallel processing on each.
+
         irange = self._image_range

         nblocks = int(round(self._osc[1] * (irange[1] - irange[0] + 1) / 5.0))
@@ -183,6 +201,14 @@ def integrate(self):

         logger.info("Integrating...")

+        # FIXME in here define the integrate directory template based on the
+        # maximum number of blocks => don't hard code this. Should also have
+        # the imageset in the name e.g. integrate 0.01 for the second block of
+        # imageset 0.
+
+        # API wise, if we make blocks a list of tuples here this could be pretty
+        # simple to modify i.e. (imageset, block)
+
         if self._params.parallelism == "process":
             with concurrent.futures.ProcessPoolExecutor(
                 max_workers=self._params.max_workers
@@ -221,6 +247,8 @@ def integrate_chunk(self, no, chunk):

         logger.debug(f"Processing block {no} for images {chunk[0]} to {chunk[1]}")

+        # FIXME MULTI-SWEEP in here dig out the right imageset etc.
+
         work = os.path.join(self._root, "integrate%04d" % no)
         if os.path.exists(work):
             if all(
@@ -232,6 +260,10 @@ def integrate_chunk(self, no, chunk):
         if not os.path.exists(work):
             os.mkdir(work)

+        # FIXME MULTI-SWEEP pick the right scan etc. - also need to pick the
+        # _right_ experiments to copy i.e. those for which the image set is the
+        # _right_ image set.
+
         # fix up the scan to correspond to input chunk
         expt = copy.copy(self._experiments)
         expt[0].scan = expt[0].scan[chunk[0] : chunk[1]]
@@ -249,6 +281,9 @@ def integrate_chunk(self, no, chunk):
             print_stderr=False,
         )

+        # FIXME check the return status, check for errors, verify that the
+        # files we are expecting to exist actually ... exist
+
         phil = self._write_phil("index", work)
         result = procrunner.run(
             [
                 "dials.index",
                 "input.expt",
                 "strong.refl",
                 "index_assignment.method=local",
             ],
             working_directory=work,
             print_stderr=False,
         )

@@ -263,6 +298,9 @@ def integrate_chunk(self, no, chunk):
             print_stderr=False,
         )

+        # FIXME check the return status, check for errors, verify that the
+        # files we are expecting to exist actually ... exist
+
         phil = self._write_phil("refine", work)
         result = procrunner.run(
             ["dials.refine", "indexed.expt", "indexed.refl"] + phil,
@@ -271,8 +309,11 @@ def integrate_chunk(self, no, chunk):
             print_stderr=False,
         )

+        # FIXME check the return status, check for errors, verify that the
+        # files we are expecting to exist actually ... exist
+
         phil = self._write_phil("integrate", work)
-        # FIXME nproc here depends on max_workers and parallelism mode
+
         result = procrunner.run(
             ["dials.integrate", "refined.expt", "refined.refl", f"nproc={np}"] + phil,
             working_directory=work,
@@ -402,6 +443,10 @@ def integrate_drmaa_array(self, blocks):
             session.synchronize(job_ids, drmaa.Session.TIMEOUT_WAIT_FOREVER, True)
             session.deleteJobTemplate(job)

+        # FIXME check the return status, check for errors, verify that the
+        # files we are expecting to exist actually ... exist, and if not, do
+        # something sensible.
+
     def combine(self):
         """Collect together the data so far integrated."""

@@ -419,6 +464,8 @@ def combine(self):
         if not os.path.exists(work):
             os.mkdir(work)

+        # FIXME this should not be an assertion - if [] then sys.exit()
+        # with helpful message
         assert self._integrated is not []

         integrated_refl = [
@@ -548,6 +595,9 @@ def resolution(self):

         assert self._scaled

+        # FIXME check to see if we have done this before and if we have,
+        # pick up the outcome from that calculation rather than re-running
+
         logger.info("Estimating resolution...")

         source = os.path.join(self._scaled, "scaled")
@@ -580,6 +630,8 @@ def resolution(self):
     params = [arg for arg in sys.argv[1:] if "=" in arg]
     filenames = sum(map(glob.glob, args), [])

+    # FIXME move all this to a main() function
+
     fp3 = FP3(filenames, params)
     t0 = time.time()
     fp3.index()
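Several of the FIXMEs in the patch above ask for the same thing: check the return status of each dials step and verify that the files it should have written actually exist. A sketch of how that could be factored out - run_checked and the expected filenames here are hypothetical, not part of fp3:

    import os

    import procrunner

    def run_checked(command, work, expected):
        """Run command in work; fail loudly on a bad exit status or if the
        expected output files were not written."""
        result = procrunner.run(command, working_directory=work, print_stdout=False)
        if result.returncode != 0:
            raise RuntimeError(f"{command[0]} failed in {work}")
        for filename in expected:
            if not os.path.exists(os.path.join(work, filename)):
                raise RuntimeError(f"{command[0]} did not write {filename}")
        return result

    # e.g. run_checked(["dials.index", "input.expt", "strong.refl"],
    #                  work, ["indexed.expt", "indexed.refl"])
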
From 23d3231414771c15b2db39fc25a0ca0de1c05246 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Sat, 31 Oct 2020 05:50:27 +0000
Subject: [PATCH 59/65] Object files -> ignore

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 2e35bff1..529bfa35 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,6 @@ _*.nb
 .git-notifier*
 git-notifier.log
 .gitversion
+
+# C object files
+*.o

From cf866c726fca564ead070c8a117e34fb881500df Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Sat, 31 Oct 2020 05:52:29 +0000
Subject: [PATCH 60/65] Revert "Object files -> ignore"

This reverts commit 23d3231414771c15b2db39fc25a0ca0de1c05246.
---
 .gitignore | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 529bfa35..2e35bff1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,3 @@ _*.nb
 .git-notifier*
 git-notifier.log
 .gitversion
-
-# C object files
-*.o

From 365d39eb6570ed3eafcd88b72da9c36de6a01834 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Mon, 2 Nov 2020 16:22:33 +0000
Subject: [PATCH 61/65] WIP: combine partials

---
 fp3/tools.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 46 insertions(+), 2 deletions(-)

diff --git a/fp3/tools.py b/fp3/tools.py
index 896fb207..4760f85b 100644
--- a/fp3/tools.py
+++ b/fp3/tools.py
@@ -2,9 +2,53 @@
 from dxtbx.model.experiment_list import ExperimentList


+def combine_partial_reflections(fins, fout):
+    """Combine reflection files from fins into a single output file fout.
+    Makes assumptions that the data are consistent from one to the next, no
+    particular tests are performed at the moment."""
+
+    d0 = flex.reflection_table.from_file(fins[0])
+    for f in fins[1:]:
+        d1 = flex.reflection_table.from_file(f)
+
+        d1.experiment_identifiers()[0] = d0.experiment_identifiers()[0]
+
+        matches = d0.match(d1)
+
+        for i, j in zip(*matches):
+            assert d0["miller_index"][i] == d1["miller_index"][j]
+
+        # join up those reflections, extract them from the existing lists...
+        m0 = flex.bool(d0.size(), False)
+        m0.set_selected(matches[0], True)
+        d0p = d0.select(m0)
+        d0 = d0.select(~m0)
+
+        m1 = flex.bool(d1.size(), False)
+        m1.set_selected(matches[1], True)
+        d1p = d1.select(m1)
+        d1 = d1.select(~m1)
+
+        d0.extend(d1)
+
+        # combine d0p, d1p
+        for j in range(d0p.size()):
+            d0p["partial_id"][j] = 100000000 + j
+            d1p["partial_id"][j] = 100000000 + j
+
+        d01 = flex.reflection_table()
+        d01.extend(d0p)
+        d01.extend(d1p)
+
+        d01 = sum_partial_reflections(d01)
+        d0.extend(d01)
+
+    d0.as_file(fout)
+
+
 def combine_reflections(fins, fout):
     """Combine reflection files from fins into a single output file fout.
-    Makes assumptions that the data are consistent from one to the next, no
+    Makes assumptions that the data are consistent from one to the next, no
     particular tests are performed at the moment."""

     d0 = flex.reflection_table.from_file(fins[0])
@@ -18,7 +62,7 @@ def combine_reflections(fins, fout):


 def combine_experiments(fins, fout):
-    """Combine experiment files from fins to a single output file fout.
+    """Combine experiment files from fins to a single output file fout.
     Makes assumptions that the crystal model is consistent from one experiment
     to the next, that the detectors are all similar.
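The bookkeeping idea in combine_partial_reflections above is that matched rows in the two tables are halves of the same reflection, so each pair is given a shared partial_id (offset well clear of any existing ids) for sum_partial_reflections to pick up. Stripped of the flex machinery, the pairing amounts to this sketch (row indices hypothetical):

    BASE = 100000000  # offset clear of any pre-existing partial_id

    matches0 = [4, 7, 9]  # matched row indices in d0
    matches1 = [2, 3, 8]  # corresponding row indices in d1

    partial_id = {}
    for j, (i0, i1) in enumerate(zip(matches0, matches1)):
        # both halves of pair j share one id, so a later summation pass
        # can recognise them as pieces of a single reflection
        partial_id[("d0", i0)] = BASE + j
        partial_id[("d1", i1)] = BASE + j

    assert partial_id[("d0", 7)] == partial_id[("d1", 3)]
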
From 229ce6be825e49060f6cd761059e60a721647148 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Tue, 3 Nov 2020 07:50:58 +0000
Subject: [PATCH 62/65] WIP continued: now more sensible

---
 fp3/tools.py | 58 +++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 46 insertions(+), 12 deletions(-)

diff --git a/fp3/tools.py b/fp3/tools.py
index 4760f85b..0ad51156 100644
--- a/fp3/tools.py
+++ b/fp3/tools.py
@@ -2,12 +2,13 @@
 from dxtbx.model.experiment_list import ExperimentList


-def combine_partial_reflections(fins, fout):
+def combine_reflections(fins, fout):
     """Combine reflection files from fins into a single output file fout.
     Makes assumptions that the data are consistent from one to the next, no
     particular tests are performed at the moment."""

     d0 = flex.reflection_table.from_file(fins[0])
+
     for f in fins[1:]:
         d1 = flex.reflection_table.from_file(f)

@@ -19,34 +20,67 @@ def combine_reflections(fins, fout):
             assert d0["miller_index"][i] == d1["miller_index"][j]

         # join up those reflections, extract them from the existing lists...
+        # copy the unmatched reflections over to copy of the output data
+        # then merge the partials and copy them in
+
+        # detail: select(matches[]) reorders the reflections as well
+
         m0 = flex.bool(d0.size(), False)
         m0.set_selected(matches[0], True)
-        d0p = d0.select(m0)
+        d0p = d0.select(matches[0])
         d0 = d0.select(~m0)

         m1 = flex.bool(d1.size(), False)
         m1.set_selected(matches[1], True)
-        d1p = d1.select(m1)
+        d1p = d1.select(matches[1])
         d1 = d1.select(~m1)

         d0.extend(d1)

-        # combine d0p, d1p
+        prf = "intensity.prf.value" in d0p
+
+        # combine d0p, d1p - copy the information from d1p[j] to d0p[j]
         for j in range(d0p.size()):
-            d0p["partial_id"][j] = 100000000 + j
-            d1p["partial_id"][j] = 100000000 + j
+            d0p["partiality"][j] += d1p["partiality"][j]
+
+            if prf:
+
+                # weight profiles by (I/sig(I))^2 as done in dials.export -
+                # should probably prune the reflection lists in here to only
+                # include useful measurements before I get to this point...

-        d01 = flex.reflection_table()
-        d01.extend(d0p)
-        d01.extend(d1p)
+                i0 = d0p["intensity.prf.value"][j]
+                v0 = d0p["intensity.prf.variance"][j]
+                w0 = i0 * i0 / v0

-        d01 = sum_partial_reflections(d01)
-        d0.extend(d01)
+                i1 = d1p["intensity.prf.value"][j]
+                v1 = d1p["intensity.prf.variance"][j]
+                w1 = i1 * i1 / v1
+
+                if w0 + w1 > 0:
+                    i = (i0 * w0 + i1 * w1) / (w0 + w1)
+                    v = (v0 * w0 + v1 * w1) / (w0 + w1)
+
+                    d0p["intensity.prf.value"][j] = i
+                    d0p["intensity.prf.variance"][j] = v
+
+            d0p["intensity.sum.value"][j] += d1p["intensity.sum.value"][j]
+            d0p["intensity.sum.variance"][j] += d1p["intensity.sum.variance"][j]
+
+            d0p["background.sum.value"][j] += d1p["background.sum.value"][j]
+            d0p["background.sum.variance"][j] += d1p["background.sum.variance"][j]
+
+            # extend the bbox... sort out the flags...
+            b = d0p["bbox"][j]
+            _b = d1p["bbox"][j]
+            d0p["bbox"][j] = (b[0], _b[1], b[2], b[3], b[4], b[5])
+
+        d0.extend(d0p)

     d0.as_file(fout)


-def combine_reflections(fins, fout):
+def dumb_combine_reflections(fins, fout):
     """Combine reflection files from fins into a single output file fout.
-    Makes assumptions that the data are consistent from one to the next, no
+    Makes assumptions that the data are consistent from one to the next, no
     particular tests are performed at the moment."""
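The profile-intensity merge above uses the dials.export convention of weighting each measurement by (I/sig(I))^2, i.e. I^2/variance, so the better-measured half dominates. A toy worked example with hypothetical numbers:

    def merge_prf(i0, v0, i1, v1):
        """Merge two profile-fitted halves of one partial reflection,
        weighting each by (I/sig(I))^2 = I**2 / variance."""
        w0 = i0 * i0 / v0
        w1 = i1 * i1 / v1
        if w0 + w1 <= 0:
            return i0, v0  # degenerate case - leave the first half alone
        i = (i0 * w0 + i1 * w1) / (w0 + w1)
        v = (v0 * w0 + v1 * w1) / (w0 + w1)
        return i, v

    # strong half dominates: I ~ 95.3, variance ~ 24.5
    print(merge_prf(100.0, 25.0, 20.0, 16.0))
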
From e013fada6151881c31082802374834d943986c46 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Tue, 3 Nov 2020 07:51:30 +0000
Subject: [PATCH 63/65] Remove old implementation

---
 fp3/tools.py | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/fp3/tools.py b/fp3/tools.py
index 0ad51156..013c43e4 100644
--- a/fp3/tools.py
+++ b/fp3/tools.py
@@ -80,21 +80,6 @@ def combine_reflections(fins, fout):
     d0.as_file(fout)


-def dumb_combine_reflections(fins, fout):
-    """Combine reflection files from fins into a single output file fout.
-    Makes assumptions that the data are consistent from one to the next, no
-    particular tests are performed at the moment."""
-
-    d0 = flex.reflection_table.from_file(fins[0])
-    for f in fins[1:]:
-        d1 = flex.reflection_table.from_file(f)
-
-        d1.experiment_identifiers()[0] = d0.experiment_identifiers()[0]
-        d0.extend(d1)
-
-    d0.as_file(fout)
-
-
 def combine_experiments(fins, fout):
     """Combine experiment files from fins to a single output file fout.
     Makes assumptions that the crystal model is consistent from one experiment

From e97dccb285dacfc22f3a054da34f7fabb59f8902 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Wed, 4 Nov 2020 15:27:00 +0000
Subject: [PATCH 64/65] Remove reflections with -ve variance

Co-authored-by: jbeilstenedmands <30625594+jbeilstenedmands@users.noreply.github.com>
---
 fp3/tools.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fp3/tools.py b/fp3/tools.py
index 013c43e4..79a0ba5a 100644
--- a/fp3/tools.py
+++ b/fp3/tools.py
@@ -8,9 +8,15 @@ def combine_reflections(fins, fout):
     particular tests are performed at the moment."""

     d0 = flex.reflection_table.from_file(fins[0])
+    if "intensity.prf.variance" in d0:
+        d0 = d0.select(d0["intensity.prf.variance"] > 0)
+    d0 = d0.select(d0["intensity.sum.variance"] > 0)

     for f in fins[1:]:
         d1 = flex.reflection_table.from_file(f)
+        if "intensity.prf.variance" in d1:
+            d1 = d1.select(d1["intensity.prf.variance"] > 0)
+        d1 = d1.select(d1["intensity.sum.variance"] > 0)

         d1.experiment_identifiers()[0] = d0.experiment_identifiers()[0]


From d2bda55dda7ef032db83d4f5eaffacd71cda4e36 Mon Sep 17 00:00:00 2001
From: Graeme Winter
Date: Wed, 26 Jan 2022 14:16:30 +0000
Subject: [PATCH 65/65] API change

---
 fp3/tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fp3/tools.py b/fp3/tools.py
index 79a0ba5a..28b8d941 100644
--- a/fp3/tools.py
+++ b/fp3/tools.py
@@ -22,7 +22,7 @@ def combine_reflections(fins, fout):

         matches = d0.match(d1)

-        for i, j in zip(*matches):
+        for i, j, _ in zip(*matches):
             assert d0["miller_index"][i] == d1["miller_index"][j]

         # join up those reflections, extract them from the existing lists...
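Taken together, the last two patches mean combine_reflections now drops measurements with non-physical (<= 0) variances before matching, and copes with match() returning triples rather than pairs (the third element is unused here, hence the underscore). The filter, restated as a standalone helper using the same dials.array_family calls as the patch:

    from dials.array_family import flex

    def prune_negative_variances(refl):
        """Remove rows whose summation (and, where present, profile)
        variances are not positive."""
        if "intensity.prf.variance" in refl:
            refl = refl.select(refl["intensity.prf.variance"] > 0)
        return refl.select(refl["intensity.sum.variance"] > 0)

    # usage mirrors the patch:
    #     d0 = prune_negative_variances(flex.reflection_table.from_file(fins[0]))
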