From f4b99e38721dfdb8e16bfe5143d89899ada17780 Mon Sep 17 00:00:00 2001 From: Paul Neves Date: Sun, 16 Nov 2025 11:28:33 +0100 Subject: [PATCH 01/14] brought over NDrebin from refactor_24_3D branch --- sasdata/transforms/NDrebin.py | 539 ++++++++++++++++++++++++++++++++++ 1 file changed, 539 insertions(+) create mode 100644 sasdata/transforms/NDrebin.py diff --git a/sasdata/transforms/NDrebin.py b/sasdata/transforms/NDrebin.py new file mode 100644 index 00000000..43615ba6 --- /dev/null +++ b/sasdata/transforms/NDrebin.py @@ -0,0 +1,539 @@ + +import numpy as np +from numpy._typing import ArrayLike + +from typing import List, Optional, Sequence + +from sasdata.quantities.quantity import Quantity + +import time + + + +def NDrebin(data: Quantity[ArrayLike], + coords: Quantity[ArrayLike], + axes: Optional[list[Quantity[ArrayLike]]] = None, + limits: Optional[List[Sequence[float]]] = None, + step_size: Optional[List[Sequence[float]]] = None, + num_bins: Optional[List[Sequence[int]]] = None, + subpixel: Optional[bool] = False + ): + """ + Provide values at points with ND coordinates. + The coordinates may not be in a nice grid. + The data can be in any array shape. + + The coordinates are in the same shape plus one dimension, + preferably the first dimension, which is the ND coordinate + position + + Rebin that data into a regular grid. + + Note that this does lose some information from the underlying data, + as you are essentially averaging multiple measurements into one bin. + + :data: The data at each point + :coords: The locations of each data point, same size of data + plus one more dimension with the same length as the + dimensionality of the space (Ndim) + :axes: The axes of the coordinate system we are binning + into. Defaults to diagonal (e.g. (1,0,0), (0,1,0), and + (0,0,1) for 3D data). A list of Ndim element vectors + :limits: The limits along each axis. Defaults to the smallest + and largest values in the data if no limits are provided. + A list of 2 element vectors. + :step_size: The size of steps along each axis. Supercedes + num_bins. A list of length Ndim. + :num_bins: The number of bins along each axis. Superceded by + step_size if step_size is provided. At least one of step_size + or num_bins must be provided. + :subpixel: Whether to perform subpixel binning or not. Defaults + to false. + -If false, measurements are binned into one bin, + the one they fall within. Roughly a "nearest neighbor" + approach. + -If true, subpixel binning will be applied, where + the value of a measurement is distributed to its 2^Ndim + nearest neighbors weighted by proximity. For example, if + a point falls exactly between two bins, its value will be + given to both bins with 50% weight. This is roughly a + "linear interpolation" approach. Tends to do better at + reducing sharp peaks and edges if data is sampled unevenly. + However, this is roughly 2^Ndim times slower since you have + to address each bin 2^Ndim more times. + + + Returns: binned_data, bin_centers_list + :binned_data: has size num_bins and is NDimensional + :bin_centers_list: is a list of vectors + + """ + + # Identify number of points + Nvals = data.size + + # Identify number of dimensions + Ndims = coords.size / Nvals + + # if Ndims is not an integer value we have a problem + if not Ndims.is_integer(): + raise ValueError(f"The coords have to have the same shape as " + "the data, plus one more dimension which is " + "length Ndims") + Ndims = int(Ndims) + + # flatten input data to 1D of length Nvals + data_flat = data.reshape(-1) + errors_flat = 0*data_flat # TODO add error propagation + + # check if the first axis of coords is the dimensions axis + if coords.shape[0] == Ndims: + # first check if it is the first axis + dim_axis = 0 + elif coords.shape[-1] == Ndims: + # now check if it is the last axis + dim_axis = -1 + else: + # search if any axis is size Ndims + dim_axis = next(i for i, s in enumerate(coords.shape) if s == Ndims) + + if not coords.shape[dim_axis] == Ndims: + raise ValueError(f"The coords have to have one dimension which is " + "the dimensionality of the space") + + # flatten coords to size Nvals x Ndims + moved = np.moveaxis(coords, dim_axis, 0) + coords_flat = moved.reshape(Ndims, -1).T + + # if axes are not provided, default to identity + if axes is None: + axes = np.eye(Ndims) + + + # now project the data into the axes + axes_inv = np.linalg.inv(axes) + coords_flat = np.tensordot(coords_flat, axes_inv, axes=([1], [0])) + + + # if limits were not provided, default to the min and max + # coord in each dimension + if limits is None: + limits = np.zeros((2,Ndims)) + for ind in range(Ndims): + limits[0,ind] = np.min(coords_flat[:,ind]) + limits[1,ind] = np.max(coords_flat[:,ind]) + + # clean up limits + for ind in range(Ndims): + # if individual limits are nan, inf, none, etc, replace with min/max + if not np.isfinite(limits[0,ind]): + limits[0,ind] = np.min(coords_flat[:,ind]) + if not np.isfinite(limits[1,ind]): + limits[1,ind] = np.max(coords_flat[:,ind]) + # if any of the limits are in the wrong order, flip them + if limits[0,ind] > limits[1,ind]: + temp = limits[0,ind] + limits[0,ind] = limits[1,ind] + limits[1,ind] = temp + + + # bins_list is a Ndims long list of vectors which are the edges of + # each bin. Each vector is num_bins[i]+1 long + bins_list = [] + + # bin_centers_list is a Ndims long list of vectors which are the centers of + # each bin. Each vector is num_bins[i] long + bin_centers_list = [] + + # create the bins in each dimension + if step_size is None: + # if step_size was not specified, derive from num_bins + step_size = [] + for ind in range(Ndims): + these_bins = np.linspace(limits[0,ind], limits[1,ind], num_bins[ind]+1) + these_centers = (these_bins[:-1] + these_bins[1:]) / 2.0 + this_step_size = these_bins[1] - these_bins[0] + + bins_list.append(these_bins) + bin_centers_list.append(these_centers) + step_size.append(this_step_size) + else: + # else use step_size and derive num_bins + num_bins = [] + for ind in range(Ndims): + if limits[0,ind] == limits[1,ind]: + # min and max of limits are the same, i.e. data has to be exactly this + these_bins = np.array([limits[0,ind], limits[0,ind]]) + else: + these_bins = np.arange(limits[0,ind], limits[1,ind], step_size[ind]) + if these_bins[-1] != limits[1,ind]: + these_bins = np.append(these_bins, limits[1,ind]) + these_centers = (these_bins[:-1] + these_bins[1:]) / 2.0 + this_num_bins = these_bins.size-1 + + bins_list.append(these_bins) + bin_centers_list.append(these_centers) + num_bins.append(this_num_bins) + + + # create the binned data matrix of size num_bins[0] x num_bins[1] x ... + # binned_data = np.zeros(num_bins) + # binned_data_errs = np.zeros(num_bins) + # n_samples = np.zeros(num_bins) + + # create the bin inds for each data point as a Nvals x Ndims long vector + bin_inds = np.zeros((Nvals, Ndims)) + for ind in range(Ndims): + this_min = bins_list[ind][0] + this_step = step_size[ind] + bin_inds[:, ind] = (coords_flat[:,ind] - this_min) / this_step + # any that are outside the bin limits should be removed + bin_inds[coords_flat[:, ind]bins_list[ind][-1], ind] = np.nan + + if not subpixel: + # this is a non-vector way of binning the data: + # for ind in range(Nvals): + # this_bin_ind = bin_inds[ind,:] + # if not np.isnan(this_bin_ind).any(): + # this_bin_ind = this_bin_ind.astype(int) + # binned_data[*this_bin_ind] = binned_data[*this_bin_ind] + data_flat[ind] + # binned_data_errs[*this_bin_ind] = binned_data_errs[*this_bin_ind] + errors_flat[ind]**2 + # n_samples[*this_bin_ind] = n_samples[*this_bin_ind] + 1 + + + # and here is a vector equivalent + # ------------------------------------------------------------- + # Inputs: + # bin_inds : (Nvals, Ndims) array of indices, some rows may contain NaN + # data_flat : (Nvals,) values to accumulate + # errors_flat : (Nvals,) errors to accumulate (squared) + # binned_data : Ndims-dimensional array (output) + # binned_data_errs : Ndims-dimensional array (output) + # n_samples : Ndims-dimensional array (output) + # ------------------------------------------------------------- + + # 1. Identify valid rows (no NaNs) + valid = ~np.isnan(bin_inds).any(axis=1) + + # 2. Convert valid bins to integer indices + inds_int = bin_inds[valid].astype(int) + + # 3. Map multidimensional indices → flat indices + flat_idx = np.ravel_multi_index(inds_int.T, dims=num_bins) + + # 4. Use bincount to accumulate in a vectorized way + size = np.prod(num_bins) + + bd_sum = np.bincount(flat_idx, weights=data_flat[valid], minlength=size) + err_sum = np.bincount(flat_idx, weights=errors_flat[valid]**2, minlength=size) + ns_sum = np.bincount(flat_idx, minlength=size) + + # 5. Reshape and add into the original arrays + binned_data = bd_sum.reshape(num_bins) + binned_data_errs = err_sum.reshape(num_bins) + n_samples = ns_sum.reshape(num_bins) + + + else: + # more convenient to work with half shifted inds + bin_inds = bin_inds - 0.5 + + # 1. Identify valid rows (no NaNs) + valid = ~np.isnan(bin_inds).any(axis=1) + valid_inds = bin_inds[valid] + partial_weights = 1.-np.mod(valid_inds, 1) + + + # for each dimension, double the amount of subpixels + # for a point at x_i, 1-x_i goes to + for ind in range(Ndims): + # will be where the bin goes + arr_mod = valid_inds.copy() + arr_mod[:, ind] += 1. + valid_inds = np.vstack([valid_inds, arr_mod]) + # how close it is to that bin + arr_mod = partial_weights.copy() + arr_mod[:, ind] = 1. - arr_mod[:, ind] + partial_weights = np.vstack([partial_weights, arr_mod]) + + + # any bins that ended up outside just get clamped + for ind in range(Ndims): + valid_inds[valid_inds[:, ind]<0, ind] = 0 + valid_inds[valid_inds[:, ind]>num_bins[ind]-1, ind] = num_bins[ind]-1 + + # weights are the product of partial weights + weights = np.prod(partial_weights, axis=1) + + # need to tile the data and errs to weight them for each bin + data_valid = np.tile(data_flat[valid], 2**Ndims) + errs_valid = np.tile(errors_flat[valid], 2**Ndims) + + # 2. Convert valid bins to integer indices + inds_int = valid_inds.astype(int) + + # 3. Map multidimensional indices → flat indices + flat_idx = np.ravel_multi_index(inds_int.T, dims=num_bins) + + # 4. Use bincount to accumulate in a vectorized way + size = np.prod(num_bins) + + bd_sum = np.bincount(flat_idx, weights=weights*data_valid, minlength=size) + err_sum = np.bincount(flat_idx, weights=(weights**2)*(errs_valid**2), minlength=size) + ns_sum = np.bincount(flat_idx, weights=weights, minlength=size) + + # 5. Reshape and add into the original arrays + binned_data = bd_sum.reshape(num_bins) + binned_data_errs = err_sum.reshape(num_bins) + n_samples = ns_sum.reshape(num_bins) + + + + + # normalize binned_data by the number of times sampled + with np.errstate(divide='ignore', invalid='ignore'): + binned_data = np.divide(binned_data, n_samples) + binned_data_errs = np.divide(np.sqrt(binned_data_errs), n_samples) + + # any bins with no samples is nan + binned_data[n_samples==0] = np.nan + binned_data_errs[n_samples==0] = np.nan + + + return binned_data, bin_centers_list + + + + + + + + + +if __name__ == "__main__": + import numpy as np + import matplotlib.pyplot as plt + + if True: + # Parameters for the Gaussian function + mu = 0.0 # Mean + sigma = 1.0 # Standard deviation + noise = 0.1 + Nvals = int(1e6) + + # Generate Nvals random values between -5 and 5 + x = -5 + (5 + 5) * np.random.rand(Nvals) # shape: (Nvals,) + + # Build qmat equivalent to MATLAB: + # qmat = [x(:)'; 0*x(:)'; 0*x(:)'] + # → 3 x Nvals array + qmat = np.vstack([ + x, + np.zeros_like(x), + np.zeros_like(x) + ]) + + # Evaluate the Gaussian function + I_1 = np.exp(-((x - mu) ** 2) / (2 * sigma ** 2)) / (sigma * np.sqrt(2 * np.pi)) + + # Add uniform noise in [-noise, +noise] + I_1 = I_1 - noise + 2 * noise * np.random.rand(Nvals) + + # Fiducial + xreal = np.linspace(-5, 5, 1001) + Ireal = np.exp(-((xreal - mu) ** 2) / (2 * sigma ** 2)) / (sigma * np.sqrt(2 * np.pi)) + + # NDrebin + import time + start = time.perf_counter() + Ibin, qbin = NDrebin(I_1, qmat, + step_size=[0.1,np.inf,np.inf] + ) + end = time.perf_counter() + print(f"Computed {Nvals} points in {end - start:.6f} seconds") + + start = time.perf_counter() + Ibin2, qbin2 = NDrebin(I_1, qmat, + step_size=[0.1,np.inf,np.inf], + subpixel = True + ) + end = time.perf_counter() + print(f"Computed {Nvals} points with subpixels in {end - start:.6f} seconds") + + # Plot + plt.figure() + plt.plot(np.squeeze(qbin[0]), np.squeeze(Ibin), 'o', linewidth=2, label='sum') + plt.plot(np.squeeze(qbin2[0]), np.squeeze(Ibin2), 'o', linewidth=2, label='fractional') + plt.plot(xreal, Ireal, 'k-', linewidth=2, label='analytic') + + plt.xlabel('x') + plt.ylabel('I') + plt.legend() + plt.tight_layout() + plt.show(block=False) + + + + + + if True: + # Parameters of the 2D Gaussian + mu = np.array([0.15, 0.0, 0.0]) # Mean vector + sigma = np.array([0.015, 0.055, 0.05]) # Std dev in x and y + noise = 0. + SDD = 2.7 + k_0 = 2*np.pi/5 + pix_x = 1./128. + pix_y = 1./128. + + # Generate our 2D detector grid + x = np.arange(-64,64)*pix_x + y = np.arange(-64,64)*pix_y + + [xmesh, ymesh] = np.meshgrid(x, y) + + # calculate qx, qy, qz + qx = k_0*xmesh/np.sqrt(xmesh**2+ymesh**2+SDD**2) + qy = k_0*ymesh/np.sqrt(xmesh**2+ymesh**2+SDD**2) + qz = k_0-k_0*SDD/np.sqrt(xmesh**2+ymesh**2+SDD**2) + + # qmat + qmat0 = np.stack([qx,qy,qz], axis=2) + + # now rotate about y + angle_list = np.pi/180*np.linspace(-15,15,int(30/.25)) + qmat = np.zeros((len(x), len(y), len(angle_list), 3)) + for ind in range(len(angle_list)): + new_qmat = np.copy(qmat0) + new_qmat[:,:,0] = np.cos(angle_list[ind])*qmat0[:,:,0] - \ + np.sin(angle_list[ind])*qmat0[:,:,2] + new_qmat[:,:,2] = np.sin(angle_list[ind])*qmat0[:,:,0] + \ + np.cos(angle_list[ind])*qmat0[:,:,2] + qmat[:,:,ind,:] = qmat0 + + + # Evaluate Gaussian: + # G(x,y) = (1/(2πσxσy)) * exp(-[(x-μx)^2/(2σx^2) + (y-μy)^2/(2σy^2)]) + I_2D = ( + np.exp( + -((qmat[:,:,:,0] - mu[0])**2) / (2 * sigma[0]**2) + -((qmat[:,:,:,1] - mu[1])**2) / (2 * sigma[1]**2) + -((qmat[:,:,:,2] - mu[2])**2) / (2 * sigma[2]**2) + ) / + (2 * np.pi * sigma[0] * sigma[1] * sigma[2]) + ) + + # Add uniform noise + I_2D = I_2D - noise + 2 * noise * np.random.rand(*I_2D.shape) + + # Rebin in 2D. + # You can choose finite steps for both x and y depending on how you want bins defined. + import time + start = time.perf_counter() + Ibin, qbin = NDrebin(I_2D, qmat, + step_size=[0.006, 0.006, np.inf] + ) + end = time.perf_counter() + print(f"Computed {qmat.size/3} points in {end - start:.6f} seconds") + + start = time.perf_counter() + Ibin2, qbin2 = NDrebin(I_2D, qmat, + step_size=[0.0035, 0.0035, np.inf], + subpixel=True + ) + end = time.perf_counter() + print(f"Computed {qmat.size/3} points with subpixels in {end - start:.6f} seconds") + + + # Fiducial 2D + [xmesh, ymesh] = np.meshgrid(qbin2[0], qbin2[1]) + Ireal = ( + np.exp( + -((xmesh - mu[0])**2) / (2 * sigma[0]**2) + -((ymesh - mu[1])**2) / (2 * sigma[1]**2) + ) / + (2 * np.pi * sigma[0] * sigma[1]) + ) + + # Plot a 1D slice of the binned data along x (y bins aggregated) + plt.figure(figsize=(4,8)) + + plt.subplot(3, 1, 1) + plt.pcolormesh(qbin[0], qbin[1], np.squeeze(Ibin.T), shading='nearest') + plt.xlabel('x') + plt.ylabel('y') + plt.title('sum') + plt.colorbar() + plt.tight_layout() + + plt.subplot(3, 1, 2) + plt.pcolormesh(qbin2[0], qbin2[1], np.squeeze(Ibin2.T), shading='nearest') + plt.xlabel('x') + plt.ylabel('y') + plt.title('sum') + plt.colorbar() + plt.tight_layout() + + plt.subplot(3, 1, 3) + plt.pcolormesh(xmesh, ymesh, Ireal, shading='nearest') + plt.xlabel('x') + plt.ylabel('y') + plt.title('real') + plt.colorbar() + plt.tight_layout() + plt.show(block=False) + + + + + + + + # test ND gaussian + Ndims = 3 + mu = np.zeros(Ndims) # Mean vector + sigma = np.random.rand(Ndims) # Std dev in x and y + noise = 0.1 + Nvals = int(1e6) + + # Generate random points (x, y) in a 2D square + qmat = -5 + 10 * np.random.rand(Ndims, Nvals) + + # Evaluate 2D Gaussian: + # G(x,y) = (1/(2πσxσy)) * exp(-[(x-μx)^2/(2σx^2) + (y-μy)^2/(2σy^2)]) + exp_op = np.zeros(Nvals) + sigma_tot = 1 + for ind in range(Ndims): + exp_op = exp_op -((qmat[ind,:] - mu[ind])**2) / (2 * sigma[ind]**2) + sigma_tot = sigma_tot * sigma[ind] + I_ND = ( + np.exp(exp_op) / + (2 * np.pi * sigma_tot) + ) + + # Add uniform noise + I_ND = I_ND - noise + 2 * noise * np.random.rand(1,Nvals) + + # Rebin in 2D. + # You can choose finite steps for both x and y depending on how you want bins defined. + import time + start = time.perf_counter() + Ibin, qbin = NDrebin(I_ND, qmat, + step_size=0.2*np.random.rand(Ndims)+0.1, + limits=np.array([[1,2,3],[9,8,7]]) + ) + end = time.perf_counter() + print(f"Computed {Nvals} points in {end - start:.6f} seconds") + + start = time.perf_counter() + Ibin, qbin = NDrebin(I_ND, qmat, + step_size=0.2*np.random.rand(Ndims)+0.1, + limits=np.array([[1,2,3],[9,8,7]]), + subpixel=True + ) + end = time.perf_counter() + print(f"Computed {Nvals} points with subpixels in {end - start:.6f} seconds") + + input() \ No newline at end of file From 7f5421e426670699788d18dc339c6e0fb843f577 Mon Sep 17 00:00:00 2001 From: Paul Neves Date: Mon, 17 Nov 2025 11:12:46 +0100 Subject: [PATCH 02/14] updated NDrebin a bunch and also added NDrebin testing --- sasdata/transforms/NDrebin.py | 240 +++++++++++++++++++++++++------ test/transforms/utest_NDrebin.py | 149 +++++++++++++++++++ 2 files changed, 344 insertions(+), 45 deletions(-) create mode 100644 test/transforms/utest_NDrebin.py diff --git a/sasdata/transforms/NDrebin.py b/sasdata/transforms/NDrebin.py index 43615ba6..4a163b34 100644 --- a/sasdata/transforms/NDrebin.py +++ b/sasdata/transforms/NDrebin.py @@ -12,11 +12,13 @@ def NDrebin(data: Quantity[ArrayLike], coords: Quantity[ArrayLike], + data_errs: Optional[Quantity[ArrayLike]] = None, axes: Optional[list[Quantity[ArrayLike]]] = None, - limits: Optional[List[Sequence[float]]] = None, + upper: Optional[List[Sequence[float]]] = None, + lower: Optional[List[Sequence[float]]] = None, step_size: Optional[List[Sequence[float]]] = None, num_bins: Optional[List[Sequence[int]]] = None, - subpixel: Optional[bool] = False + fractional: Optional[bool] = False ): """ Provide values at points with ND coordinates. @@ -36,23 +38,27 @@ def NDrebin(data: Quantity[ArrayLike], :coords: The locations of each data point, same size of data plus one more dimension with the same length as the dimensionality of the space (Ndim) + :data_errs: Optional, the same size as data, the uncertainties on data :axes: The axes of the coordinate system we are binning into. Defaults to diagonal (e.g. (1,0,0), (0,1,0), and (0,0,1) for 3D data). A list of Ndim element vectors - :limits: The limits along each axis. Defaults to the smallest - and largest values in the data if no limits are provided. - A list of 2 element vectors. + :upper: The upper limits along each axis. Defaults to the largest + values in the data if no limits are provided. + A 1D list of Ndims values. + :lower: The lower limits along each axis. Defaults to the smallest + values in the data if no limits are provided. + A 1D list of Ndims values. :step_size: The size of steps along each axis. Supercedes num_bins. A list of length Ndim. :num_bins: The number of bins along each axis. Superceded by step_size if step_size is provided. At least one of step_size or num_bins must be provided. - :subpixel: Whether to perform subpixel binning or not. Defaults + :fractional: Whether to perform fractional binning or not. Defaults to false. -If false, measurements are binned into one bin, the one they fall within. Roughly a "nearest neighbor" approach. - -If true, subpixel binning will be applied, where + -If true, fractional binning will be applied, where the value of a measurement is distributed to its 2^Ndim nearest neighbors weighted by proximity. For example, if a point falls exactly between two bins, its value will be @@ -64,8 +70,62 @@ def NDrebin(data: Quantity[ArrayLike], Returns: binned_data, bin_centers_list - :binned_data: has size num_bins and is NDimensional - :bin_centers_list: is a list of vectors + :binned_data: has size num_bins and is NDimensional, contains + the binned data + :bin_centers_list: is a list of 1D vectors, contains the + axes of the binned data. The coordinates of bin [i,j,k] + is given by + bin_centers_list[0][i]*axes[i]+bin_centers_list[1][j]*axes[j]+ + bin_centers_list[0][k]*axes[k] + :binned_data_errs: has size num_bins and is NDimensional, contains + the propagated errors of the binned_data + :bins_list: is a list of 1D vectors, is similar to bin_centers_list, + but instead contains the edges of the bins, so it is 1 longer + in each dimension + :step_size: is a list of Ndims numbers, contains the step size + along each dimensino + :num_bins: is a list of Ndims numbers, contains the number + of bins along each dimension + + + An example call might be: + # test syntax 1 + Ndims = 4 + Nvals = int(1e5) + qmat = np.random.rand(Ndims, Nvals) + Imat = np.random.rand(Nvals) + + Ibin, qbin, *rest = NDrebin(Imat, qmat, + step_size=0.1*np.random.rand(Ndims)+0.05, + lower=0.1*np.random.rand(Ndims)+0.0, + upper=0.1*np.random.rand(Ndims)+0.9 + ) + results = NDrebin(Imat, qmat, + step_size=0.1*np.random.rand(Ndims)+0.05, + lower=0.1*np.random.rand(Ndims)+0.0, + upper=0.1*np.random.rand(Ndims)+0.9 + ) + Ibin = results[0] + qbin = results[1] + bins_list = results[2] + step_size = results[3] + num_bins = results[4] + + + # test syntax 2 + Ndims = 2 + Nvals = int(1e5) + qmat = np.random.rand(Ndims, 100, Nvals) + Imat = np.random.rand(100, Nvals) + Imat_errs = np.random.rand(100, Nvals) + + binned_data, bin_centers_list, binned_data_errs, bins_list, step_size, num_bins \ + = NDrebin(Imat, qmat, + data_errs = Imat_errs, + num_bins=[10,20], + axes = np.eye(2), + fractional=True + ) """ @@ -84,7 +144,19 @@ def NDrebin(data: Quantity[ArrayLike], # flatten input data to 1D of length Nvals data_flat = data.reshape(-1) - errors_flat = 0*data_flat # TODO add error propagation + if data_errs is None: + no_errs = True + errors_flat = 0*data_flat # no errors + else: + no_errs = False + errors_flat = data_errs.reshape(-1) + + if errors_flat.shape != data_flat.shape: + ValueError("Data and errors have to have the same shape.") + + # if 1D, need to add a size 1 dimension index to coords + if Ndims == 1: + coords = coords.reshape(-1, 1) # check if the first axis of coords is the dimensions axis if coords.shape[0] == Ndims: @@ -117,24 +189,41 @@ def NDrebin(data: Quantity[ArrayLike], # if limits were not provided, default to the min and max # coord in each dimension - if limits is None: - limits = np.zeros((2,Ndims)) + if upper is None: + upper = np.zeros((Ndims)) for ind in range(Ndims): - limits[0,ind] = np.min(coords_flat[:,ind]) - limits[1,ind] = np.max(coords_flat[:,ind]) + upper[ind] = np.max(coords_flat[:,ind]) + if lower is None: + lower = np.zeros((Ndims)) + for ind in range(Ndims): + lower[ind] = np.min(coords_flat[:,ind]) + + # if provided just one limit for 1D as a scalar, make it a list + # for formatting purposes + lower = np.atleast_1d(lower) + upper = np.atleast_1d(upper) + # if not isinstance(lower, (list, tuple)): + # lower = [lower] + # print(lower) + # if not isinstance(upper, (list, tuple)): + # upper = [upper] # clean up limits + if lower.size != Ndims: + ValueError("Lower limits must be None or a 1D iterable of length Ndims.") + if upper.size != Ndims: + ValueError("Upper limits must be None or a 1D iterable of length Ndims.") for ind in range(Ndims): # if individual limits are nan, inf, none, etc, replace with min/max - if not np.isfinite(limits[0,ind]): - limits[0,ind] = np.min(coords_flat[:,ind]) - if not np.isfinite(limits[1,ind]): - limits[1,ind] = np.max(coords_flat[:,ind]) + if not np.isfinite(lower[ind]): + lower[ind] = np.min(coords_flat[:,ind]) + if not np.isfinite(upper[ind]): + upper[ind] = np.max(coords_flat[:,ind]) # if any of the limits are in the wrong order, flip them - if limits[0,ind] > limits[1,ind]: - temp = limits[0,ind] - limits[0,ind] = limits[1,ind] - limits[1,ind] = temp + if lower[ind] > upper[ind]: + temp = lower[ind] + lower[ind] = upper[ind] + upper[ind] = temp # bins_list is a Ndims long list of vectors which are the edges of @@ -145,12 +234,21 @@ def NDrebin(data: Quantity[ArrayLike], # each bin. Each vector is num_bins[i] long bin_centers_list = [] + + # create the bins in each dimension if step_size is None: # if step_size was not specified, derive from num_bins step_size = [] + # if provided just one num_bin for 1D as a scalar, make it a list + # for formatting purposes + # if not isinstance(num_bins, (list, tuple)): + # num_bins = [num_bins] + num_bins = np.atleast_1d(num_bins) + if num_bins.size != Ndims: + ValueError("num_bins must be None or a 1D iterable of length Ndims.") for ind in range(Ndims): - these_bins = np.linspace(limits[0,ind], limits[1,ind], num_bins[ind]+1) + these_bins = np.linspace(lower[ind], upper[ind], num_bins[ind]+1) these_centers = (these_bins[:-1] + these_bins[1:]) / 2.0 this_step_size = these_bins[1] - these_bins[0] @@ -160,14 +258,21 @@ def NDrebin(data: Quantity[ArrayLike], else: # else use step_size and derive num_bins num_bins = [] + # if provided just one step_size for 1D as a scalar, make it a list + # for formatting purposes + # if not isinstance(step_size, (list, tuple)): + # step_size = [step_size] + step_size = np.atleast_1d(step_size) + if step_size.size != Ndims: + ValueError("step_size must be None or a 1D iterable of length Ndims.") for ind in range(Ndims): - if limits[0,ind] == limits[1,ind]: + if lower[ind] == upper[ind]: # min and max of limits are the same, i.e. data has to be exactly this - these_bins = np.array([limits[0,ind], limits[0,ind]]) + these_bins = np.array([lower[ind], lower[ind]]) else: - these_bins = np.arange(limits[0,ind], limits[1,ind], step_size[ind]) - if these_bins[-1] != limits[1,ind]: - these_bins = np.append(these_bins, limits[1,ind]) + these_bins = np.arange(lower[ind], upper[ind], step_size[ind]) + if these_bins[-1] != upper[ind]: + these_bins = np.append(these_bins, upper[ind]) these_centers = (these_bins[:-1] + these_bins[1:]) / 2.0 this_num_bins = these_bins.size-1 @@ -192,7 +297,7 @@ def NDrebin(data: Quantity[ArrayLike], bin_inds[coords_flat[:, ind]==bins_list[ind][-1], ind] = num_bins[ind]-1 bin_inds[coords_flat[:, ind]>bins_list[ind][-1], ind] = np.nan - if not subpixel: + if not fractional: # this is a non-vector way of binning the data: # for ind in range(Nvals): # this_bin_ind = bin_inds[ind,:] @@ -246,7 +351,7 @@ def NDrebin(data: Quantity[ArrayLike], partial_weights = 1.-np.mod(valid_inds, 1) - # for each dimension, double the amount of subpixels + # for each dimension, double the amount of subpoints # for a point at x_i, 1-x_i goes to for ind in range(Ndims): # will be where the bin goes @@ -302,7 +407,10 @@ def NDrebin(data: Quantity[ArrayLike], binned_data_errs[n_samples==0] = np.nan - return binned_data, bin_centers_list + if no_errs: + return binned_data, bin_centers_list, bins_list, step_size, num_bins + else: + return binned_data, bin_centers_list, binned_data_errs, bins_list, step_size, num_bins @@ -313,6 +421,7 @@ def NDrebin(data: Quantity[ArrayLike], if __name__ == "__main__": + # a bunch of local testing import numpy as np import matplotlib.pyplot as plt @@ -348,19 +457,19 @@ def NDrebin(data: Quantity[ArrayLike], # NDrebin import time start = time.perf_counter() - Ibin, qbin = NDrebin(I_1, qmat, + Ibin, qbin, *rest = NDrebin(I_1, qmat, step_size=[0.1,np.inf,np.inf] ) end = time.perf_counter() print(f"Computed {Nvals} points in {end - start:.6f} seconds") start = time.perf_counter() - Ibin2, qbin2 = NDrebin(I_1, qmat, + Ibin2, qbin2, *rest = NDrebin(I_1, qmat, step_size=[0.1,np.inf,np.inf], - subpixel = True + fractional = True ) end = time.perf_counter() - print(f"Computed {Nvals} points with subpixels in {end - start:.6f} seconds") + print(f"Computed {Nvals} points with fractional binning in {end - start:.6f} seconds") # Plot plt.figure() @@ -432,19 +541,19 @@ def NDrebin(data: Quantity[ArrayLike], # You can choose finite steps for both x and y depending on how you want bins defined. import time start = time.perf_counter() - Ibin, qbin = NDrebin(I_2D, qmat, + Ibin, qbin, *rest = NDrebin(I_2D, qmat, step_size=[0.006, 0.006, np.inf] ) end = time.perf_counter() print(f"Computed {qmat.size/3} points in {end - start:.6f} seconds") start = time.perf_counter() - Ibin2, qbin2 = NDrebin(I_2D, qmat, + Ibin2, qbin2, *rest = NDrebin(I_2D, qmat, step_size=[0.0035, 0.0035, np.inf], - subpixel=True + fractional=True ) end = time.perf_counter() - print(f"Computed {qmat.size/3} points with subpixels in {end - start:.6f} seconds") + print(f"Computed {qmat.size/3} points with fractional binning in {end - start:.6f} seconds") # Fiducial 2D @@ -520,20 +629,61 @@ def NDrebin(data: Quantity[ArrayLike], # You can choose finite steps for both x and y depending on how you want bins defined. import time start = time.perf_counter() - Ibin, qbin = NDrebin(I_ND, qmat, + Ibin, qbin, *rest = NDrebin(I_ND, qmat, step_size=0.2*np.random.rand(Ndims)+0.1, - limits=np.array([[1,2,3],[9,8,7]]) + lower=[1,2,3], + upper=[9,8,7] ) end = time.perf_counter() print(f"Computed {Nvals} points in {end - start:.6f} seconds") start = time.perf_counter() - Ibin, qbin = NDrebin(I_ND, qmat, + Ibin, qbin, *rest = NDrebin(I_ND, qmat, step_size=0.2*np.random.rand(Ndims)+0.1, - limits=np.array([[1,2,3],[9,8,7]]), - subpixel=True + lower=[1,2,3], + upper=[9,8,7], + fractional=True ) end = time.perf_counter() - print(f"Computed {Nvals} points with subpixels in {end - start:.6f} seconds") + print(f"Computed {Nvals} points with fractional binning in {end - start:.6f} seconds") + + + + # test syntax + Ndims = 4 + Nvals = int(1e5) + qmat = np.random.rand(Ndims, Nvals) + Imat = np.random.rand(Nvals) + + Ibin, qbin, *rest = NDrebin(Imat, qmat, + step_size=0.1*np.random.rand(Ndims)+0.05, + lower=0.1*np.random.rand(Ndims)+0.0, + upper=0.1*np.random.rand(Ndims)+0.9 + ) + results = NDrebin(Imat, qmat, + step_size=0.1*np.random.rand(Ndims)+0.05, + lower=0.1*np.random.rand(Ndims)+0.0, + upper=0.1*np.random.rand(Ndims)+0.9 + ) + Ibin = results[0] + qbin = results[1] + bins_list = results[2] + step_size = results[3] + num_bins = results[4] + + # test syntax + Ndims = 2 + Nvals = int(1e5) + qmat = np.random.rand(Ndims, 100, Nvals) + Imat = np.random.rand(100, Nvals) + Imat_errs = np.random.rand(100, Nvals) + + binned_data, bin_centers_list, binned_data_errs, bins_list, step_size, num_bins \ + = NDrebin(Imat, qmat, + data_errs = Imat_errs, + num_bins=[10,20], + axes = np.eye(2), + fractional=True + ) input() \ No newline at end of file diff --git a/test/transforms/utest_NDrebin.py b/test/transforms/utest_NDrebin.py new file mode 100644 index 00000000..d593a5cd --- /dev/null +++ b/test/transforms/utest_NDrebin.py @@ -0,0 +1,149 @@ +import numpy as np +import pytest +from matplotlib import pyplot as plt +import time + +from sasdata.transforms.NDrebin import NDrebin + + + + +def test_1D_exact(show_plots: bool): + # Parameters for the Gaussian function + mu = 0.0 # Mean + sigma = 1.0 # Standard deviation + + # fiducial + xreal = np.linspace(-5, 5, 11) + Ireal = np.exp(-((xreal - mu) ** 2) / (2 * sigma ** 2)) / (sigma * np.sqrt(2 * np.pi)) + + # rebin to the exact same bins + Ibin, qbin, *rest = NDrebin(Ireal, xreal, + lower=-5.5, upper=5.5, num_bins=11) + + assert all(Ibin == Ireal) + assert all(qbin[0] == xreal) + + # Plot + if show_plots: + plt.figure() + plt.plot(qbin[0], Ibin, 'o', linewidth=2, label='bin') + plt.plot(xreal, Ireal, 'k-', linewidth=2, label='exact') + + plt.xlabel('x') + plt.ylabel('I') + plt.legend() + plt.tight_layout() + plt.show() + + + # rebin to the exact same bins with fractional + Ibin, qbin, *rest = NDrebin(Ireal, xreal, lower=-5.5, upper=5.5, num_bins=11, fractional=True) + + assert all(Ibin == Ireal) + + # Plot + if show_plots: + plt.figure() + plt.plot(qbin[0], Ibin, 'o', linewidth=2, label='fractional bin') + plt.plot(xreal, Ireal, 'k-', linewidth=2, label='exact') + + plt.xlabel('x') + plt.ylabel('I') + plt.legend() + plt.tight_layout() + plt.show() + + +def test_syntax(): + # test syntax + Ndims = 4 + Nvals = int(1e4) + qmat = np.random.rand(Ndims, Nvals) + Imat = np.random.rand(Nvals) + + Ibin, qbin, *rest = NDrebin(Imat, qmat, + step_size=0.1*np.random.rand(Ndims)+0.05, + lower=0.1*np.random.rand(Ndims)+0.0, + upper=0.1*np.random.rand(Ndims)+0.9 + ) + results = NDrebin(Imat, qmat, + step_size=0.1*np.random.rand(Ndims)+0.05, + lower=0.1*np.random.rand(Ndims)+0.0, + upper=0.1*np.random.rand(Ndims)+0.9 + ) + Ibin = results[0] + qbin = results[1] + bins_list = results[2] + step_size = results[3] + num_bins = results[4] + + # test syntax + Ndims = 2 + Nvals = int(1e4) + qmat = np.random.rand(Ndims, 100, Nvals) + Imat = np.random.rand(100, Nvals) + Imat_errs = np.random.rand(100, Nvals) + + binned_data, bin_centers_list, binned_data_errs, bins_list, step_size, num_bins \ + = NDrebin(Imat, qmat, + data_errs = Imat_errs, + num_bins=[10,20], + axes = np.eye(2), + fractional=True + ) + + +# test ND gaussian +def test_ND(): + Ndims = 4 + mu = np.zeros(Ndims) # Mean vector + sigma = np.random.rand(Ndims) # Std dev in x and y + noise = 0.1 + Nvals = int(1e6) + + # Generate random points (x, y) in a 2D square + qmat = -5 + 10 * np.random.rand(Ndims, Nvals) + + # Evaluate 2D Gaussian: + # G(x,y) = (1/(2πσxσy)) * exp(-[(x-μx)^2/(2σx^2) + (y-μy)^2/(2σy^2)]) + exp_op = np.zeros(Nvals) + sigma_tot = 1 + for ind in range(Ndims): + exp_op = exp_op -((qmat[ind,:] - mu[ind])**2) / (2 * sigma[ind]**2) + sigma_tot = sigma_tot * sigma[ind] + I_ND = ( + np.exp(exp_op) / + (2 * np.pi * sigma_tot) + ) + + # Add uniform noise + I_ND = I_ND - noise + 2 * noise * np.random.rand(1,Nvals) + + # Rebin in 2D. + # You can choose finite steps for both x and y depending on how you want bins defined. + start = time.perf_counter() + Ibin, qbin, *rest = NDrebin(I_ND, qmat, + step_size=0.2*np.random.rand(Ndims)+0.1, + lower=[1,2,3,0], + upper=[9,8,7,9.5] + ) + end = time.perf_counter() + print(f"Computed {Nvals} points in {end - start:.6f} seconds") + + start = time.perf_counter() + Ibin, qbin, *rest = NDrebin(I_ND, qmat, + step_size=0.2*np.random.rand(Ndims)+0.1, + lower=[1,2,3,0], + upper=[9,8,7,9.5], + fractional=True + ) + end = time.perf_counter() + print(f"Computed {Nvals} points with fractional binning in {end - start:.6f} seconds") + + + +if __name__ == "__main__": + test_1D_exact(show_plots=False) + test_syntax() + test_ND() \ No newline at end of file From 2ca006285fe4183bd071d836bfde99b005e23097 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Mon, 17 Nov 2025 11:30:09 +0000 Subject: [PATCH 03/14] [pre-commit.ci lite] apply automatic fixes for ruff linting errors --- sasdata/transforms/NDrebin.py | 58 +++++++++++++++----------------- test/transforms/utest_NDrebin.py | 12 +++---- 2 files changed, 33 insertions(+), 37 deletions(-) diff --git a/sasdata/transforms/NDrebin.py b/sasdata/transforms/NDrebin.py index 4a163b34..fe1f6cd2 100644 --- a/sasdata/transforms/NDrebin.py +++ b/sasdata/transforms/NDrebin.py @@ -1,24 +1,22 @@ +import time +from collections.abc import Sequence + import numpy as np from numpy._typing import ArrayLike -from typing import List, Optional, Sequence - from sasdata.quantities.quantity import Quantity -import time - - def NDrebin(data: Quantity[ArrayLike], coords: Quantity[ArrayLike], - data_errs: Optional[Quantity[ArrayLike]] = None, - axes: Optional[list[Quantity[ArrayLike]]] = None, - upper: Optional[List[Sequence[float]]] = None, - lower: Optional[List[Sequence[float]]] = None, - step_size: Optional[List[Sequence[float]]] = None, - num_bins: Optional[List[Sequence[int]]] = None, - fractional: Optional[bool] = False + data_errs: Quantity[ArrayLike] | None = None, + axes: list[Quantity[ArrayLike]] | None = None, + upper: list[Sequence[float]] | None = None, + lower: list[Sequence[float]] | None = None, + step_size: list[Sequence[float]] | None = None, + num_bins: list[Sequence[int]] | None = None, + fractional: bool | None = False ): """ Provide values at points with ND coordinates. @@ -137,7 +135,7 @@ def NDrebin(data: Quantity[ArrayLike], # if Ndims is not an integer value we have a problem if not Ndims.is_integer(): - raise ValueError(f"The coords have to have the same shape as " + raise ValueError("The coords have to have the same shape as " "the data, plus one more dimension which is " "length Ndims") Ndims = int(Ndims) @@ -153,7 +151,7 @@ def NDrebin(data: Quantity[ArrayLike], if errors_flat.shape != data_flat.shape: ValueError("Data and errors have to have the same shape.") - + # if 1D, need to add a size 1 dimension index to coords if Ndims == 1: coords = coords.reshape(-1, 1) @@ -168,9 +166,9 @@ def NDrebin(data: Quantity[ArrayLike], else: # search if any axis is size Ndims dim_axis = next(i for i, s in enumerate(coords.shape) if s == Ndims) - + if not coords.shape[dim_axis] == Ndims: - raise ValueError(f"The coords have to have one dimension which is " + raise ValueError("The coords have to have one dimension which is " "the dimensionality of the space") # flatten coords to size Nvals x Ndims @@ -187,14 +185,14 @@ def NDrebin(data: Quantity[ArrayLike], coords_flat = np.tensordot(coords_flat, axes_inv, axes=([1], [0])) - # if limits were not provided, default to the min and max + # if limits were not provided, default to the min and max # coord in each dimension if upper is None: - upper = np.zeros((Ndims)) + upper = np.zeros(Ndims) for ind in range(Ndims): upper[ind] = np.max(coords_flat[:,ind]) if lower is None: - lower = np.zeros((Ndims)) + lower = np.zeros(Ndims) for ind in range(Ndims): lower[ind] = np.min(coords_flat[:,ind]) @@ -296,7 +294,7 @@ def NDrebin(data: Quantity[ArrayLike], bin_inds[coords_flat[:, ind]bins_list[ind][-1], ind] = np.nan - + if not fractional: # this is a non-vector way of binning the data: # for ind in range(Nvals): @@ -352,7 +350,7 @@ def NDrebin(data: Quantity[ArrayLike], # for each dimension, double the amount of subpoints - # for a point at x_i, 1-x_i goes to + # for a point at x_i, 1-x_i goes to for ind in range(Ndims): # will be where the bin goes arr_mod = valid_inds.copy() @@ -363,15 +361,15 @@ def NDrebin(data: Quantity[ArrayLike], arr_mod[:, ind] = 1. - arr_mod[:, ind] partial_weights = np.vstack([partial_weights, arr_mod]) - + # any bins that ended up outside just get clamped for ind in range(Ndims): valid_inds[valid_inds[:, ind]<0, ind] = 0 valid_inds[valid_inds[:, ind]>num_bins[ind]-1, ind] = num_bins[ind]-1 - + # weights are the product of partial weights weights = np.prod(partial_weights, axis=1) - + # need to tile the data and errs to weight them for each bin data_valid = np.tile(data_flat[valid], 2**Ndims) errs_valid = np.tile(errors_flat[valid], 2**Ndims) @@ -394,7 +392,7 @@ def NDrebin(data: Quantity[ArrayLike], binned_data_errs = err_sum.reshape(num_bins) n_samples = ns_sum.reshape(num_bins) - + # normalize binned_data by the number of times sampled @@ -422,8 +420,8 @@ def NDrebin(data: Quantity[ArrayLike], if __name__ == "__main__": # a bunch of local testing - import numpy as np import matplotlib.pyplot as plt + import numpy as np if True: # Parameters for the Gaussian function @@ -500,7 +498,7 @@ def NDrebin(data: Quantity[ArrayLike], # Generate our 2D detector grid x = np.arange(-64,64)*pix_x y = np.arange(-64,64)*pix_y - + [xmesh, ymesh] = np.meshgrid(x, y) # calculate qx, qy, qz @@ -537,7 +535,7 @@ def NDrebin(data: Quantity[ArrayLike], # Add uniform noise I_2D = I_2D - noise + 2 * noise * np.random.rand(*I_2D.shape) - # Rebin in 2D. + # Rebin in 2D. # You can choose finite steps for both x and y depending on how you want bins defined. import time start = time.perf_counter() @@ -625,7 +623,7 @@ def NDrebin(data: Quantity[ArrayLike], # Add uniform noise I_ND = I_ND - noise + 2 * noise * np.random.rand(1,Nvals) - # Rebin in 2D. + # Rebin in 2D. # You can choose finite steps for both x and y depending on how you want bins defined. import time start = time.perf_counter() @@ -686,4 +684,4 @@ def NDrebin(data: Quantity[ArrayLike], fractional=True ) - input() \ No newline at end of file + input() diff --git a/test/transforms/utest_NDrebin.py b/test/transforms/utest_NDrebin.py index d593a5cd..ae8f7b70 100644 --- a/test/transforms/utest_NDrebin.py +++ b/test/transforms/utest_NDrebin.py @@ -1,13 +1,11 @@ +import time + import numpy as np -import pytest from matplotlib import pyplot as plt -import time from sasdata.transforms.NDrebin import NDrebin - - def test_1D_exact(show_plots: bool): # Parameters for the Gaussian function mu = 0.0 # Mean @@ -36,7 +34,7 @@ def test_1D_exact(show_plots: bool): plt.tight_layout() plt.show() - + # rebin to the exact same bins with fractional Ibin, qbin, *rest = NDrebin(Ireal, xreal, lower=-5.5, upper=5.5, num_bins=11, fractional=True) @@ -120,7 +118,7 @@ def test_ND(): # Add uniform noise I_ND = I_ND - noise + 2 * noise * np.random.rand(1,Nvals) - # Rebin in 2D. + # Rebin in 2D. # You can choose finite steps for both x and y depending on how you want bins defined. start = time.perf_counter() Ibin, qbin, *rest = NDrebin(I_ND, qmat, @@ -146,4 +144,4 @@ def test_ND(): if __name__ == "__main__": test_1D_exact(show_plots=False) test_syntax() - test_ND() \ No newline at end of file + test_ND() From d2c85f278cbb319da9f119f541c51e64ad572a3f Mon Sep 17 00:00:00 2001 From: Paul Neves Date: Mon, 17 Nov 2025 16:37:28 +0100 Subject: [PATCH 04/14] added note on using NDrebin for M dimensional integration --- sasdata/transforms/NDrebin.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sasdata/transforms/NDrebin.py b/sasdata/transforms/NDrebin.py index fe1f6cd2..d8b70145 100644 --- a/sasdata/transforms/NDrebin.py +++ b/sasdata/transforms/NDrebin.py @@ -32,6 +32,11 @@ def NDrebin(data: Quantity[ArrayLike], Note that this does lose some information from the underlying data, as you are essentially averaging multiple measurements into one bin. + Note that once can use this function to perform integrations over + one or more dimensions by setting the num_bins to 1 or the + step_size to infinity for one or more axes. The integration will + be performed from the lower to upper bound of that axis. + :data: The data at each point :coords: The locations of each data point, same size of data plus one more dimension with the same length as the From 6cc296ead003db0fd33013d4f2e2c33f82de091a Mon Sep 17 00:00:00 2001 From: Paul Neves Date: Tue, 18 Nov 2025 09:57:55 +0100 Subject: [PATCH 05/14] first pass at rebasing NDrebin to a class --- sasdata/transforms/NDrebin.py | 344 ++++++++++++++++++++++++++++++++++ 1 file changed, 344 insertions(+) diff --git a/sasdata/transforms/NDrebin.py b/sasdata/transforms/NDrebin.py index d8b70145..359ea1d9 100644 --- a/sasdata/transforms/NDrebin.py +++ b/sasdata/transforms/NDrebin.py @@ -8,6 +8,349 @@ from sasdata.quantities.quantity import Quantity +class binnedNDdata: + + def __init__(self, binned_data: Quantity[ArrayLike], + bin_centers_list: list[Quantity[ArrayLike]] | None = None, + binned_data_errs: Quantity[ArrayLike] | None = None, + bins_list: list[Quantity[ArrayLike]] | None = None, + step_size: list[Sequence[float]] | None = None, + num_bins: list[Sequence[int]] | None = None, + ): + self.binned_data = binned_data + self.bin_centers_list = bin_centers_list + self.binned_data_errs = binned_data_errs + self.bins_list = bins_list + self.step_size = step_size + self.num_bins = num_bins + +class NDRebin: + + def __init__(self, data: Quantity[ArrayLike], + coords: Quantity[ArrayLike], + data_errs: Quantity[ArrayLike] | None = None, + axes: list[Quantity[ArrayLike]] | None = None, + upper: list[Sequence[float]] | None = None, + lower: list[Sequence[float]] | None = None, + step_size: list[Sequence[float]] | None = None, + num_bins: list[Sequence[int]] | None = None, + fractional: bool | None = False + ): + self.data = data + self.coords = coords + self.data_errs = data_errs + self.axes = axes + self.upper = upper + self.lower = lower + self.step_size = step_size + self.num_bins = num_bins + self.fractional = fractional + self.binned_data = np.zeroes(self.data.shape()) # I'm not sure this is actually correct. + + # check the size of the data and coords inputs + # and define Ndims and Nvals + self.check_data_coords() + + # flatten the input data and errors + self.check_data_errs() + + # flatten the coords + self.flatten_coords() + + # handle optional axes + if self.axes is None: + # make axes if not provided + self.make_axes() + else: + # project into specified axes + self.project_axes() + + # build the limits + self.build_limits() + + # make the bins + self.make_bins() + + # make the bin indices + self.create_bin_inds() + + def __call__(self): + if self.fractional: + self._calculate_fractional_binning() + else: + self.calculate_bins() + + def check_data_coords(self): + # Identify number of points + self.Nvals = self.data.size + + # Identify number of dimensions + self.Ndims = self.coords.size / self.Nvals + + # if Ndims is not an integer value we have a problem + if not self.Ndims.is_integer(): + raise ValueError("The coords have to have the same shape as " + "the data, plus one more dimension which is " + "length Ndims") + self.Ndims = int(Ndims) + + def check_data_errs(self): + # flatten input data to 1D of length Nvals + self.data_flat = self.data.reshape(-1) + if self.data_errs is None: + self.errors_flat = 0*self.data_flat # no errors + else: + self.errors_flat = self.data_errs.reshape(-1) + + if self.errors_flat.shape != self.data_flat.shape: + ValueError("Data and errors have to have the same shape.") + + def flatten_coords(self): + # if 1D, need to add a size 1 dimension index to coords + if self.Ndims == 1: + self.coords = self.coords.reshape(-1, 1) + + # check if the first axis of coords is the dimensions axis + if self.coords.shape[0] == self.Ndims: + # first check if it is the first axis + self.dim_axis = 0 + elif self.coords.shape[-1] == self.Ndims: + # now check if it is the last axis + self.dim_axis = -1 + else: + # search if any axis is size Ndims + self.dim_axis = next(i for i, s in enumerate(self.coords.shape) if s == self.Ndims) + + if not self.coords.shape[self.dim_axis] == self.Ndims: + raise ValueError("The coords have to have one dimension which is " + "the dimensionality of the space") + + # flatten coords to size Nvals x Ndims + moved = np.moveaxis(self.coords, self.dim_axis, 0) + self.coords_flat = moved.reshape(self.Ndims, -1).T + + def make_axes(self): + # if axes are not provided, default to identity + if self.axes is None: + self.axes = np.eye(self.Ndims) + + def project_axes(self): + # now project the data into the axes + self.axes_inv = np.linalg.inv(self.axes) + self.coords_flat = np.tensordot(self.coords_flat, self.axes_inv, axes=([1], [0])) + + def build_limits(self): + # if limits were not provided, default to the min and max + # coord in each dimension + if self.upper is None: + self.upper = np.zeros(self.Ndims) + for ind in range(self.Ndims): + self.upper[ind] = np.max(self.coords_flat[:,ind]) + if self.lower is None: + self.lower = np.zeros(self.Ndims) + for ind in range(self.Ndims): + self.lower[ind] = np.min(self.coords_flat[:,ind]) + + # if provided just one limit for 1D as a scalar, make it a list + # for formatting purposes + self.lower = np.atleast_1d(self.lower) + self.upper = np.atleast_1d(self.upper) + + # clean up limits + if self.lower.size != self.Ndims: + ValueError("Lower limits must be None or a 1D iterable of length Ndims.") + if self.upper.size != self.Ndims: + ValueError("Upper limits must be None or a 1D iterable of length Ndims.") + for ind in range(self.Ndims): + # if individual limits are nan, inf, none, etc, replace with min/max + if not np.isfinite(self.lower[ind]): + self.lower[ind] = np.min(self.coords_flat[:,ind]) + if not np.isfinite(self.upper[ind]): + self.upper[ind] = np.max(self.coords_flat[:,ind]) + # if any of the limits are in the wrong order, flip them + if self.lower[ind] > self.upper[ind]: + temp = self.lower[ind] + self.lower[ind] = self.upper[ind] + self.upper[ind] = temp + + def make_bins(self): + # bins_list is a Ndims long list of vectors which are the edges of + # each bin. Each vector is num_bins[i]+1 long + self.bins_list = [] + + # bin_centers_list is a Ndims long list of vectors which are the centers of + # each bin. Each vector is num_bins[i] long + self.bin_centers_list = [] + + # create the bins in each dimension + if self.step_size is None: + self.step_size_from_num_bins() + else: + self.num_bins_from_step_size() + + def step_size_from_num_bins(self): + # if step_size was not specified, derive from num_bins + self.step_size = [] + # if provided just one num_bin for 1D as a scalar, make it a list + # for formatting purposes + self.num_bins = np.atleast_1d(self.num_bins) + if self.num_bins.size != self.Ndims: + ValueError("num_bins must be None or a 1D iterable of length Ndims.") + for ind in range(self.Ndims): + these_bins = np.linspace(self.lower[ind], self.upper[ind], self.num_bins[ind]+1) + these_centers = (these_bins[:-1] + these_bins[1:]) / 2.0 + this_step_size = these_bins[1] - these_bins[0] + + self.bins_list.append(these_bins) + self.bin_centers_list.append(these_centers) + self.step_size.append(this_step_size) + + def num_bins_from_step_size(self): + # if num_bins was not specified, derive from step_size + self.num_bins = [] + # if provided just one step_size for 1D as a scalar, make it a list + # for formatting purposes + self.step_size = np.atleast_1d(step_size) + if self.step_size.size != self.Ndims: + ValueError("step_size must be None or a 1D iterable of length Ndims.") + for ind in range(self.Ndims): + if self.lower[ind] == self.upper[ind]: + # min and max of limits are the same, i.e. data has to be exactly this + these_bins = np.array([self.lower[ind], self.lower[ind]]) + else: + these_bins = np.arange(self.lower[ind], self.upper[ind], self.step_size[ind]) + if these_bins[-1] != self.upper[ind]: + these_bins = np.append(these_bins, self.upper[ind]) + these_centers = (these_bins[:-1] + these_bins[1:]) / 2.0 + this_num_bins = these_bins.size-1 + + self.bins_list.append(these_bins) + self.bin_centers_list.append(these_centers) + self.num_bins.append(this_num_bins) + + def create_bin_inds(self): + # create the bin inds for each data point as a Nvals x Ndims long vector + self.bin_inds = np.zeros((self.Nvals, self.Ndims)) + for ind in range(self.Ndims): + this_min = self.bins_list[ind][0] + this_step = self.step_size[ind] + self.bin_inds[:, ind] = (self.coords_flat[:,ind] - this_min) / this_step + # any that are outside the bin limits should be removed + self.bin_inds[self.coords_flat[:, ind]< self.bins_list[ind][0], ind] = np.nan + self.bin_inds[self.coords_flat[:, ind]==self.bins_list[ind][-1], ind] = num_bins[ind]-1 + self.bin_inds[self.coords_flat[:, ind]> self.bins_list[ind][-1], ind] = np.nan + + def calculate_bins(self): + + # this is a non-vector way of binning the data: + # for ind in range(Nvals): + # this_bin_ind = bin_inds[ind,:] + # if not np.isnan(this_bin_ind).any(): + # this_bin_ind = this_bin_ind.astype(int) + # binned_data[*this_bin_ind] = binned_data[*this_bin_ind] + data_flat[ind] + # binned_data_errs[*this_bin_ind] = binned_data_errs[*this_bin_ind] + errors_flat[ind]**2 + # n_samples[*this_bin_ind] = n_samples[*this_bin_ind] + 1 + + + # and here is a vector equivalent + # ------------------------------------------------------------- + # Inputs: + # bin_inds : (Nvals, Ndims) array of indices, some rows may contain NaN + # data_flat : (Nvals,) values to accumulate + # errors_flat : (Nvals,) errors to accumulate (squared) + # binned_data : Ndims-dimensional array (output) + # binned_data_errs : Ndims-dimensional array (output) + # n_samples : Ndims-dimensional array (output) + # ------------------------------------------------------------- + + # 1. Identify valid rows (no NaNs) + valid = ~np.isnan(self.bin_inds).any(axis=1) + + # 2. Convert valid bins to integer indices + inds_int = self.bin_inds[valid].astype(int) + + # 3. Map multidimensional indices → flat indices + flat_idx = np.ravel_multi_index(inds_int.T, dims=self.num_bins) + + # 4. Use bincount to accumulate in a vectorized way + size = np.prod(self.num_bins) + + bd_sum = np.bincount(flat_idx, weights=self.data_flat[valid], minlength=size) + err_sum = np.bincount(flat_idx, weights=self.errors_flat[valid]**2, minlength=size) + ns_sum = np.bincount(flat_idx, minlength=size) + + # 5. Reshape and add into the original arrays + self.binned_data = bd_sum.reshape(self.num_bins) + self.binned_data_errs = err_sum.reshape(self.num_bins) + self.n_samples = ns_sum.reshape(self.num_bins) + + def calculate_fractional_binning(self): + + # more convenient to work with half shifted inds + bin_inds_frac = self.bin_inds - 0.5 + + # 1. Identify valid rows (no NaNs) + valid = ~np.isnan(bin_inds_frac).any(axis=1) + valid_inds = bin_inds_frac[valid] + partial_weights = 1.-np.mod(valid_inds, 1) + + + # for each dimension, double the amount of subpoints + # for a point at x_i, 1-x_i goes to + for ind in range(self.Ndims): + # will be where the bin goes + arr_mod = valid_inds.copy() + arr_mod[:, ind] += 1. + valid_inds = np.vstack([valid_inds, arr_mod]) + # how close it is to that bin + arr_mod = partial_weights.copy() + arr_mod[:, ind] = 1. - arr_mod[:, ind] + partial_weights = np.vstack([partial_weights, arr_mod]) + + + # any bins that ended up outside just get clamped + for ind in range(self.Ndims): + valid_inds[valid_inds[:, ind]<0, ind] = 0 + valid_inds[valid_inds[:, ind]>self.num_bins[ind]-1, ind] = self.num_bins[ind]-1 + + # weights are the product of partial weights + weights = np.prod(partial_weights, axis=1) + + # need to tile the data and errs to weight them for each bin + data_valid = np.tile(self.data_flat[valid], 2**self.Ndims) + errs_valid = np.tile(self.errors_flat[valid], 2**self.Ndims) + + # 2. Convert valid bins to integer indices + inds_int = valid_inds.astype(int) + + # 3. Map multidimensional indices → flat indices + flat_idx = np.ravel_multi_index(inds_int.T, dims=self.num_bins) + + # 4. Use bincount to accumulate in a vectorized way + size = np.prod(self.num_bins) + + bd_sum = np.bincount(flat_idx, weights=weights*data_valid, minlength=size) + err_sum = np.bincount(flat_idx, weights=(weights**2)*(errs_valid**2), minlength=size) + ns_sum = np.bincount(flat_idx, weights=weights, minlength=size) + + # 5. Reshape and add into the original arrays + self.binned_data = bd_sum.reshape(num_bins) + self.binned_data_errs = err_sum.reshape(num_bins) + self.n_samples = ns_sum.reshape(num_bins) + + def norm_data(self): + # normalize binned_data by the number of times sampled + with np.errstate(divide='ignore', invalid='ignore'): + binned_data = np.divide(binned_data, self.n_samples) + binned_data_errs = np.divide(np.sqrt(binned_data_errs), self.n_samples) + + # any bins with no samples is nan + binned_data[self.n_samples==0] = np.nan + binned_data_errs[self.n_samples==0] = np.nan + + + + def NDrebin(data: Quantity[ArrayLike], coords: Quantity[ArrayLike], data_errs: Quantity[ArrayLike] | None = None, @@ -92,6 +435,7 @@ def NDrebin(data: Quantity[ArrayLike], An example call might be: + .. code-block:: # test syntax 1 Ndims = 4 Nvals = int(1e5) From 95bea507be9b12cca0cbd81ba9771bea928e155d Mon Sep 17 00:00:00 2001 From: Paul Neves Date: Tue, 18 Nov 2025 11:32:54 +0100 Subject: [PATCH 06/14] second pass at cleaning up NDrebin class --- sasdata/transforms/NDrebin.py | 161 ++++++++++++++++++++++------------ 1 file changed, 104 insertions(+), 57 deletions(-) diff --git a/sasdata/transforms/NDrebin.py b/sasdata/transforms/NDrebin.py index 359ea1d9..b7fcbf21 100644 --- a/sasdata/transforms/NDrebin.py +++ b/sasdata/transforms/NDrebin.py @@ -9,6 +9,18 @@ class binnedNDdata: + """ + N-dimensional rebinning of data into regular bins, with optional + fractional binning and error propagation. + + Typical usage + ------------- + rebin = NDRebin(data, coords, num_bins=[10, 20]) + rebin.prepare() + rebin.run() + binned = rebin.binned_data + errs = rebin.binned_data_errs + """ def __init__(self, binned_data: Quantity[ArrayLike], bin_centers_list: list[Quantity[ArrayLike]] | None = None, @@ -25,16 +37,17 @@ def __init__(self, binned_data: Quantity[ArrayLike], self.num_bins = num_bins class NDRebin: - - def __init__(self, data: Quantity[ArrayLike], - coords: Quantity[ArrayLike], - data_errs: Quantity[ArrayLike] | None = None, - axes: list[Quantity[ArrayLike]] | None = None, - upper: list[Sequence[float]] | None = None, - lower: list[Sequence[float]] | None = None, - step_size: list[Sequence[float]] | None = None, - num_bins: list[Sequence[int]] | None = None, - fractional: bool | None = False + def __init__( + self, + data: Quantity[ArrayLike], + coords: Quantity[ArrayLike], + data_errs: Quantity[ArrayLike] | None = None, + axes: ArrayLike | None = None, + upper: ArrayLike | None = None, + lower: ArrayLike | None = None, + step_size: ArrayLike | None = None, + num_bins: ArrayLike | None = None, + fractional: bool = False, ): self.data = data self.coords = coords @@ -45,56 +58,89 @@ def __init__(self, data: Quantity[ArrayLike], self.step_size = step_size self.num_bins = num_bins self.fractional = fractional - self.binned_data = np.zeroes(self.data.shape()) # I'm not sure this is actually correct. + + # Internal attributes initialised later + self.Nvals: int | None = None + self.Ndims: int | None = None + self.data_flat = None + self.errors_flat = None + self.coords_flat = None + self.bins_list = None + self.bin_centers_list = None + self.bin_inds = None + self.binned_data = None + self.binned_data_errs = None + self.n_samples = None + + self._prepared = False # flag to avoid double-prepare + + def __call__(self): + self.run() + return self.binned_data + + def prepare(self) -> None: + """Compute derived quantities: shapes, flattened data, bins, indices.""" + if self._prepared: + return # check the size of the data and coords inputs # and define Ndims and Nvals - self.check_data_coords() + self._check_data_coords() # flatten the input data and errors - self.check_data_errs() + self._check_data_errs() # flatten the coords - self.flatten_coords() + self._flatten_coords() # handle optional axes if self.axes is None: # make axes if not provided - self.make_axes() + self._make_axes() else: # project into specified axes - self.project_axes() + self._project_axes() # build the limits - self.build_limits() + self._build_limits() # make the bins - self.make_bins() + self._make_bins() # make the bin indices - self.create_bin_inds() + self._create_bin_inds() + + self._prepared = True + + + def run(self) -> None: + """Bin the data into the defined bins.""" + if not self._prepared: + self.prepare() - def __call__(self): if self.fractional: - self._calculate_fractional_binning() + self._calculate_fractional_bins() else: - self.calculate_bins() + self._calculate_bins() + + self._norm_data() - def check_data_coords(self): + def _check_data_coords(self): + """Compute Nvals and Ndims and validate shapes.""" # Identify number of points - self.Nvals = self.data.size + self.Nvals = int(self.data.size) # Identify number of dimensions - self.Ndims = self.coords.size / self.Nvals + Ndims = self.coords.size / self.Nvals # if Ndims is not an integer value we have a problem - if not self.Ndims.is_integer(): + if not float(Ndims).is_integer(): raise ValueError("The coords have to have the same shape as " "the data, plus one more dimension which is " "length Ndims") self.Ndims = int(Ndims) - def check_data_errs(self): + def _check_data_errs(self): # flatten input data to 1D of length Nvals self.data_flat = self.data.reshape(-1) if self.data_errs is None: @@ -103,9 +149,9 @@ def check_data_errs(self): self.errors_flat = self.data_errs.reshape(-1) if self.errors_flat.shape != self.data_flat.shape: - ValueError("Data and errors have to have the same shape.") + raise ValueError("Data and errors have to have the same shape.") - def flatten_coords(self): + def _flatten_coords(self): # if 1D, need to add a size 1 dimension index to coords if self.Ndims == 1: self.coords = self.coords.reshape(-1, 1) @@ -129,17 +175,17 @@ def flatten_coords(self): moved = np.moveaxis(self.coords, self.dim_axis, 0) self.coords_flat = moved.reshape(self.Ndims, -1).T - def make_axes(self): + def _make_axes(self): # if axes are not provided, default to identity if self.axes is None: self.axes = np.eye(self.Ndims) - def project_axes(self): + def _project_axes(self): # now project the data into the axes self.axes_inv = np.linalg.inv(self.axes) self.coords_flat = np.tensordot(self.coords_flat, self.axes_inv, axes=([1], [0])) - def build_limits(self): + def _build_limits(self): # if limits were not provided, default to the min and max # coord in each dimension if self.upper is None: @@ -158,9 +204,9 @@ def build_limits(self): # clean up limits if self.lower.size != self.Ndims: - ValueError("Lower limits must be None or a 1D iterable of length Ndims.") + raise ValueError("Lower limits must be None or a 1D iterable of length Ndims.") if self.upper.size != self.Ndims: - ValueError("Upper limits must be None or a 1D iterable of length Ndims.") + raise ValueError("Upper limits must be None or a 1D iterable of length Ndims.") for ind in range(self.Ndims): # if individual limits are nan, inf, none, etc, replace with min/max if not np.isfinite(self.lower[ind]): @@ -173,7 +219,7 @@ def build_limits(self): self.lower[ind] = self.upper[ind] self.upper[ind] = temp - def make_bins(self): + def _make_bins(self): # bins_list is a Ndims long list of vectors which are the edges of # each bin. Each vector is num_bins[i]+1 long self.bins_list = [] @@ -188,14 +234,14 @@ def make_bins(self): else: self.num_bins_from_step_size() - def step_size_from_num_bins(self): + def _step_size_from_num_bins(self): # if step_size was not specified, derive from num_bins self.step_size = [] # if provided just one num_bin for 1D as a scalar, make it a list # for formatting purposes self.num_bins = np.atleast_1d(self.num_bins) if self.num_bins.size != self.Ndims: - ValueError("num_bins must be None or a 1D iterable of length Ndims.") + raise ValueError("num_bins must be None or a 1D iterable of length Ndims.") for ind in range(self.Ndims): these_bins = np.linspace(self.lower[ind], self.upper[ind], self.num_bins[ind]+1) these_centers = (these_bins[:-1] + these_bins[1:]) / 2.0 @@ -205,14 +251,14 @@ def step_size_from_num_bins(self): self.bin_centers_list.append(these_centers) self.step_size.append(this_step_size) - def num_bins_from_step_size(self): + def _num_bins_from_step_size(self): # if num_bins was not specified, derive from step_size self.num_bins = [] # if provided just one step_size for 1D as a scalar, make it a list # for formatting purposes - self.step_size = np.atleast_1d(step_size) + self.step_size = np.atleast_1d(self.step_size) if self.step_size.size != self.Ndims: - ValueError("step_size must be None or a 1D iterable of length Ndims.") + raise ValueError("step_size must be None or a 1D iterable of length Ndims.") for ind in range(self.Ndims): if self.lower[ind] == self.upper[ind]: # min and max of limits are the same, i.e. data has to be exactly this @@ -228,7 +274,7 @@ def num_bins_from_step_size(self): self.bin_centers_list.append(these_centers) self.num_bins.append(this_num_bins) - def create_bin_inds(self): + def _create_bin_inds(self): # create the bin inds for each data point as a Nvals x Ndims long vector self.bin_inds = np.zeros((self.Nvals, self.Ndims)) for ind in range(self.Ndims): @@ -237,10 +283,10 @@ def create_bin_inds(self): self.bin_inds[:, ind] = (self.coords_flat[:,ind] - this_min) / this_step # any that are outside the bin limits should be removed self.bin_inds[self.coords_flat[:, ind]< self.bins_list[ind][0], ind] = np.nan - self.bin_inds[self.coords_flat[:, ind]==self.bins_list[ind][-1], ind] = num_bins[ind]-1 + self.bin_inds[self.coords_flat[:, ind]==self.bins_list[ind][-1], ind] = self.num_bins[ind]-1 self.bin_inds[self.coords_flat[:, ind]> self.bins_list[ind][-1], ind] = np.nan - def calculate_bins(self): + def _calculate_bins(self): # this is a non-vector way of binning the data: # for ind in range(Nvals): @@ -284,7 +330,7 @@ def calculate_bins(self): self.binned_data_errs = err_sum.reshape(self.num_bins) self.n_samples = ns_sum.reshape(self.num_bins) - def calculate_fractional_binning(self): + def _calculate_fractional_bins(self): # more convenient to work with half shifted inds bin_inds_frac = self.bin_inds - 0.5 @@ -334,19 +380,20 @@ def calculate_fractional_binning(self): ns_sum = np.bincount(flat_idx, weights=weights, minlength=size) # 5. Reshape and add into the original arrays - self.binned_data = bd_sum.reshape(num_bins) - self.binned_data_errs = err_sum.reshape(num_bins) - self.n_samples = ns_sum.reshape(num_bins) + self.binned_data = bd_sum.reshape(self.num_bins) + self.binned_data_errs = err_sum.reshape(self.num_bins) + self.n_samples = ns_sum.reshape(self.num_bins) - def norm_data(self): + def _norm_data(self): # normalize binned_data by the number of times sampled with np.errstate(divide='ignore', invalid='ignore'): - binned_data = np.divide(binned_data, self.n_samples) - binned_data_errs = np.divide(np.sqrt(binned_data_errs), self.n_samples) + self.binned_data = np.divide(self.binned_data, self.n_samples) + self.binned_data_errs = np.divide(np.sqrt(self.binned_data_errs), self.n_samples) # any bins with no samples is nan - binned_data[self.n_samples==0] = np.nan - binned_data_errs[self.n_samples==0] = np.nan + mask = self.n_samples == 0 + self.binned_data[mask] = np.nan + self.binned_data_errs[mask] = np.nan @@ -499,7 +546,7 @@ def NDrebin(data: Quantity[ArrayLike], errors_flat = data_errs.reshape(-1) if errors_flat.shape != data_flat.shape: - ValueError("Data and errors have to have the same shape.") + raise ValueError("Data and errors have to have the same shape.") # if 1D, need to add a size 1 dimension index to coords if Ndims == 1: @@ -557,9 +604,9 @@ def NDrebin(data: Quantity[ArrayLike], # clean up limits if lower.size != Ndims: - ValueError("Lower limits must be None or a 1D iterable of length Ndims.") + raise ValueError("Lower limits must be None or a 1D iterable of length Ndims.") if upper.size != Ndims: - ValueError("Upper limits must be None or a 1D iterable of length Ndims.") + raise ValueError("Upper limits must be None or a 1D iterable of length Ndims.") for ind in range(Ndims): # if individual limits are nan, inf, none, etc, replace with min/max if not np.isfinite(lower[ind]): @@ -593,7 +640,7 @@ def NDrebin(data: Quantity[ArrayLike], # num_bins = [num_bins] num_bins = np.atleast_1d(num_bins) if num_bins.size != Ndims: - ValueError("num_bins must be None or a 1D iterable of length Ndims.") + raise ValueError("num_bins must be None or a 1D iterable of length Ndims.") for ind in range(Ndims): these_bins = np.linspace(lower[ind], upper[ind], num_bins[ind]+1) these_centers = (these_bins[:-1] + these_bins[1:]) / 2.0 @@ -611,7 +658,7 @@ def NDrebin(data: Quantity[ArrayLike], # step_size = [step_size] step_size = np.atleast_1d(step_size) if step_size.size != Ndims: - ValueError("step_size must be None or a 1D iterable of length Ndims.") + raise ValueError("step_size must be None or a 1D iterable of length Ndims.") for ind in range(Ndims): if lower[ind] == upper[ind]: # min and max of limits are the same, i.e. data has to be exactly this From d754cc7d9e2a5ed7df4e5a4c6bd86ccccb79b54f Mon Sep 17 00:00:00 2001 From: Paul Neves Date: Tue, 18 Nov 2025 16:34:16 -0500 Subject: [PATCH 07/14] NDrebin class ready for testing --- sasdata/transforms/NDrebin.py | 62 ++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/sasdata/transforms/NDrebin.py b/sasdata/transforms/NDrebin.py index b7fcbf21..18fcbcce 100644 --- a/sasdata/transforms/NDrebin.py +++ b/sasdata/transforms/NDrebin.py @@ -8,35 +8,30 @@ from sasdata.quantities.quantity import Quantity -class binnedNDdata: +class NDRebin: """ N-dimensional rebinning of data into regular bins, with optional fractional binning and error propagation. + Parameters + ---------- + data : Quantity[ArrayLike] + Data values on an Nd grid. + coords : Quantity[ArrayLike] + Coordinates corresponding to data. + data_errs : Quantity[ArrayLike], optional + Errors on data. + axes, upper, lower, step_size, num_bins, fractional + See individual attributes; control bin geometry and binning style. + Typical usage ------------- rebin = NDRebin(data, coords, num_bins=[10, 20]) - rebin.prepare() rebin.run() binned = rebin.binned_data errs = rebin.binned_data_errs """ - def __init__(self, binned_data: Quantity[ArrayLike], - bin_centers_list: list[Quantity[ArrayLike]] | None = None, - binned_data_errs: Quantity[ArrayLike] | None = None, - bins_list: list[Quantity[ArrayLike]] | None = None, - step_size: list[Sequence[float]] | None = None, - num_bins: list[Sequence[int]] | None = None, - ): - self.binned_data = binned_data - self.bin_centers_list = bin_centers_list - self.binned_data_errs = binned_data_errs - self.bins_list = bins_list - self.step_size = step_size - self.num_bins = num_bins - -class NDRebin: def __init__( self, data: Quantity[ArrayLike], @@ -230,9 +225,9 @@ def _make_bins(self): # create the bins in each dimension if self.step_size is None: - self.step_size_from_num_bins() + self._step_size_from_num_bins() else: - self.num_bins_from_step_size() + self._num_bins_from_step_size() def _step_size_from_num_bins(self): # if step_size was not specified, derive from num_bins @@ -333,26 +328,43 @@ def _calculate_bins(self): def _calculate_fractional_bins(self): # more convenient to work with half shifted inds + # bin_inds_frac for bin i is between i-0.5 and i+0.5 and the bin center + # is at i. bin_inds_frac = self.bin_inds - 0.5 # 1. Identify valid rows (no NaNs) valid = ~np.isnan(bin_inds_frac).any(axis=1) valid_inds = bin_inds_frac[valid] partial_weights = 1.-np.mod(valid_inds, 1) + data_valid = self.data_flat[valid] + errs_valid = self.errors_flat[valid] + # In 1D, for a point at x between bin centers at x_i and x_{i+1}, + # wx_{i+1}=(x-x_i)/dx partial weight goes to bin i+1 + # and wx_i=1-w_{i+1} partial weight goes to bin i. + # bin_inds = (x-(x_1-dx/2))/dx = (x-x_1)/dx+0.5. Therefore + # bin_inds_frac = (x-x_1)/dx, so wx_{i+1} = mod(idx,1) and + # wx_i = 1-mod(idx,1) # for each dimension, double the amount of subpoints - # for a point at x_i, 1-x_i goes to for ind in range(self.Ndims): + # bins on the edge only go in one bin on that axis + edge_mask = np.logical_not( + np.logical_or(valid_inds[:, ind]<0, + valid_inds[:, ind]>self.num_bins[ind]-1) + ) + partial_weights[~edge_mask, ind] = 1.0 # will be where the bin goes - arr_mod = valid_inds.copy() + arr_mod = valid_inds[edge_mask] arr_mod[:, ind] += 1. valid_inds = np.vstack([valid_inds, arr_mod]) # how close it is to that bin - arr_mod = partial_weights.copy() + arr_mod = partial_weights[edge_mask] arr_mod[:, ind] = 1. - arr_mod[:, ind] partial_weights = np.vstack([partial_weights, arr_mod]) - + # the value and uncertainty + data_valid = np.concatenate([data_valid, data_valid[edge_mask]]) + errs_valid = np.concatenate([errs_valid, errs_valid[edge_mask]]) # any bins that ended up outside just get clamped for ind in range(self.Ndims): @@ -362,10 +374,6 @@ def _calculate_fractional_bins(self): # weights are the product of partial weights weights = np.prod(partial_weights, axis=1) - # need to tile the data and errs to weight them for each bin - data_valid = np.tile(self.data_flat[valid], 2**self.Ndims) - errs_valid = np.tile(self.errors_flat[valid], 2**self.Ndims) - # 2. Convert valid bins to integer indices inds_int = valid_inds.astype(int) From 2d09754ce057863431e7b8ebdacfda56262daed8 Mon Sep 17 00:00:00 2001 From: Paul Neves Date: Tue, 18 Nov 2025 17:13:04 -0500 Subject: [PATCH 08/14] added unit testing for NDrebin --- sasdata/transforms/NDrebin.py | 198 ++++++++++++++++--------------- test/transforms/utest_NDrebin.py | 171 ++++++++++++++++++++++---- 2 files changed, 253 insertions(+), 116 deletions(-) diff --git a/sasdata/transforms/NDrebin.py b/sasdata/transforms/NDrebin.py index 18fcbcce..5b89d7a7 100644 --- a/sasdata/transforms/NDrebin.py +++ b/sasdata/transforms/NDrebin.py @@ -43,6 +43,7 @@ def __init__( step_size: ArrayLike | None = None, num_bins: ArrayLike | None = None, fractional: bool = False, + normalize: bool = True, ): self.data = data self.coords = coords @@ -53,6 +54,7 @@ def __init__( self.step_size = step_size self.num_bins = num_bins self.fractional = fractional + self.normalize = normalize # Internal attributes initialised later self.Nvals: int | None = None @@ -395,8 +397,11 @@ def _calculate_fractional_bins(self): def _norm_data(self): # normalize binned_data by the number of times sampled with np.errstate(divide='ignore', invalid='ignore'): - self.binned_data = np.divide(self.binned_data, self.n_samples) - self.binned_data_errs = np.divide(np.sqrt(self.binned_data_errs), self.n_samples) + if self.normalize: + self.binned_data = np.divide(self.binned_data, self.n_samples) + self.binned_data_errs = np.divide(np.sqrt(self.binned_data_errs), self.n_samples) + else: + self.binned_data_errs = np.sqrt(self.binned_data_errs) # any bins with no samples is nan mask = self.n_samples == 0 @@ -859,17 +864,19 @@ def NDrebin(data: Quantity[ArrayLike], # NDrebin import time start = time.perf_counter() - Ibin, qbin, *rest = NDrebin(I_1, qmat, - step_size=[0.1,np.inf,np.inf] - ) + rebin = NDRebin(I_1, qmat, step_size=[0.1,np.inf,np.inf]) + rebin.run() + Ibin = rebin.binned_data + qbin = rebin.bin_centers_list end = time.perf_counter() print(f"Computed {Nvals} points in {end - start:.6f} seconds") start = time.perf_counter() - Ibin2, qbin2, *rest = NDrebin(I_1, qmat, - step_size=[0.1,np.inf,np.inf], - fractional = True - ) + rebin = NDRebin(I_1, qmat, step_size=[0.1,np.inf,np.inf], + fractional = True) + rebin.run() + Ibin2 = rebin.binned_data + qbin2 = rebin.bin_centers_list end = time.perf_counter() print(f"Computed {Nvals} points with fractional binning in {end - start:.6f} seconds") @@ -943,17 +950,21 @@ def NDrebin(data: Quantity[ArrayLike], # You can choose finite steps for both x and y depending on how you want bins defined. import time start = time.perf_counter() - Ibin, qbin, *rest = NDrebin(I_2D, qmat, - step_size=[0.006, 0.006, np.inf] - ) + rebin = NDRebin(I_2D, qmat, + step_size=[0.006, 0.006, np.inf]) + rebin.run() + Ibin = rebin.binned_data + qbin = rebin.bin_centers_list end = time.perf_counter() print(f"Computed {qmat.size/3} points in {end - start:.6f} seconds") start = time.perf_counter() - Ibin2, qbin2, *rest = NDrebin(I_2D, qmat, + rebin = NDRebin(I_2D, qmat, step_size=[0.0035, 0.0035, np.inf], - fractional=True - ) + fractional=True) + rebin.run() + Ibin2 = rebin.binned_data + qbin2 = rebin.bin_centers_list end = time.perf_counter() print(f"Computed {qmat.size/3} points with fractional binning in {end - start:.6f} seconds") @@ -1001,91 +1012,92 @@ def NDrebin(data: Quantity[ArrayLike], + if True: + # test ND gaussian + Ndims = 3 + mu = np.zeros(Ndims) # Mean vector + sigma = np.random.rand(Ndims) # Std dev in x and y + noise = 0.1 + Nvals = int(1e6) - # test ND gaussian - Ndims = 3 - mu = np.zeros(Ndims) # Mean vector - sigma = np.random.rand(Ndims) # Std dev in x and y - noise = 0.1 - Nvals = int(1e6) - - # Generate random points (x, y) in a 2D square - qmat = -5 + 10 * np.random.rand(Ndims, Nvals) - - # Evaluate 2D Gaussian: - # G(x,y) = (1/(2πσxσy)) * exp(-[(x-μx)^2/(2σx^2) + (y-μy)^2/(2σy^2)]) - exp_op = np.zeros(Nvals) - sigma_tot = 1 - for ind in range(Ndims): - exp_op = exp_op -((qmat[ind,:] - mu[ind])**2) / (2 * sigma[ind]**2) - sigma_tot = sigma_tot * sigma[ind] - I_ND = ( - np.exp(exp_op) / - (2 * np.pi * sigma_tot) - ) + # Generate random points (x, y) in a 2D square + qmat = -5 + 10 * np.random.rand(Ndims, Nvals) - # Add uniform noise - I_ND = I_ND - noise + 2 * noise * np.random.rand(1,Nvals) - - # Rebin in 2D. - # You can choose finite steps for both x and y depending on how you want bins defined. - import time - start = time.perf_counter() - Ibin, qbin, *rest = NDrebin(I_ND, qmat, - step_size=0.2*np.random.rand(Ndims)+0.1, - lower=[1,2,3], - upper=[9,8,7] - ) - end = time.perf_counter() - print(f"Computed {Nvals} points in {end - start:.6f} seconds") - - start = time.perf_counter() - Ibin, qbin, *rest = NDrebin(I_ND, qmat, - step_size=0.2*np.random.rand(Ndims)+0.1, - lower=[1,2,3], - upper=[9,8,7], - fractional=True - ) - end = time.perf_counter() - print(f"Computed {Nvals} points with fractional binning in {end - start:.6f} seconds") + # Evaluate 2D Gaussian: + # G(x,y) = (1/(2πσxσy)) * exp(-[(x-μx)^2/(2σx^2) + (y-μy)^2/(2σy^2)]) + exp_op = np.zeros(Nvals) + sigma_tot = 1 + for ind in range(Ndims): + exp_op = exp_op -((qmat[ind,:] - mu[ind])**2) / (2 * sigma[ind]**2) + sigma_tot = sigma_tot * sigma[ind] + I_ND = ( + np.exp(exp_op) / + (2 * np.pi * sigma_tot) + ) + # Add uniform noise + I_ND = I_ND - noise + 2 * noise * np.random.rand(1,Nvals) + # Rebin in 2D. + # You can choose finite steps for both x and y depending on how you want bins defined. + import time + start = time.perf_counter() + rebin = NDRebin(I_ND, qmat, + step_size=0.2*np.random.rand(Ndims)+0.1, + lower=[1,2,3], + upper=[9,8,7]) + rebin.run() + Ibin = rebin.binned_data + qbin = rebin.bin_centers_list + end = time.perf_counter() + print(f"Computed {Nvals} points in {end - start:.6f} seconds") - # test syntax - Ndims = 4 - Nvals = int(1e5) - qmat = np.random.rand(Ndims, Nvals) - Imat = np.random.rand(Nvals) + start = time.perf_counter() + rebin = NDRebin(I_ND, qmat, + step_size=0.2*np.random.rand(Ndims)+0.1, + lower=[1,2,3], + upper=[9,8,7], + fractional=True) + rebin.run() + Ibin = rebin.binned_data + qbin = rebin.bin_centers_list + end = time.perf_counter() + print(f"Computed {Nvals} points with fractional binning in {end - start:.6f} seconds") - Ibin, qbin, *rest = NDrebin(Imat, qmat, - step_size=0.1*np.random.rand(Ndims)+0.05, - lower=0.1*np.random.rand(Ndims)+0.0, - upper=0.1*np.random.rand(Ndims)+0.9 - ) - results = NDrebin(Imat, qmat, - step_size=0.1*np.random.rand(Ndims)+0.05, - lower=0.1*np.random.rand(Ndims)+0.0, - upper=0.1*np.random.rand(Ndims)+0.9 - ) - Ibin = results[0] - qbin = results[1] - bins_list = results[2] - step_size = results[3] - num_bins = results[4] - # test syntax - Ndims = 2 - Nvals = int(1e5) - qmat = np.random.rand(Ndims, 100, Nvals) - Imat = np.random.rand(100, Nvals) - Imat_errs = np.random.rand(100, Nvals) - binned_data, bin_centers_list, binned_data_errs, bins_list, step_size, num_bins \ - = NDrebin(Imat, qmat, - data_errs = Imat_errs, - num_bins=[10,20], - axes = np.eye(2), - fractional=True - ) + # test syntax + Ndims = 4 + Nvals = int(1e5) + qmat = np.random.rand(Ndims, Nvals) + Imat = np.random.rand(Nvals) + + rebin = NDRebin(Imat, qmat, + step_size=0.1*np.random.rand(Ndims)+0.05, + lower=0.1*np.random.rand(Ndims)+0.0, + upper=0.1*np.random.rand(Ndims)+0.9) + rebin.run() + Ibin = rebin.binned_data + qbin = rebin.bin_centers_list + + # test syntax + Ndims = 2 + Nvals = int(1e5) + qmat = np.random.rand(Ndims, 100, Nvals) + Imat = np.random.rand(100, Nvals) + Imat_errs = np.random.rand(100, Nvals) + + rebin = NDRebin(Imat, qmat, + data_errs = Imat_errs, + num_bins=[10,20], + axes = np.eye(2), + fractional=True) + rebin.run() + Ibin = rebin.binned_data + qbin = rebin.bin_centers_list + Ibin_errs = rebin.binned_data_errs + bins_list = rebin.bins_list + step_size = rebin.step_size + num_bins = rebin.num_bins input() diff --git a/test/transforms/utest_NDrebin.py b/test/transforms/utest_NDrebin.py index ae8f7b70..ccdc5812 100644 --- a/test/transforms/utest_NDrebin.py +++ b/test/transforms/utest_NDrebin.py @@ -3,7 +3,7 @@ import numpy as np from matplotlib import pyplot as plt -from sasdata.transforms.NDrebin import NDrebin +from sasdata.transforms.NDrebin import NDRebin def test_1D_exact(show_plots: bool): @@ -16,8 +16,11 @@ def test_1D_exact(show_plots: bool): Ireal = np.exp(-((xreal - mu) ** 2) / (2 * sigma ** 2)) / (sigma * np.sqrt(2 * np.pi)) # rebin to the exact same bins - Ibin, qbin, *rest = NDrebin(Ireal, xreal, - lower=-5.5, upper=5.5, num_bins=11) + rebin = NDRebin(Ireal, xreal, + lower=-5.5, upper=5.5, num_bins=11) + rebin.run() + Ibin = rebin.binned_data + qbin = rebin.bin_centers_list assert all(Ibin == Ireal) assert all(qbin[0] == xreal) @@ -36,7 +39,11 @@ def test_1D_exact(show_plots: bool): # rebin to the exact same bins with fractional - Ibin, qbin, *rest = NDrebin(Ireal, xreal, lower=-5.5, upper=5.5, num_bins=11, fractional=True) + rebin = NDRebin(Ireal, xreal, + lower=-5.5, upper=5.5, num_bins=11, fractional=True) + rebin.run() + Ibin = rebin.binned_data + qbin = rebin.bin_centers_list assert all(Ibin == Ireal) @@ -53,6 +60,119 @@ def test_1D_exact(show_plots: bool): plt.show() + +def test_2D(show_plots: bool): + # Parameters of the 2D Gaussian + mu = np.array([0.15, 0.0, 0.0]) # Mean vector + sigma = np.array([0.015, 0.055, 0.05]) # Std dev in x and y + noise = 0. + SDD = 2.7 + k_0 = 2*np.pi/5 + pix_x = 1./128. + pix_y = 1./128. + + # Generate our 2D detector grid + x = np.arange(-64,64)*pix_x + y = np.arange(-64,64)*pix_y + + [xmesh, ymesh] = np.meshgrid(x, y) + + # calculate qx, qy, qz + qx = k_0*xmesh/np.sqrt(xmesh**2+ymesh**2+SDD**2) + qy = k_0*ymesh/np.sqrt(xmesh**2+ymesh**2+SDD**2) + qz = k_0-k_0*SDD/np.sqrt(xmesh**2+ymesh**2+SDD**2) + + # qmat + qmat0 = np.stack([qx,qy,qz], axis=2) + + # now rotate about y + angle_list = np.pi/180*np.linspace(-15,15,int(30/.25)) + qmat = np.zeros((len(x), len(y), len(angle_list), 3)) + for ind in range(len(angle_list)): + new_qmat = np.copy(qmat0) + new_qmat[:,:,0] = np.cos(angle_list[ind])*qmat0[:,:,0] - \ + np.sin(angle_list[ind])*qmat0[:,:,2] + new_qmat[:,:,2] = np.sin(angle_list[ind])*qmat0[:,:,0] + \ + np.cos(angle_list[ind])*qmat0[:,:,2] + qmat[:,:,ind,:] = qmat0 + + + # Evaluate Gaussian: + # G(x,y) = (1/(2πσxσy)) * exp(-[(x-μx)^2/(2σx^2) + (y-μy)^2/(2σy^2)]) + I_2D = ( + np.exp( + -((qmat[:,:,:,0] - mu[0])**2) / (2 * sigma[0]**2) + -((qmat[:,:,:,1] - mu[1])**2) / (2 * sigma[1]**2) + -((qmat[:,:,:,2] - mu[2])**2) / (2 * sigma[2]**2) + ) / + (2 * np.pi * sigma[0] * sigma[1] * sigma[2]) + ) + + # Add uniform noise + I_2D = I_2D - noise + 2 * noise * np.random.rand(*I_2D.shape) + + # Rebin in 2D. + # You can choose finite steps for both x and y depending on how you want bins defined. + start = time.perf_counter() + rebin = NDRebin(I_2D, qmat, + step_size=[0.006, 0.006, np.inf]) + rebin.run() + Ibin = rebin.binned_data + qbin = rebin.bin_centers_list + end = time.perf_counter() + print(f"Computed {qmat.size/3} points in {end - start:.6f} seconds") + + start = time.perf_counter() + rebin = NDRebin(I_2D, qmat, + step_size=[0.0035, 0.0035, np.inf], + fractional=True) + rebin.run() + Ibin2 = rebin.binned_data + qbin2 = rebin.bin_centers_list + end = time.perf_counter() + print(f"Computed {qmat.size/3} points with fractional binning in {end - start:.6f} seconds") + + + if show_plots: + # Fiducial 2D + [xmesh, ymesh] = np.meshgrid(qbin2[0], qbin2[1]) + Ireal = ( + np.exp( + -((xmesh - mu[0])**2) / (2 * sigma[0]**2) + -((ymesh - mu[1])**2) / (2 * sigma[1]**2) + ) / + (2 * np.pi * sigma[0] * sigma[1]) + ) + + # Plot a 1D slice of the binned data along x (y bins aggregated) + plt.figure(figsize=(4,8)) + + plt.subplot(3, 1, 1) + plt.pcolormesh(qbin[0], qbin[1], np.squeeze(Ibin.T), shading='nearest') + plt.xlabel('x') + plt.ylabel('y') + plt.title('sum') + plt.colorbar() + plt.tight_layout() + + plt.subplot(3, 1, 2) + plt.pcolormesh(qbin2[0], qbin2[1], np.squeeze(Ibin2.T), shading='nearest') + plt.xlabel('x') + plt.ylabel('y') + plt.title('sum') + plt.colorbar() + plt.tight_layout() + + plt.subplot(3, 1, 3) + plt.pcolormesh(xmesh, ymesh, Ireal, shading='nearest') + plt.xlabel('x') + plt.ylabel('y') + plt.title('real') + plt.colorbar() + plt.tight_layout() + plt.show() + + def test_syntax(): # test syntax Ndims = 4 @@ -60,21 +180,14 @@ def test_syntax(): qmat = np.random.rand(Ndims, Nvals) Imat = np.random.rand(Nvals) - Ibin, qbin, *rest = NDrebin(Imat, qmat, - step_size=0.1*np.random.rand(Ndims)+0.05, - lower=0.1*np.random.rand(Ndims)+0.0, - upper=0.1*np.random.rand(Ndims)+0.9 - ) - results = NDrebin(Imat, qmat, + rebin = NDRebin(Imat, qmat, step_size=0.1*np.random.rand(Ndims)+0.05, lower=0.1*np.random.rand(Ndims)+0.0, - upper=0.1*np.random.rand(Ndims)+0.9 - ) - Ibin = results[0] - qbin = results[1] - bins_list = results[2] - step_size = results[3] - num_bins = results[4] + upper=0.1*np.random.rand(Ndims)+0.9) + rebin.run() + Ibin = rebin.binned_data + qbin = rebin.bin_centers_list + # test syntax Ndims = 2 @@ -83,13 +196,18 @@ def test_syntax(): Imat = np.random.rand(100, Nvals) Imat_errs = np.random.rand(100, Nvals) - binned_data, bin_centers_list, binned_data_errs, bins_list, step_size, num_bins \ - = NDrebin(Imat, qmat, + rebin = NDRebin(Imat, qmat, data_errs = Imat_errs, num_bins=[10,20], axes = np.eye(2), - fractional=True - ) + fractional=True) + rebin.run() + Ibin = rebin.binned_data + qbin = rebin.bin_centers_list + Ibin_errs = rebin.binned_data_errs + bins_list = rebin.bins_list + step_size = rebin.step_size + num_bins = rebin.num_bins # test ND gaussian @@ -121,21 +239,27 @@ def test_ND(): # Rebin in 2D. # You can choose finite steps for both x and y depending on how you want bins defined. start = time.perf_counter() - Ibin, qbin, *rest = NDrebin(I_ND, qmat, + rebin = NDRebin(I_ND, qmat, step_size=0.2*np.random.rand(Ndims)+0.1, lower=[1,2,3,0], upper=[9,8,7,9.5] ) + rebin.run() + Ibin = rebin.binned_data + qbin = rebin.bin_centers_list end = time.perf_counter() print(f"Computed {Nvals} points in {end - start:.6f} seconds") start = time.perf_counter() - Ibin, qbin, *rest = NDrebin(I_ND, qmat, + rebin = NDRebin(I_ND, qmat, step_size=0.2*np.random.rand(Ndims)+0.1, lower=[1,2,3,0], upper=[9,8,7,9.5], fractional=True ) + rebin.run() + Ibin = rebin.binned_data + qbin = rebin.bin_centers_list end = time.perf_counter() print(f"Computed {Nvals} points with fractional binning in {end - start:.6f} seconds") @@ -143,5 +267,6 @@ def test_ND(): if __name__ == "__main__": test_1D_exact(show_plots=False) + test_2D(show_plots=False) test_syntax() test_ND() From fd08cf50c03585bb7d40ceda044fdb569eee88ad Mon Sep 17 00:00:00 2001 From: Paul Neves Date: Tue, 18 Nov 2025 17:14:51 -0500 Subject: [PATCH 09/14] removed temporary testing from NDrebin.py --- sasdata/transforms/NDrebin.py | 283 ---------------------------------- 1 file changed, 283 deletions(-) diff --git a/sasdata/transforms/NDrebin.py b/sasdata/transforms/NDrebin.py index 5b89d7a7..75eb7429 100644 --- a/sasdata/transforms/NDrebin.py +++ b/sasdata/transforms/NDrebin.py @@ -1,5 +1,4 @@ -import time from collections.abc import Sequence import numpy as np @@ -819,285 +818,3 @@ def NDrebin(data: Quantity[ArrayLike], else: return binned_data, bin_centers_list, binned_data_errs, bins_list, step_size, num_bins - - - - - - - - -if __name__ == "__main__": - # a bunch of local testing - import matplotlib.pyplot as plt - import numpy as np - - if True: - # Parameters for the Gaussian function - mu = 0.0 # Mean - sigma = 1.0 # Standard deviation - noise = 0.1 - Nvals = int(1e6) - - # Generate Nvals random values between -5 and 5 - x = -5 + (5 + 5) * np.random.rand(Nvals) # shape: (Nvals,) - - # Build qmat equivalent to MATLAB: - # qmat = [x(:)'; 0*x(:)'; 0*x(:)'] - # → 3 x Nvals array - qmat = np.vstack([ - x, - np.zeros_like(x), - np.zeros_like(x) - ]) - - # Evaluate the Gaussian function - I_1 = np.exp(-((x - mu) ** 2) / (2 * sigma ** 2)) / (sigma * np.sqrt(2 * np.pi)) - - # Add uniform noise in [-noise, +noise] - I_1 = I_1 - noise + 2 * noise * np.random.rand(Nvals) - - # Fiducial - xreal = np.linspace(-5, 5, 1001) - Ireal = np.exp(-((xreal - mu) ** 2) / (2 * sigma ** 2)) / (sigma * np.sqrt(2 * np.pi)) - - # NDrebin - import time - start = time.perf_counter() - rebin = NDRebin(I_1, qmat, step_size=[0.1,np.inf,np.inf]) - rebin.run() - Ibin = rebin.binned_data - qbin = rebin.bin_centers_list - end = time.perf_counter() - print(f"Computed {Nvals} points in {end - start:.6f} seconds") - - start = time.perf_counter() - rebin = NDRebin(I_1, qmat, step_size=[0.1,np.inf,np.inf], - fractional = True) - rebin.run() - Ibin2 = rebin.binned_data - qbin2 = rebin.bin_centers_list - end = time.perf_counter() - print(f"Computed {Nvals} points with fractional binning in {end - start:.6f} seconds") - - # Plot - plt.figure() - plt.plot(np.squeeze(qbin[0]), np.squeeze(Ibin), 'o', linewidth=2, label='sum') - plt.plot(np.squeeze(qbin2[0]), np.squeeze(Ibin2), 'o', linewidth=2, label='fractional') - plt.plot(xreal, Ireal, 'k-', linewidth=2, label='analytic') - - plt.xlabel('x') - plt.ylabel('I') - plt.legend() - plt.tight_layout() - plt.show(block=False) - - - - - - if True: - # Parameters of the 2D Gaussian - mu = np.array([0.15, 0.0, 0.0]) # Mean vector - sigma = np.array([0.015, 0.055, 0.05]) # Std dev in x and y - noise = 0. - SDD = 2.7 - k_0 = 2*np.pi/5 - pix_x = 1./128. - pix_y = 1./128. - - # Generate our 2D detector grid - x = np.arange(-64,64)*pix_x - y = np.arange(-64,64)*pix_y - - [xmesh, ymesh] = np.meshgrid(x, y) - - # calculate qx, qy, qz - qx = k_0*xmesh/np.sqrt(xmesh**2+ymesh**2+SDD**2) - qy = k_0*ymesh/np.sqrt(xmesh**2+ymesh**2+SDD**2) - qz = k_0-k_0*SDD/np.sqrt(xmesh**2+ymesh**2+SDD**2) - - # qmat - qmat0 = np.stack([qx,qy,qz], axis=2) - - # now rotate about y - angle_list = np.pi/180*np.linspace(-15,15,int(30/.25)) - qmat = np.zeros((len(x), len(y), len(angle_list), 3)) - for ind in range(len(angle_list)): - new_qmat = np.copy(qmat0) - new_qmat[:,:,0] = np.cos(angle_list[ind])*qmat0[:,:,0] - \ - np.sin(angle_list[ind])*qmat0[:,:,2] - new_qmat[:,:,2] = np.sin(angle_list[ind])*qmat0[:,:,0] + \ - np.cos(angle_list[ind])*qmat0[:,:,2] - qmat[:,:,ind,:] = qmat0 - - - # Evaluate Gaussian: - # G(x,y) = (1/(2πσxσy)) * exp(-[(x-μx)^2/(2σx^2) + (y-μy)^2/(2σy^2)]) - I_2D = ( - np.exp( - -((qmat[:,:,:,0] - mu[0])**2) / (2 * sigma[0]**2) - -((qmat[:,:,:,1] - mu[1])**2) / (2 * sigma[1]**2) - -((qmat[:,:,:,2] - mu[2])**2) / (2 * sigma[2]**2) - ) / - (2 * np.pi * sigma[0] * sigma[1] * sigma[2]) - ) - - # Add uniform noise - I_2D = I_2D - noise + 2 * noise * np.random.rand(*I_2D.shape) - - # Rebin in 2D. - # You can choose finite steps for both x and y depending on how you want bins defined. - import time - start = time.perf_counter() - rebin = NDRebin(I_2D, qmat, - step_size=[0.006, 0.006, np.inf]) - rebin.run() - Ibin = rebin.binned_data - qbin = rebin.bin_centers_list - end = time.perf_counter() - print(f"Computed {qmat.size/3} points in {end - start:.6f} seconds") - - start = time.perf_counter() - rebin = NDRebin(I_2D, qmat, - step_size=[0.0035, 0.0035, np.inf], - fractional=True) - rebin.run() - Ibin2 = rebin.binned_data - qbin2 = rebin.bin_centers_list - end = time.perf_counter() - print(f"Computed {qmat.size/3} points with fractional binning in {end - start:.6f} seconds") - - - # Fiducial 2D - [xmesh, ymesh] = np.meshgrid(qbin2[0], qbin2[1]) - Ireal = ( - np.exp( - -((xmesh - mu[0])**2) / (2 * sigma[0]**2) - -((ymesh - mu[1])**2) / (2 * sigma[1]**2) - ) / - (2 * np.pi * sigma[0] * sigma[1]) - ) - - # Plot a 1D slice of the binned data along x (y bins aggregated) - plt.figure(figsize=(4,8)) - - plt.subplot(3, 1, 1) - plt.pcolormesh(qbin[0], qbin[1], np.squeeze(Ibin.T), shading='nearest') - plt.xlabel('x') - plt.ylabel('y') - plt.title('sum') - plt.colorbar() - plt.tight_layout() - - plt.subplot(3, 1, 2) - plt.pcolormesh(qbin2[0], qbin2[1], np.squeeze(Ibin2.T), shading='nearest') - plt.xlabel('x') - plt.ylabel('y') - plt.title('sum') - plt.colorbar() - plt.tight_layout() - - plt.subplot(3, 1, 3) - plt.pcolormesh(xmesh, ymesh, Ireal, shading='nearest') - plt.xlabel('x') - plt.ylabel('y') - plt.title('real') - plt.colorbar() - plt.tight_layout() - plt.show(block=False) - - - - - - - if True: - # test ND gaussian - Ndims = 3 - mu = np.zeros(Ndims) # Mean vector - sigma = np.random.rand(Ndims) # Std dev in x and y - noise = 0.1 - Nvals = int(1e6) - - # Generate random points (x, y) in a 2D square - qmat = -5 + 10 * np.random.rand(Ndims, Nvals) - - # Evaluate 2D Gaussian: - # G(x,y) = (1/(2πσxσy)) * exp(-[(x-μx)^2/(2σx^2) + (y-μy)^2/(2σy^2)]) - exp_op = np.zeros(Nvals) - sigma_tot = 1 - for ind in range(Ndims): - exp_op = exp_op -((qmat[ind,:] - mu[ind])**2) / (2 * sigma[ind]**2) - sigma_tot = sigma_tot * sigma[ind] - I_ND = ( - np.exp(exp_op) / - (2 * np.pi * sigma_tot) - ) - - # Add uniform noise - I_ND = I_ND - noise + 2 * noise * np.random.rand(1,Nvals) - - # Rebin in 2D. - # You can choose finite steps for both x and y depending on how you want bins defined. - import time - start = time.perf_counter() - rebin = NDRebin(I_ND, qmat, - step_size=0.2*np.random.rand(Ndims)+0.1, - lower=[1,2,3], - upper=[9,8,7]) - rebin.run() - Ibin = rebin.binned_data - qbin = rebin.bin_centers_list - end = time.perf_counter() - print(f"Computed {Nvals} points in {end - start:.6f} seconds") - - start = time.perf_counter() - rebin = NDRebin(I_ND, qmat, - step_size=0.2*np.random.rand(Ndims)+0.1, - lower=[1,2,3], - upper=[9,8,7], - fractional=True) - rebin.run() - Ibin = rebin.binned_data - qbin = rebin.bin_centers_list - end = time.perf_counter() - print(f"Computed {Nvals} points with fractional binning in {end - start:.6f} seconds") - - - - # test syntax - Ndims = 4 - Nvals = int(1e5) - qmat = np.random.rand(Ndims, Nvals) - Imat = np.random.rand(Nvals) - - rebin = NDRebin(Imat, qmat, - step_size=0.1*np.random.rand(Ndims)+0.05, - lower=0.1*np.random.rand(Ndims)+0.0, - upper=0.1*np.random.rand(Ndims)+0.9) - rebin.run() - Ibin = rebin.binned_data - qbin = rebin.bin_centers_list - - # test syntax - Ndims = 2 - Nvals = int(1e5) - qmat = np.random.rand(Ndims, 100, Nvals) - Imat = np.random.rand(100, Nvals) - Imat_errs = np.random.rand(100, Nvals) - - rebin = NDRebin(Imat, qmat, - data_errs = Imat_errs, - num_bins=[10,20], - axes = np.eye(2), - fractional=True) - rebin.run() - Ibin = rebin.binned_data - qbin = rebin.bin_centers_list - Ibin_errs = rebin.binned_data_errs - bins_list = rebin.bins_list - step_size = rebin.step_size - num_bins = rebin.num_bins - - input() From 318ce5db90caebc00260a7328dd1b5b6f4df3b56 Mon Sep 17 00:00:00 2001 From: Paul Neves Date: Tue, 18 Nov 2025 17:36:54 -0500 Subject: [PATCH 10/14] cleaned up NDrebin, removed old function, added documentation --- sasdata/transforms/NDrebin.py | 576 +++++++++------------------------- 1 file changed, 140 insertions(+), 436 deletions(-) diff --git a/sasdata/transforms/NDrebin.py b/sasdata/transforms/NDrebin.py index 75eb7429..ff5a0ccb 100644 --- a/sasdata/transforms/NDrebin.py +++ b/sasdata/transforms/NDrebin.py @@ -1,5 +1,4 @@ -from collections.abc import Sequence import numpy as np from numpy._typing import ArrayLike @@ -12,23 +11,142 @@ class NDRebin: N-dimensional rebinning of data into regular bins, with optional fractional binning and error propagation. + Provide values at points with ND coordinates. + The coordinates may not be in a nice grid. + The data can be in any array shape. + + The coordinates are in the same shape plus one dimension, + preferably the first dimension, which is the ND coordinate + position + + Rebin that data into a regular grid. + + Note that this does lose some information from the underlying data, + as you are essentially averaging multiple measurements into one bin. + + Note that once can use this function to perform integrations over + one or more dimensions by setting the num_bins to 1 or the + step_size to infinity for one or more axes. The integration will + be performed from the lower to upper bound of that axis. + Parameters ---------- data : Quantity[ArrayLike] - Data values on an Nd grid. + Data values in an Nd array. coords : Quantity[ArrayLike] - Coordinates corresponding to data. + The coordinates corresponding to each data point, same size of data + plus one more dimension with the same length as the + dimensionality of the space (Ndim) data_errs : Quantity[ArrayLike], optional - Errors on data. - axes, upper, lower, step_size, num_bins, fractional - See individual attributes; control bin geometry and binning style. + Errors on data. Optional, the same size as data. + axes : ArrayLike | None = None + The axes of the coordinate system we are binning + into. Defaults to diagonal (e.g. (1,0,0), (0,1,0), and + (0,0,1) for 3D data). A list of Ndim element vectors + upper : ArrayLike | None = None + The upper limits along each axis. Defaults to the largest + values in the data if no limits are provided. + A 1D list of Ndims values. + lower : ArrayLike | None = None + The lower limits along each axis. Defaults to the smallest + values in the data if no limits are provided. + A 1D list of Ndims values. + step_size : ArrayLike | None = None + The size of steps along each axis. Supercedes + num_bins. A list of length Ndim. + num_bins : ArrayLike | None = None + The number of bins along each axis. Superceded by + step_size if step_size is provided. At least one of step_size + or num_bins must be provided. + fractional : bool = False + Whether to perform fractional binning or not. Defaults + to false. + -If false, measurements are binned into one bin, + the one they fall within. Roughly a "nearest neighbor" + approach. + -If true, fractional binning will be applied, where + the value of a measurement is distributed to its 2^Ndim + nearest neighbors weighted by proximity. For example, if + a point falls exactly between two bins, its value will be + given to both bins with 50% weight. This is roughly a + "linear interpolation" approach. Tends to do better at + reducing sharp peaks and edges if data is sampled unevenly. + However, this is roughly 2^Ndim times slower since you have + to address each bin 2^Ndim more times. + normalization : bool = True + Whether to normalize (average) the data or not. If false, + the data are just summed into each bin. If true, the weighted + average of all points added to a bin is computed. + + Attributes + ---------- + binned_data : + has size num_bins and is NDimensional, contains + the binned data + bin_centers_list : + is a list of 1D vectors, contains the + axes of the binned data. The coordinates of bin [i,j,k] + is given by + bin_centers_list[0][i]*axes[i]+bin_centers_list[1][j]*axes[j]+ + bin_centers_list[0][k]*axes[k] + binned_data_errs : + has size num_bins and is NDimensional, contains + the propagated errors of the binned_data + bins_list : + is a list of 1D vectors, is similar to bin_centers_list, + but instead contains the edges of the bins, so it is 1 longer + in each dimension + step_size : + is a list of Ndims numbers, contains the step size + along each dimensino + num_bins : + is a list of Ndims numbers, contains the number + of bins along each dimension + + Methods + ------- + run(self): + Bin the data into the defined bins. Typical usage ------------- - rebin = NDRebin(data, coords, num_bins=[10, 20]) + .. code-block:: + # test syntax 1 + Ndims = 4 + Nvals = int(1e4) + qmat = np.random.rand(Ndims, Nvals) + Imat = np.random.rand(Nvals) + + rebin = NDRebin(Imat, qmat, + step_size=0.1*np.random.rand(Ndims)+0.05, + lower=0.1*np.random.rand(Ndims)+0.0, + upper=0.1*np.random.rand(Ndims)+0.9) + rebin.run() + + Ibin = rebin.binned_data + qbin = rebin.bin_centers_list + + + # test syntax 2 + Ndims = 2 + Nvals = int(1e4) + qmat = np.random.rand(Ndims, 100, Nvals) + Imat = np.random.rand(100, Nvals) + Imat_errs = np.random.rand(100, Nvals) + + rebin = NDRebin(Imat, qmat, + data_errs = Imat_errs, + num_bins=[10,20], + axes = np.eye(2), + fractional=True) rebin.run() - binned = rebin.binned_data - errs = rebin.binned_data_errs + + Ibin = rebin.binned_data + qbin = rebin.bin_centers_list + Ibin_errs = rebin.binned_data_errs + bins_list = rebin.bins_list + step_size = rebin.step_size + num_bins = rebin.num_bins """ def __init__( @@ -74,7 +192,19 @@ def __call__(self): self.run() return self.binned_data - def prepare(self) -> None: + def run(self) -> None: + """Bin the data into the defined bins.""" + if not self._prepared: + self._prepare() + + if self.fractional: + self._calculate_fractional_bins() + else: + self._calculate_bins() + + self._norm_data() + + def _prepare(self) -> None: """Compute derived quantities: shapes, flattened data, bins, indices.""" if self._prepared: return @@ -108,19 +238,6 @@ def prepare(self) -> None: self._prepared = True - - def run(self) -> None: - """Bin the data into the defined bins.""" - if not self._prepared: - self.prepare() - - if self.fractional: - self._calculate_fractional_bins() - else: - self._calculate_bins() - - self._norm_data() - def _check_data_coords(self): """Compute Nvals and Ndims and validate shapes.""" # Identify number of points @@ -283,7 +400,6 @@ def _create_bin_inds(self): self.bin_inds[self.coords_flat[:, ind]> self.bins_list[ind][-1], ind] = np.nan def _calculate_bins(self): - # this is a non-vector way of binning the data: # for ind in range(Nvals): # this_bin_ind = bin_inds[ind,:] @@ -406,415 +522,3 @@ def _norm_data(self): mask = self.n_samples == 0 self.binned_data[mask] = np.nan self.binned_data_errs[mask] = np.nan - - - - -def NDrebin(data: Quantity[ArrayLike], - coords: Quantity[ArrayLike], - data_errs: Quantity[ArrayLike] | None = None, - axes: list[Quantity[ArrayLike]] | None = None, - upper: list[Sequence[float]] | None = None, - lower: list[Sequence[float]] | None = None, - step_size: list[Sequence[float]] | None = None, - num_bins: list[Sequence[int]] | None = None, - fractional: bool | None = False - ): - """ - Provide values at points with ND coordinates. - The coordinates may not be in a nice grid. - The data can be in any array shape. - - The coordinates are in the same shape plus one dimension, - preferably the first dimension, which is the ND coordinate - position - - Rebin that data into a regular grid. - - Note that this does lose some information from the underlying data, - as you are essentially averaging multiple measurements into one bin. - - Note that once can use this function to perform integrations over - one or more dimensions by setting the num_bins to 1 or the - step_size to infinity for one or more axes. The integration will - be performed from the lower to upper bound of that axis. - - :data: The data at each point - :coords: The locations of each data point, same size of data - plus one more dimension with the same length as the - dimensionality of the space (Ndim) - :data_errs: Optional, the same size as data, the uncertainties on data - :axes: The axes of the coordinate system we are binning - into. Defaults to diagonal (e.g. (1,0,0), (0,1,0), and - (0,0,1) for 3D data). A list of Ndim element vectors - :upper: The upper limits along each axis. Defaults to the largest - values in the data if no limits are provided. - A 1D list of Ndims values. - :lower: The lower limits along each axis. Defaults to the smallest - values in the data if no limits are provided. - A 1D list of Ndims values. - :step_size: The size of steps along each axis. Supercedes - num_bins. A list of length Ndim. - :num_bins: The number of bins along each axis. Superceded by - step_size if step_size is provided. At least one of step_size - or num_bins must be provided. - :fractional: Whether to perform fractional binning or not. Defaults - to false. - -If false, measurements are binned into one bin, - the one they fall within. Roughly a "nearest neighbor" - approach. - -If true, fractional binning will be applied, where - the value of a measurement is distributed to its 2^Ndim - nearest neighbors weighted by proximity. For example, if - a point falls exactly between two bins, its value will be - given to both bins with 50% weight. This is roughly a - "linear interpolation" approach. Tends to do better at - reducing sharp peaks and edges if data is sampled unevenly. - However, this is roughly 2^Ndim times slower since you have - to address each bin 2^Ndim more times. - - - Returns: binned_data, bin_centers_list - :binned_data: has size num_bins and is NDimensional, contains - the binned data - :bin_centers_list: is a list of 1D vectors, contains the - axes of the binned data. The coordinates of bin [i,j,k] - is given by - bin_centers_list[0][i]*axes[i]+bin_centers_list[1][j]*axes[j]+ - bin_centers_list[0][k]*axes[k] - :binned_data_errs: has size num_bins and is NDimensional, contains - the propagated errors of the binned_data - :bins_list: is a list of 1D vectors, is similar to bin_centers_list, - but instead contains the edges of the bins, so it is 1 longer - in each dimension - :step_size: is a list of Ndims numbers, contains the step size - along each dimensino - :num_bins: is a list of Ndims numbers, contains the number - of bins along each dimension - - - An example call might be: - .. code-block:: - # test syntax 1 - Ndims = 4 - Nvals = int(1e5) - qmat = np.random.rand(Ndims, Nvals) - Imat = np.random.rand(Nvals) - - Ibin, qbin, *rest = NDrebin(Imat, qmat, - step_size=0.1*np.random.rand(Ndims)+0.05, - lower=0.1*np.random.rand(Ndims)+0.0, - upper=0.1*np.random.rand(Ndims)+0.9 - ) - results = NDrebin(Imat, qmat, - step_size=0.1*np.random.rand(Ndims)+0.05, - lower=0.1*np.random.rand(Ndims)+0.0, - upper=0.1*np.random.rand(Ndims)+0.9 - ) - Ibin = results[0] - qbin = results[1] - bins_list = results[2] - step_size = results[3] - num_bins = results[4] - - - # test syntax 2 - Ndims = 2 - Nvals = int(1e5) - qmat = np.random.rand(Ndims, 100, Nvals) - Imat = np.random.rand(100, Nvals) - Imat_errs = np.random.rand(100, Nvals) - - binned_data, bin_centers_list, binned_data_errs, bins_list, step_size, num_bins \ - = NDrebin(Imat, qmat, - data_errs = Imat_errs, - num_bins=[10,20], - axes = np.eye(2), - fractional=True - ) - - """ - - # Identify number of points - Nvals = data.size - - # Identify number of dimensions - Ndims = coords.size / Nvals - - # if Ndims is not an integer value we have a problem - if not Ndims.is_integer(): - raise ValueError("The coords have to have the same shape as " - "the data, plus one more dimension which is " - "length Ndims") - Ndims = int(Ndims) - - # flatten input data to 1D of length Nvals - data_flat = data.reshape(-1) - if data_errs is None: - no_errs = True - errors_flat = 0*data_flat # no errors - else: - no_errs = False - errors_flat = data_errs.reshape(-1) - - if errors_flat.shape != data_flat.shape: - raise ValueError("Data and errors have to have the same shape.") - - # if 1D, need to add a size 1 dimension index to coords - if Ndims == 1: - coords = coords.reshape(-1, 1) - - # check if the first axis of coords is the dimensions axis - if coords.shape[0] == Ndims: - # first check if it is the first axis - dim_axis = 0 - elif coords.shape[-1] == Ndims: - # now check if it is the last axis - dim_axis = -1 - else: - # search if any axis is size Ndims - dim_axis = next(i for i, s in enumerate(coords.shape) if s == Ndims) - - if not coords.shape[dim_axis] == Ndims: - raise ValueError("The coords have to have one dimension which is " - "the dimensionality of the space") - - # flatten coords to size Nvals x Ndims - moved = np.moveaxis(coords, dim_axis, 0) - coords_flat = moved.reshape(Ndims, -1).T - - # if axes are not provided, default to identity - if axes is None: - axes = np.eye(Ndims) - - - # now project the data into the axes - axes_inv = np.linalg.inv(axes) - coords_flat = np.tensordot(coords_flat, axes_inv, axes=([1], [0])) - - - # if limits were not provided, default to the min and max - # coord in each dimension - if upper is None: - upper = np.zeros(Ndims) - for ind in range(Ndims): - upper[ind] = np.max(coords_flat[:,ind]) - if lower is None: - lower = np.zeros(Ndims) - for ind in range(Ndims): - lower[ind] = np.min(coords_flat[:,ind]) - - # if provided just one limit for 1D as a scalar, make it a list - # for formatting purposes - lower = np.atleast_1d(lower) - upper = np.atleast_1d(upper) - # if not isinstance(lower, (list, tuple)): - # lower = [lower] - # print(lower) - # if not isinstance(upper, (list, tuple)): - # upper = [upper] - - # clean up limits - if lower.size != Ndims: - raise ValueError("Lower limits must be None or a 1D iterable of length Ndims.") - if upper.size != Ndims: - raise ValueError("Upper limits must be None or a 1D iterable of length Ndims.") - for ind in range(Ndims): - # if individual limits are nan, inf, none, etc, replace with min/max - if not np.isfinite(lower[ind]): - lower[ind] = np.min(coords_flat[:,ind]) - if not np.isfinite(upper[ind]): - upper[ind] = np.max(coords_flat[:,ind]) - # if any of the limits are in the wrong order, flip them - if lower[ind] > upper[ind]: - temp = lower[ind] - lower[ind] = upper[ind] - upper[ind] = temp - - - # bins_list is a Ndims long list of vectors which are the edges of - # each bin. Each vector is num_bins[i]+1 long - bins_list = [] - - # bin_centers_list is a Ndims long list of vectors which are the centers of - # each bin. Each vector is num_bins[i] long - bin_centers_list = [] - - - - # create the bins in each dimension - if step_size is None: - # if step_size was not specified, derive from num_bins - step_size = [] - # if provided just one num_bin for 1D as a scalar, make it a list - # for formatting purposes - # if not isinstance(num_bins, (list, tuple)): - # num_bins = [num_bins] - num_bins = np.atleast_1d(num_bins) - if num_bins.size != Ndims: - raise ValueError("num_bins must be None or a 1D iterable of length Ndims.") - for ind in range(Ndims): - these_bins = np.linspace(lower[ind], upper[ind], num_bins[ind]+1) - these_centers = (these_bins[:-1] + these_bins[1:]) / 2.0 - this_step_size = these_bins[1] - these_bins[0] - - bins_list.append(these_bins) - bin_centers_list.append(these_centers) - step_size.append(this_step_size) - else: - # else use step_size and derive num_bins - num_bins = [] - # if provided just one step_size for 1D as a scalar, make it a list - # for formatting purposes - # if not isinstance(step_size, (list, tuple)): - # step_size = [step_size] - step_size = np.atleast_1d(step_size) - if step_size.size != Ndims: - raise ValueError("step_size must be None or a 1D iterable of length Ndims.") - for ind in range(Ndims): - if lower[ind] == upper[ind]: - # min and max of limits are the same, i.e. data has to be exactly this - these_bins = np.array([lower[ind], lower[ind]]) - else: - these_bins = np.arange(lower[ind], upper[ind], step_size[ind]) - if these_bins[-1] != upper[ind]: - these_bins = np.append(these_bins, upper[ind]) - these_centers = (these_bins[:-1] + these_bins[1:]) / 2.0 - this_num_bins = these_bins.size-1 - - bins_list.append(these_bins) - bin_centers_list.append(these_centers) - num_bins.append(this_num_bins) - - - # create the binned data matrix of size num_bins[0] x num_bins[1] x ... - # binned_data = np.zeros(num_bins) - # binned_data_errs = np.zeros(num_bins) - # n_samples = np.zeros(num_bins) - - # create the bin inds for each data point as a Nvals x Ndims long vector - bin_inds = np.zeros((Nvals, Ndims)) - for ind in range(Ndims): - this_min = bins_list[ind][0] - this_step = step_size[ind] - bin_inds[:, ind] = (coords_flat[:,ind] - this_min) / this_step - # any that are outside the bin limits should be removed - bin_inds[coords_flat[:, ind]bins_list[ind][-1], ind] = np.nan - - if not fractional: - # this is a non-vector way of binning the data: - # for ind in range(Nvals): - # this_bin_ind = bin_inds[ind,:] - # if not np.isnan(this_bin_ind).any(): - # this_bin_ind = this_bin_ind.astype(int) - # binned_data[*this_bin_ind] = binned_data[*this_bin_ind] + data_flat[ind] - # binned_data_errs[*this_bin_ind] = binned_data_errs[*this_bin_ind] + errors_flat[ind]**2 - # n_samples[*this_bin_ind] = n_samples[*this_bin_ind] + 1 - - - # and here is a vector equivalent - # ------------------------------------------------------------- - # Inputs: - # bin_inds : (Nvals, Ndims) array of indices, some rows may contain NaN - # data_flat : (Nvals,) values to accumulate - # errors_flat : (Nvals,) errors to accumulate (squared) - # binned_data : Ndims-dimensional array (output) - # binned_data_errs : Ndims-dimensional array (output) - # n_samples : Ndims-dimensional array (output) - # ------------------------------------------------------------- - - # 1. Identify valid rows (no NaNs) - valid = ~np.isnan(bin_inds).any(axis=1) - - # 2. Convert valid bins to integer indices - inds_int = bin_inds[valid].astype(int) - - # 3. Map multidimensional indices → flat indices - flat_idx = np.ravel_multi_index(inds_int.T, dims=num_bins) - - # 4. Use bincount to accumulate in a vectorized way - size = np.prod(num_bins) - - bd_sum = np.bincount(flat_idx, weights=data_flat[valid], minlength=size) - err_sum = np.bincount(flat_idx, weights=errors_flat[valid]**2, minlength=size) - ns_sum = np.bincount(flat_idx, minlength=size) - - # 5. Reshape and add into the original arrays - binned_data = bd_sum.reshape(num_bins) - binned_data_errs = err_sum.reshape(num_bins) - n_samples = ns_sum.reshape(num_bins) - - - else: - # more convenient to work with half shifted inds - bin_inds = bin_inds - 0.5 - - # 1. Identify valid rows (no NaNs) - valid = ~np.isnan(bin_inds).any(axis=1) - valid_inds = bin_inds[valid] - partial_weights = 1.-np.mod(valid_inds, 1) - - - # for each dimension, double the amount of subpoints - # for a point at x_i, 1-x_i goes to - for ind in range(Ndims): - # will be where the bin goes - arr_mod = valid_inds.copy() - arr_mod[:, ind] += 1. - valid_inds = np.vstack([valid_inds, arr_mod]) - # how close it is to that bin - arr_mod = partial_weights.copy() - arr_mod[:, ind] = 1. - arr_mod[:, ind] - partial_weights = np.vstack([partial_weights, arr_mod]) - - - # any bins that ended up outside just get clamped - for ind in range(Ndims): - valid_inds[valid_inds[:, ind]<0, ind] = 0 - valid_inds[valid_inds[:, ind]>num_bins[ind]-1, ind] = num_bins[ind]-1 - - # weights are the product of partial weights - weights = np.prod(partial_weights, axis=1) - - # need to tile the data and errs to weight them for each bin - data_valid = np.tile(data_flat[valid], 2**Ndims) - errs_valid = np.tile(errors_flat[valid], 2**Ndims) - - # 2. Convert valid bins to integer indices - inds_int = valid_inds.astype(int) - - # 3. Map multidimensional indices → flat indices - flat_idx = np.ravel_multi_index(inds_int.T, dims=num_bins) - - # 4. Use bincount to accumulate in a vectorized way - size = np.prod(num_bins) - - bd_sum = np.bincount(flat_idx, weights=weights*data_valid, minlength=size) - err_sum = np.bincount(flat_idx, weights=(weights**2)*(errs_valid**2), minlength=size) - ns_sum = np.bincount(flat_idx, weights=weights, minlength=size) - - # 5. Reshape and add into the original arrays - binned_data = bd_sum.reshape(num_bins) - binned_data_errs = err_sum.reshape(num_bins) - n_samples = ns_sum.reshape(num_bins) - - - - - # normalize binned_data by the number of times sampled - with np.errstate(divide='ignore', invalid='ignore'): - binned_data = np.divide(binned_data, n_samples) - binned_data_errs = np.divide(np.sqrt(binned_data_errs), n_samples) - - # any bins with no samples is nan - binned_data[n_samples==0] = np.nan - binned_data_errs[n_samples==0] = np.nan - - - if no_errs: - return binned_data, bin_centers_list, bins_list, step_size, num_bins - else: - return binned_data, bin_centers_list, binned_data_errs, bins_list, step_size, num_bins - From e29c76ea5022ce5b282494a44e15a994170fefa2 Mon Sep 17 00:00:00 2001 From: Paul Neves Date: Mon, 1 Dec 2025 16:38:50 -0500 Subject: [PATCH 11/14] refactor NDRebin._build_limits to pass codescene review. vectorized and removed for loops and most conditional logic --- sasdata/transforms/NDrebin.py | 36 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/sasdata/transforms/NDrebin.py b/sasdata/transforms/NDrebin.py index ff5a0ccb..435cc155 100644 --- a/sasdata/transforms/NDrebin.py +++ b/sasdata/transforms/NDrebin.py @@ -301,36 +301,32 @@ def _project_axes(self): def _build_limits(self): # if limits were not provided, default to the min and max # coord in each dimension - if self.upper is None: - self.upper = np.zeros(self.Ndims) - for ind in range(self.Ndims): - self.upper[ind] = np.max(self.coords_flat[:,ind]) - if self.lower is None: - self.lower = np.zeros(self.Ndims) - for ind in range(self.Ndims): - self.lower[ind] = np.min(self.coords_flat[:,ind]) + coords = self.coords_flat + mins = np.min(coords, axis=0) + maxs = np.max(coords, axis=0) + lower = mins if self.lower is None else self.lower + upper = maxs if self.upper is None else self.upper # if provided just one limit for 1D as a scalar, make it a list # for formatting purposes self.lower = np.atleast_1d(self.lower) self.upper = np.atleast_1d(self.upper) - # clean up limits + # validate limits sizes if self.lower.size != self.Ndims: raise ValueError("Lower limits must be None or a 1D iterable of length Ndims.") if self.upper.size != self.Ndims: raise ValueError("Upper limits must be None or a 1D iterable of length Ndims.") - for ind in range(self.Ndims): - # if individual limits are nan, inf, none, etc, replace with min/max - if not np.isfinite(self.lower[ind]): - self.lower[ind] = np.min(self.coords_flat[:,ind]) - if not np.isfinite(self.upper[ind]): - self.upper[ind] = np.max(self.coords_flat[:,ind]) - # if any of the limits are in the wrong order, flip them - if self.lower[ind] > self.upper[ind]: - temp = self.lower[ind] - self.lower[ind] = self.upper[ind] - self.upper[ind] = temp + + # if individual limits are nan, inf, none, etc, replace with min/max + finite_lower = np.isfinite(lower) + finite_upper = np.isfinite(upper) + lower = np.where(finite_lower, lower, mins) + upper = np.where(finite_upper, upper, maxs) + + # if any of the limits are in the wrong order, flip them + self.lower = np.minimum(lower, upper) + self.upper = np.maximum(lower, upper) def _make_bins(self): # bins_list is a Ndims long list of vectors which are the edges of From 48f55bdc61811ac735ce6557459d1a287a8906ff Mon Sep 17 00:00:00 2001 From: Paul Neves Date: Mon, 1 Dec 2025 16:51:20 -0500 Subject: [PATCH 12/14] fixed bug in _build_limits --- sasdata/transforms/NDrebin.py | 6 ++++-- test/transforms/utest_NDrebin.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sasdata/transforms/NDrebin.py b/sasdata/transforms/NDrebin.py index 435cc155..56a4b7b3 100644 --- a/sasdata/transforms/NDrebin.py +++ b/sasdata/transforms/NDrebin.py @@ -311,11 +311,13 @@ def _build_limits(self): # for formatting purposes self.lower = np.atleast_1d(self.lower) self.upper = np.atleast_1d(self.upper) + lower = np.atleast_1d(lower).astype(float, copy=True) + upper = np.atleast_1d(upper).astype(float, copy=True) # validate limits sizes - if self.lower.size != self.Ndims: + if lower.size != self.Ndims: raise ValueError("Lower limits must be None or a 1D iterable of length Ndims.") - if self.upper.size != self.Ndims: + if upper.size != self.Ndims: raise ValueError("Upper limits must be None or a 1D iterable of length Ndims.") # if individual limits are nan, inf, none, etc, replace with min/max diff --git a/test/transforms/utest_NDrebin.py b/test/transforms/utest_NDrebin.py index ccdc5812..e5365a95 100644 --- a/test/transforms/utest_NDrebin.py +++ b/test/transforms/utest_NDrebin.py @@ -266,7 +266,7 @@ def test_ND(): if __name__ == "__main__": - test_1D_exact(show_plots=False) - test_2D(show_plots=False) + test_1D_exact(show_plots=True) + test_2D(show_plots=True) test_syntax() test_ND() From d44ccdbe2c73f035aaa0e9b911e650f0bab7c5be Mon Sep 17 00:00:00 2001 From: Paul Neves Date: Wed, 3 Dec 2025 11:43:46 -0500 Subject: [PATCH 13/14] indented code block, clarified non-vector comment, fixed typo, removed return in NDRebin __call__ --- sasdata/transforms/NDrebin.py | 77 ++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/sasdata/transforms/NDrebin.py b/sasdata/transforms/NDrebin.py index 56a4b7b3..10b499ee 100644 --- a/sasdata/transforms/NDrebin.py +++ b/sasdata/transforms/NDrebin.py @@ -98,7 +98,7 @@ class NDRebin: in each dimension step_size : is a list of Ndims numbers, contains the step size - along each dimensino + along each dimension num_bins : is a list of Ndims numbers, contains the number of bins along each dimension @@ -111,42 +111,42 @@ class NDRebin: Typical usage ------------- .. code-block:: - # test syntax 1 - Ndims = 4 - Nvals = int(1e4) - qmat = np.random.rand(Ndims, Nvals) - Imat = np.random.rand(Nvals) - - rebin = NDRebin(Imat, qmat, - step_size=0.1*np.random.rand(Ndims)+0.05, - lower=0.1*np.random.rand(Ndims)+0.0, - upper=0.1*np.random.rand(Ndims)+0.9) - rebin.run() - - Ibin = rebin.binned_data - qbin = rebin.bin_centers_list - - - # test syntax 2 - Ndims = 2 - Nvals = int(1e4) - qmat = np.random.rand(Ndims, 100, Nvals) - Imat = np.random.rand(100, Nvals) - Imat_errs = np.random.rand(100, Nvals) - - rebin = NDRebin(Imat, qmat, - data_errs = Imat_errs, - num_bins=[10,20], - axes = np.eye(2), - fractional=True) - rebin.run() - - Ibin = rebin.binned_data - qbin = rebin.bin_centers_list - Ibin_errs = rebin.binned_data_errs - bins_list = rebin.bins_list - step_size = rebin.step_size - num_bins = rebin.num_bins + # test syntax 1 + Ndims = 4 + Nvals = int(1e4) + qmat = np.random.rand(Ndims, Nvals) + Imat = np.random.rand(Nvals) + + rebin = NDRebin(Imat, qmat, + step_size=0.1*np.random.rand(Ndims)+0.05, + lower=0.1*np.random.rand(Ndims)+0.0, + upper=0.1*np.random.rand(Ndims)+0.9) + rebin.run() + + Ibin = rebin.binned_data + qbin = rebin.bin_centers_list + + + # test syntax 2 + Ndims = 2 + Nvals = int(1e4) + qmat = np.random.rand(Ndims, 100, Nvals) + Imat = np.random.rand(100, Nvals) + Imat_errs = np.random.rand(100, Nvals) + + rebin = NDRebin(Imat, qmat, + data_errs = Imat_errs, + num_bins=[10,20], + axes = np.eye(2), + fractional=True) + rebin.run() + + Ibin = rebin.binned_data + qbin = rebin.bin_centers_list + Ibin_errs = rebin.binned_data_errs + bins_list = rebin.bins_list + step_size = rebin.step_size + num_bins = rebin.num_bins """ def __init__( @@ -398,7 +398,8 @@ def _create_bin_inds(self): self.bin_inds[self.coords_flat[:, ind]> self.bins_list[ind][-1], ind] = np.nan def _calculate_bins(self): - # this is a non-vector way of binning the data: + # For readibility, this is a non-vector way of binning the data + # which in the following will be vectorized for efficiency: # for ind in range(Nvals): # this_bin_ind = bin_inds[ind,:] # if not np.isnan(this_bin_ind).any(): From 8ea93eed41f891e4bb6637f4e8a5d1cec216f620 Mon Sep 17 00:00:00 2001 From: Paul Neves Date: Wed, 3 Dec 2025 11:43:59 -0500 Subject: [PATCH 14/14] indented code block, clarified non-vector comment, fixed typo, removed return in NDRebin __call__ --- sasdata/transforms/NDrebin.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sasdata/transforms/NDrebin.py b/sasdata/transforms/NDrebin.py index 10b499ee..e5091a8e 100644 --- a/sasdata/transforms/NDrebin.py +++ b/sasdata/transforms/NDrebin.py @@ -190,7 +190,6 @@ def __init__( def __call__(self): self.run() - return self.binned_data def run(self) -> None: """Bin the data into the defined bins."""