diff --git a/modules/Workflow/rWHALE.py b/modules/Workflow/rWHALE.py index 2e3686152..77dbb7e30 100644 --- a/modules/Workflow/rWHALE.py +++ b/modules/Workflow/rWHALE.py @@ -202,7 +202,12 @@ def main( # noqa: C901, D103 # now for each asset, do regional mapping for asset_type, assetIt in asset_files.items(): # noqa: N806 - WF.perform_regional_mapping(assetIt, asset_type) + if not isinstance(WF.shared_data['RegionalEvent']['eventFile'], list): + WF.shared_data['RegionalEvent']['eventFile'] = [ + WF.shared_data['RegionalEvent']['eventFile'] + ] + for event_grid in WF.shared_data['RegionalEvent']['eventFile']: + WF.perform_regional_mapping(assetIt, asset_type, event_grid) if parallelType == 'parSETUP': return diff --git a/modules/Workflow/whale/main.py b/modules/Workflow/whale/main.py index 3b35150eb..c8c2a88dd 100644 --- a/modules/Workflow/whale/main.py +++ b/modules/Workflow/whale/main.py @@ -1765,7 +1765,7 @@ def perform_regional_recovery(self, asset_keys): # noqa: ARG002 ) log_div() - def perform_regional_mapping(self, AIM_file_path, assetType, doParallel=True): # noqa: FBT002, N803 + def perform_regional_mapping(self, AIM_file_path, assetType, event_grid, doParallel=True): # noqa: FBT002, N803 """Performs the regional mapping between the asset and a hazard event. Parameters @@ -1787,7 +1787,7 @@ def perform_regional_mapping(self, AIM_file_path, assetType, doParallel=True): 'id': 'filenameEVENTgrid', 'type': 'path', 'default': resolve_path( - self.shared_data['RegionalEvent']['eventFile'], + event_grid, self.reference_dir, ), } diff --git a/modules/createEVENT/SimCenterEvent/SimCenterEvent.py b/modules/createEVENT/SimCenterEvent/SimCenterEvent.py index baf660934..031f5b259 100644 --- a/modules/createEVENT/SimCenterEvent/SimCenterEvent.py +++ b/modules/createEVENT/SimCenterEvent/SimCenterEvent.py @@ -74,8 +74,138 @@ def write_RV(AIM_file, EVENT_file): # noqa: N802, N803, D103 # get the location of the event input files # TODO: assuming a single event for now # noqa: TD002 aim_event_input = aim_file['Events'][0] - data_dir = Path(aim_event_input['EventFolderPath']) + if 'SimCenterEvent' in aim_event_input: + simcenter_event = aim_event_input['SimCenterEvent'] + data_dir = Path(simcenter_event['EventFolderPath']) + + event_file = {'randomVariables': [], 'Events': []} + # the event should be sampled during hazard to asset mapping, so not random + # variables are needed + event_file['Events'].append({ + "data_dir": str(data_dir), + "unitScaleFactor": f_scale_units, + 'units': input_unit_bases, + }) + + ################Below are for future multiple event_type use############ + # # Get the number of realizations + # if 'intensityMeasure' in simcenter_event: + # num_of_realizations = len(simcenter_event['intensityMeasure']['Events'][0]) + # if 'timeHistory' in simcenter_event: + # if num_of_realizations != len(simcenter_event['timeHistory']['Events'][0]): + # msg = 'Number of realizations in intensityMeasure and timeHistory do not match' + # raise ValueError(msg) + # elif 'timeHistory' in simcenter_event: + # num_of_realizations = len(simcenter_event['timeHistory']['Events'][0]) + # else: + # msg = 'No intensityMeasure or timeHistory in SimCenterEvent' + # raise ValueError(msg) + + # # currently assume only intensityMeasure or timeHistory + # if 'intensityMeasure' in simcenter_event: + # event_type = 'intensityMeasure' + # elif 'timeHistory' in simcenter_event: + # event_type = 'timeHistory' + # else: + # msg = 'No intensityMeasure or timeHistory in SimCenterEvent' + # raise ValueError(msg) + + # if num_of_realizations > 1: + # # if there is more than one event then we need random variables + + # # initialize the randomVariables part of the EVENT file + # if event_type == 'intensityMeasure': + # event_file['randomVariables'].append( + # { + # 'distribution': 'discrete_design_set_string', + # 'name': 'eventID', + # 'value': 'RV.eventID', + # 'elements': ['event_' + str(i) for i in range(num_of_realizations)], + # } + # ) + + # # initialize the Events part of the EVENT file + # event_file['Events'].append( + # { + # 'type': event_type, + # 'event_id': 'RV.eventID', + # 'unitScaleFactor': f_scale_units, + # 'units': input_unit_bases, + # 'values': simcenter_event[event_type]['Events'], + # 'labels': simcenter_event[event_type]['Labels'], + # } + # ) + # elif event_type == 'timeHistory': + # event_file['randomVariables'].append( + # { + # 'distribution': 'discrete_design_set_string', + # 'name': 'eventID', + # 'value': 'RV.eventID', + # 'elements': [], + # } + # ) + + # # initialize the Events part of the EVENT file + # event_file['Events'].append( + # { + # # 'type': 'Seismic', I am pretty sure we are not using this now + # # or we are using it incorrectly, so I removed it for the time being + # # and replaced it with the information that is actually used + # 'type': aim_event_input['type'], + # 'event_id': 'RV.eventID', + # 'unitScaleFactor': f_scale_units, + # 'units': input_unit_bases, + # 'data_dir': str(data_dir), + # } + # ) + + # # collect the filenames + # RV_elements = simcenter_event[event_type]['Events'][0] # noqa: N806 + # # for event in events: + # # #if event['EventClassification'] in ['Earthquake', 'Hurricane', + # # # 'Flood']: + # # #RV_elements.append(event['fileName']) + # # RV_elements.append(event[0]) + + # # and add them to the list of randomVariables + # event_file['randomVariables'][0]['elements'] = RV_elements + + # # if time histories are used, then load the first event + # # TODO: this is needed by some other code that should be fixed and this # noqa: TD002 + # # part should be removed. + # event_file['Events'][0].update({'scale_factors': simcenter_event[event_type]['ScaleFactors']}) + # event_file['Events'][0].update( + # load_record(simcenter_event[event_type]['Events'][0][0], data_dir, empty=num_of_realizations > 1) + # ) + # # , event_class = event_class)) + + # else: + # # if there is only one event, then we do not need random variables + + # # initialize the Events part of the EVENT file + # # The events are now two dimensiontal list. The first dimension is sequence of events + # # The second dimension is different grid in the same event + + # event_file['Events'].append( + # { + # # 'type': 'Seismic', + # 'type': event_type, + # 'event_id': simcenter_event[event_type]['Events'][0][0], + # 'unitScaleFactor': f_scale_units, + # 'units': input_unit_bases, + # 'data_dir': str(data_dir), + # } + # ) + ######################################################################## + + # save the EVENT dictionary to a json file + with open(EVENT_file, 'w', encoding='utf-8') as f: # noqa: PTH123 + json.dump(event_file, f, indent=2) + + return + # Below are for backward compatibility + data_dir = Path(aim_event_input['EventFolderPath']) # get the list of events assigned to this asset events = aim_event_input['Events'] diff --git a/modules/performRegionalMapping/NearestNeighborEvents/NNE.py b/modules/performRegionalMapping/NearestNeighborEvents/NNE.py index 37433796b..95a01a5ad 100644 --- a/modules/performRegionalMapping/NearestNeighborEvents/NNE.py +++ b/modules/performRegionalMapping/NearestNeighborEvents/NNE.py @@ -47,7 +47,15 @@ import pandas as pd from sklearn.neighbors import NearestNeighbors import geopandas as gpd +from pyproj import CRS +def load_sc_geojson(file_path): + # Read the GeoJSON into a dictionary + with Path(file_path).open() as f: + geojson_data = json.load(f) + crs = CRS.from_user_input(geojson_data['crs']['properties']['name']) + # Create a GeoDataFrame from the GeoJSON + return gpd.GeoDataFrame.from_features(geojson_data['features'], crs=crs) def find_neighbors( # noqa: C901, D103 asset_file, @@ -87,6 +95,244 @@ def find_neighbors( # noqa: C901, D103 # Check if the file is a CSV or a GIS file file_extension = Path(event_grid_file).suffix.lower() + if file_extension == '.geojson': + # Read the simcenter geojson file + gdf = load_sc_geojson(event_dir / event_grid_file) + + # Ensure the geojson file is in a geographic coordinate system + if not gdf.crs.is_geographic: + gdf = gdf.to_crs(epsg=4326) + + lat_e = gdf['geometry'].apply(lambda pt: pt.y) + lon_e = gdf['geometry'].apply(lambda pt: pt.x) + grid_locations = np.array([[lo, la] for lo, la in zip(lon_e, lat_e)]) + # If there is any filter_label (a asset will only consider neighbors with the same filter_label) + # any gdf column named with 'filter_xxx' will be considered as filter_label + if filter_label == '': + grid_extra_keys = [key for key in gdf.columns if key.startswith('filter_')] + + hazard_columns = gdf.columns + hazard_columns = hazard_columns.drop(['geometry', *grid_extra_keys]) + # Get the number of events as the length of the first column, first row + if isinstance(gdf[hazard_columns[0]].iloc[0], list): + event_count = len(gdf[hazard_columns[0]].iloc[0]) + else: + event_count = 1 + # check if all rows and all columns have the same number of events + for col in hazard_columns: + for i in range(len(gdf)): + # If event count is 1, convert the data to list + if event_count == 1 and (not isinstance(gdf[col].iloc[i], list)): + gdf[col].iloc[i] = [gdf[col].iloc[i]] + if len(gdf[col].iloc[i]) != event_count: + msg = 'Each grid point must have the same number of events for each hazard input field.' + f'The grid point {i} have a different number of events for the hazard input field {col}.' + f'The number of events for the first grid point of field {col} is {event_count}.' + raise ValueError( + msg + ) + + # prepare the tree for the nearest neighbor search + if filter_label != '' or len(grid_extra_keys) > 0: + neighbors_to_get = min(neighbors * 10, len(lon_e)) + else: + neighbors_to_get = neighbors + + nbrs = NearestNeighbors(n_neighbors=neighbors_to_get, algorithm='ball_tree').fit( + grid_locations + ) + # load the building data file + with open(asset_file, encoding='utf-8') as f: # noqa: PTH123 + asset_dict = json.load(f) + + # prepare a dataframe that holds asset filenames and locations + aim_df = pd.DataFrame( + columns=['Latitude', 'Longitude', 'file'], index=np.arange(len(asset_dict)) + ) + + count = 0 + for i, asset in enumerate(asset_dict): + if run_parallel == False or (i % num_processes) == process_id: # noqa: E712 + with open(asset['file'], encoding='utf-8') as f: # noqa: PTH123 + asset_data = json.load(f) + + asset_loc = asset_data['GeneralInformation']['location'] + aim_id = aim_df.index[count] + aim_df.loc[aim_id, 'Longitude'] = asset_loc['longitude'] + aim_df.loc[aim_id, 'Latitude'] = asset_loc['latitude'] + aim_df.loc[aim_id, 'file'] = asset['file'] + count = count + 1 + + # store building locations in bldg_locations + bldg_locations = np.array( + [ + [lo, la] + for lo, la in zip(aim_df['Longitude'], aim_df['Latitude']) + if not np.isnan(lo) and not np.isnan(la) + ] + ) + + # collect the neighbor indices and distances for every building + distances, indices = nbrs.kneighbors(bldg_locations) + distances = distances + 1e-20 + + # initialize the random generator + if seed is not None: + rng = np.random.default_rng(seed) + else: + rng = np.random.default_rng() + + count = 0 + + # iterate through the buildings and store the selected events in the AIM + for asset_i, (aim_id, dist_list, ind_list) in enumerate( # noqa: B007 + zip(aim_df.index, distances, indices) + ): + # open the AIM file + aim_index_id = aim_df.index[aim_id] + asst_file = aim_df.loc[aim_index_id, 'file'] + + with open(asst_file, encoding='utf-8') as f: # noqa: PTH123 + asset_data = json.load(f) + + # save the event dictionary to the AIM + # TODO: we assume there is only one event # noqa: TD002 + # handling multiple events will require more sophisticated inputs + if 'Events' not in asset_data: + asset_data['Events'] = [{'SimCenterEvent':{ + 'EventFolderPath': str(event_dir), + 'Values': [], + 'Labels': [], + }}] + if filter_label != '': + # soil type of building + asset_label = asset_data['GeneralInformation'][filter_label] + # soil types of all initial neighbors + grid_label = gdf[filter_label][ind_list] + + # only keep the distances and indices corresponding to neighbors + # with the same soil type + dist_list = dist_list[(grid_label == asset_label).values] # noqa: PD011, PLW2901 + ind_list = ind_list[(grid_label == asset_label).values] # noqa: PD011, PLW2901 + + # return dist_list & ind_list with a length equals neighbors + # assuming that at least neighbors grid points exist with + # the same filter_label as the building + + # because dist_list, ind_list sorted initially in order of increasing + # distance, just take the first neighbors grid points of each + dist_list = dist_list[:neighbors] # noqa: PLW2901 + ind_list = ind_list[:neighbors] # noqa: PLW2901 + + if len(grid_extra_keys) > 0: + filter_labels = [] + for key in asset_data['GeneralInformation'].keys(): # noqa: SIM118 + if key in grid_extra_keys: + filter_labels.append(key) # noqa: PERF401 + + filter_list = [True for i in dist_list] + for filter_label in filter_labels: # noqa: PLR1704 + asset_label = asset_data['GeneralInformation'][filter_label] + grid_label = gdf[filter_label][ind_list] + filter_list_i = (grid_label == asset_label).values # noqa: PD011 + filter_list = filter_list and filter_list_i + + # only keep the distances and indices corresponding to neighbors + # with the same soil type + dist_list = dist_list[filter_list] # noqa: PLW2901 + ind_list = ind_list[filter_list] # noqa: PLW2901 + + # return dist_list & ind_list with a length equals neighbors + # assuming that at least neighbors grid points exist with + # the same filter_label as the building + + # because dist_list, ind_list sorted initially in order of increasing + # distance, just take the first neighbors grid points of each + dist_list = dist_list[:neighbors] # noqa: PLW2901 + ind_list = ind_list[:neighbors] # noqa: PLW2901 + + # calculate the weights for each neighbor based on their distance + dist_list = 1.0 / (dist_list**2.0) # noqa: PLW2901 + weights = np.array(dist_list) / np.sum(dist_list) + + # Create one grid for each hazard input field + label_list = asset_data['Events'][0]['SimCenterEvent']['Labels'] + label_list += list(hazard_columns) + + # get the pre-defined number of samples for each neighbor + nbr_samples = np.where(rng.multinomial(1, weights, samples) == 1)[1] + + values_list = asset_data['Events'][0]['SimCenterEvent']['Values'] + for sample_j, nbr in enumerate(nbr_samples): + # make sure we resample events if samples > event_count + event_j = sample_j % event_count + + # get the index of the nth neighbor + nbr_index = ind_list[nbr] + if sample_j >= len(values_list): + values_list.append([]) + value_i = values_list[sample_j] + for hazard_column in hazard_columns: + value_i.append(gdf.iloc[nbr_index][hazard_column][event_j]) + asset_data['Events'][0]['SimCenterEvent']['Values'] = values_list + asset_data['Events'][0]['SimCenterEvent']['Labels'] = label_list + + # Below are for future use when multiple event applications can be used + # for hazard_column in hazard_columns: + # # If the column name ends with '_file', it is a time history file + # if hazard_column.endswith('_file'): + # column_type = 'timeHistory' + # if column_type not in asset_data['Events'][0]['SimCenterEvent']: + # asset_data['Events'][0]['SimCenterEvent'][column_type] = { + # 'Events': [], + # 'ScaleFactors': [], + # 'Labels': [], + # } + # else: + # column_type = 'intensityMeasure' + # if column_type not in asset_data['Events'][0]['SimCenterEvent']: + # asset_data['Events'][0]['SimCenterEvent'][column_type] = { + # 'Events': [], + # 'Labels': [], + # } + # value_list = [] + # scale_list = [] + # for sample_j, nbr in enumerate(nbr_samples): + # # make sure we resample events if samples > event_count + # event_j = sample_j % event_count + + # # get the index of the nth neighbor + # nbr_index = ind_list[nbr] + # # if the grid has ground motion records... + # if column_type == 'timeHistory': + # # load the file for the selected grid point + # event_collection_file = gdf.iloc[nbr_index]['GP_file'] + # event_df = pd.read_csv( + # event_dir / event_collection_file, header=0 + # ) + + # # append the GM record name to the event list + # value_list.append(event_df.iloc[event_j, 0]) + + # # append the scale factor (or 1.0) to the scale list + # if len(event_df.columns) > 1: + # scale_list.append(float(event_df.iloc[event_j, 1])) + # else: + # scale_list.append(1.0) + + # # if the grid has intensity measures + # elif column_type == 'intensityMeasure': + # im_data_j = gdf.iloc[nbr_index][hazard_column] + # value_list.append(im_data_j[event_j]) + # asset_data['Events'][0]['SimCenterEvent'][column_type]['Events'].append(value_list) + # asset_data['Events'][0]['SimCenterEvent'][column_type]['Labels'].append(hazard_column) + + with open(asst_file, 'w', encoding='utf-8') as f: # noqa: PTH123 + json.dump(asset_data, f, indent=2) + + return + + # Below are for backward compatibility if file_extension == '.csv': # Existing code for CSV files grid_df = pd.read_csv(event_dir / event_grid_file, header=0) @@ -100,7 +346,6 @@ def find_neighbors( # noqa: C901, D103 grid_extra_keys = list( grid_df.drop(['GP_file', 'Longitude', 'Latitude'], axis=1).columns ) - else: # Else assume GIS files - works will all gis files that geopandas supports gdf = gpd.read_file(event_dir / event_grid_file) @@ -376,12 +621,10 @@ def find_neighbors( # noqa: C901, D103 # save the event dictionary to the AIM # TODO: we assume there is only one event # noqa: TD002 # handling multiple events will require more sophisticated inputs - if 'Events' not in asset_data: asset_data['Events'] = [{}] elif len(asset_data['Events']) == 0: asset_data['Events'].append({}) - asset_data['Events'][0].update( { # "EventClassification": "Earthquake", diff --git a/modules/performSIMULATION/IMasEDP/IMasEDP.py b/modules/performSIMULATION/IMasEDP/IMasEDP.py index bee9bd787..e2dc37524 100644 --- a/modules/performSIMULATION/IMasEDP/IMasEDP.py +++ b/modules/performSIMULATION/IMasEDP/IMasEDP.py @@ -45,68 +45,83 @@ import numpy as np -def write_RV(EVENT_input_path): # noqa: C901, N802, N803, D103 +def write_RV(AIM_input_path, EVENT_input_path): # noqa: C901, N802, N803, D103 # open the event file and get the list of events with open(EVENT_input_path, encoding='utf-8') as f: # noqa: PTH123 EVENT_in = json.load(f) # noqa: N806 - - # if there is a list of possible events, load all of them - if len(EVENT_in['randomVariables']) > 0: - event_list = EVENT_in['randomVariables'][0]['elements'] + # open the event file and get the list of events + with open(AIM_input_path, encoding='utf-8') as f: # noqa: PTH123 + AIM_in = json.load(f) # noqa: N806 + # This is for geojson type of event input, which should be the future standard + if 'SimCenterEvent' in AIM_in['Events'][0]: + value_list = AIM_in['Events'][0]['SimCenterEvent']['Values'] + label_list = AIM_in['Events'][0]['SimCenterEvent']['Labels'] + evt = EVENT_in['Events'][0] + data_dir = Path(evt['data_dir']) + f_scale = evt['unitScaleFactor'] + header = label_list + EDP_output = np.array(value_list) # noqa: N806 + + # Below is for backward compatibility with the old format else: - event_list = [ - EVENT_in['Events'][0]['event_id'], - ] + # if there is a list of possible events, load all of them + if len(EVENT_in['randomVariables']) > 0: + event_list = EVENT_in['randomVariables'][0]['elements'] + else: + event_list = [ + EVENT_in['Events'][0]['event_id'], + ] - evt = EVENT_in['Events'][0] - data_dir = Path(evt['data_dir']) - f_scale = evt['unitScaleFactor'] + evt = EVENT_in['Events'][0] + data_dir = Path(evt['data_dir']) + f_scale = evt['unitScaleFactor'] - file_sample_dict = {} + file_sample_dict = {} - for e_i, event in enumerate(event_list): - filename, sample_id, __ = event.split('x') + for e_i, event in enumerate(event_list): + filename, sample_id, __ = event.split('x') - if filename not in file_sample_dict: - file_sample_dict.update({filename: [[], []]}) + if filename not in file_sample_dict: + file_sample_dict.update({filename: [[], []]}) - file_sample_dict[filename][0].append(e_i) - file_sample_dict[filename][1].append(int(sample_id)) + file_sample_dict[filename][0].append(e_i) + file_sample_dict[filename][1].append(int(sample_id)) - EDP_output = None # noqa: N806 + EDP_output = None # noqa: N806 - for filename in file_sample_dict: - # get the header - header_data = np.genfromtxt( - data_dir / filename, - delimiter=',', - names=None, - max_rows=1, - dtype=str, - ndmin=1, - ) - header = header_data # .dtype. + for filename in file_sample_dict: + # get the header + header_data = np.genfromtxt( + data_dir / filename, + delimiter=',', + names=None, + max_rows=1, + dtype=str, + ndmin=1, + ) + header = header_data # .dtype. - data = np.genfromtxt(data_dir / filename, delimiter=',', skip_header=1) + data = np.genfromtxt(data_dir / filename, delimiter=',', skip_header=1) - # get the number of columns and reshape the data - col_count = len(header) - if col_count > 1: - data = data.reshape((data.size // col_count, col_count)) - else: - data = np.atleast_1d(data) + # get the number of columns and reshape the data + col_count = len(header) + if col_count > 1: + data = data.reshape((data.size // col_count, col_count)) + else: + data = np.atleast_1d(data) - # choose the right samples - samples = data[file_sample_dict[filename][1]] + # choose the right samples + samples = data[file_sample_dict[filename][1]] - if EDP_output is None: - if len(samples.shape) > 1: - EDP_output = np.zeros((len(event_list), samples.shape[1])) # noqa: N806 - else: - EDP_output = np.zeros(len(event_list)) # noqa: N806 + if EDP_output is None: + if len(samples.shape) > 1: + EDP_output = np.zeros((len(event_list), samples.shape[1])) # noqa: N806 + else: + EDP_output = np.zeros(len(event_list)) # noqa: N806 - EDP_output[file_sample_dict[filename][0]] = samples + EDP_output[file_sample_dict[filename][0]] = samples + # Below is used for both geojson and old hazard difinition format if len(EDP_output.shape) == 1: EDP_output = np.reshape(EDP_output, (EDP_output.shape[0], 1)) # noqa: N806 @@ -191,6 +206,6 @@ def create_EDP(EVENT_input_path, EDP_input_path): # noqa: N802, N803, D103 args = parser.parse_args() if args.getRV: - sys.exit(write_RV(args.filenameEVENT)) + sys.exit(write_RV(args.filenameAIM, args.filenameEVENT)) else: sys.exit(create_EDP(args.filenameEVENT, args.filenameEDP))