From bc1ce37de3036a904babddd2a1a7b44e24604096 Mon Sep 17 00:00:00 2001 From: alperaltuntas Date: Wed, 22 Oct 2025 08:48:12 -0600 Subject: [PATCH 1/2] remove duplicate params from yaml/json param files and add a CI check to detect future duplicate entries --- .github/workflows/general-ci-tests.yml | 17 ++ param_templates/MOM_input.yaml | 26 +-- param_templates/input_data_list.yaml | 3 - param_templates/json/MOM_input.json | 20 +- param_templates/json/input_data_list.json | 2 +- tests/check_duplicate_params.py | 255 ++++++++++++++++++++++ 6 files changed, 289 insertions(+), 34 deletions(-) create mode 100755 tests/check_duplicate_params.py diff --git a/.github/workflows/general-ci-tests.yml b/.github/workflows/general-ci-tests.yml index ebdd8d1..b49dbe3 100644 --- a/.github/workflows/general-ci-tests.yml +++ b/.github/workflows/general-ci-tests.yml @@ -94,6 +94,23 @@ jobs: - name: Run the check_default_params script run: python tests/check_default_params.py + # Job to check if duplicate parameters exist in param template files + check_duplicate_params: + + runs-on: ubuntu-latest + + steps: + # Checkout the repo + - uses: actions/checkout@v4 + + # Run the test + - name: Run the check_duplicate_params script + run: | + python tests/check_duplicate_params.py param_templates/MOM_input.yaml + python tests/check_duplicate_params.py param_templates/input_data_list.yaml + python tests/check_duplicate_params.py param_templates/input_nml.yaml + python tests/check_duplicate_params.py param_templates/diag_table.yaml + # Job to run check_input_data_list script check_input_data_list: diff --git a/param_templates/MOM_input.yaml b/param_templates/MOM_input.yaml index 0126e37..6a20b85 100644 --- a/param_templates/MOM_input.yaml +++ b/param_templates/MOM_input.yaml @@ -89,13 +89,6 @@ Global: $OCN_GRID == "tx2_3v2": 480 $OCN_GRID == "tx0.25v1": 1080 $OCN_GRID == "MISOMIP": 40 - IO_LAYOUT: - description: | - "default = 0 - The processor layout to be used, or 0,0 to automatically - set 
the io_layout to be the same as the layout." - datatype: string - value: 1, 1 NK: description: | "[nondim] @@ -295,7 +288,9 @@ Global: "default = WRIGHT_FULL EQN_OF_STATE determines which ocean equation of state should be used." datatype: string - value: "WRIGHT_FULL" + value: + $OCN_GRID == "MISOMIP": "LINEAR" + else: "WRIGHT_FULL" DTFREEZE_DP: description: | "[deg C Pa-1] default = 0.0 @@ -2946,16 +2941,6 @@ Global: units: not defined value: $OCN_GRID == "MISOMIP": 1.0 - EQN_OF_STATE: - description: | - "default = 'WRIGHT' - EQN_OF_STATE determines which ocean equation of state - should be used. Currently, the valid choices are - 'LINEAR', 'UNESCO', and 'WRIGHT'. - This is only used if USE_EOS is true." - datatype: string - value: - $OCN_GRID == "MISOMIP": "LINEAR" RHO_T0_S0: description: | "[kg m-3] default = 1000.0 @@ -3148,7 +3133,6 @@ Global: target density." datatype: logical units: Boolean - value: value: $MOM6_VERTICAL_GRID == "hycom1" and $OCN_GRID == "tx2_3v2": True ISOMIP_TNUDG: @@ -3737,8 +3721,8 @@ Global: be the same as the layout. 
datatype: list value: - $OCN_GRID == "tx0.25v1": - 4, 3 + $OCN_GRID == "tx0.25v1": 4, 3 + else: 1, 1 AUTO_MASKTABLE: description: | Turn on automatic mask table generation to eliminate land blocks diff --git a/param_templates/input_data_list.yaml b/param_templates/input_data_list.yaml index 569db30..e40a821 100644 --- a/param_templates/input_data_list.yaml +++ b/param_templates/input_data_list.yaml @@ -28,9 +28,6 @@ mom.input_data_list: $OCN_GRID in ["tx2_3v2", "tx0.25v1"]: $INIT_LAYERS_FROM_Z_FILE == "True": "${INPUTDIR}/${TEMP_SALT_Z_INIT_FILE}" - MAX_LAYER_THICKNESS_CONFIG: - $MOM6_VERTICAL_GRID == "hycom1" and $OCN_GRID in ["tx2_3v2"]: - "${DIN_LOC_ROOT}/ocn/mom/tx2_3v2/dz_max-2025-09-12.nc" SURFACE_PRESSURE_FILE: $OCN_GRID == "MISOMIP": "${INPUTDIR}/MISOMIP_181108.nc" SALT_RESTORE_FILE: diff --git a/param_templates/json/MOM_input.json b/param_templates/json/MOM_input.json index c637b05..d04cfc6 100644 --- a/param_templates/json/MOM_input.json +++ b/param_templates/json/MOM_input.json @@ -43,13 +43,6 @@ "$OCN_GRID == \"MISOMIP\"": 40 } }, - "IO_LAYOUT": { - "description": "The processor layout to be used, or 0,0 to automatically set the io_layout to\nbe the same as the layout.\n", - "datatype": "list", - "value": { - "$OCN_GRID == \"tx0.25v1\"": "4, 3" - } - }, "NK": { "description": "\"[nondim]\nThe number of model layers.\"\n", "datatype": "integer", @@ -200,10 +193,11 @@ } }, "EQN_OF_STATE": { - "description": "\"default = 'WRIGHT'\nEQN_OF_STATE determines which ocean equation of state\nshould be used. 
Currently, the valid choices are\n'LINEAR', 'UNESCO', and 'WRIGHT'.\nThis is only used if USE_EOS is true.\"\n", + "description": "\"default = WRIGHT_FULL\nEQN_OF_STATE determines which ocean equation of state should be used.\"\n", "datatype": "string", "value": { - "$OCN_GRID == \"MISOMIP\"": "LINEAR" + "$OCN_GRID == \"MISOMIP\"": "LINEAR", + "else": "WRIGHT_FULL" } }, "DTFREEZE_DP": { @@ -3028,6 +3022,14 @@ "$COMP_WAV == \"ww3\" and $MOM6_WW3_CPL_METHOD == \"most\"": "0.04, 0.11, 0.33" } }, + "IO_LAYOUT": { + "description": "The processor layout to be used, or 0,0 to automatically set the io_layout to\nbe the same as the layout.\n", + "datatype": "list", + "value": { + "$OCN_GRID == \"tx0.25v1\"": "4, 3", + "else": "1, 1" + } + }, "AUTO_MASKTABLE": { "description": "Turn on automatic mask table generation to eliminate land blocks\n", "datatype": "list", diff --git a/param_templates/json/input_data_list.json b/param_templates/json/input_data_list.json index f8494ef..c055c69 100644 --- a/param_templates/json/input_data_list.json +++ b/param_templates/json/input_data_list.json @@ -15,7 +15,7 @@ "$OCN_GRID == \"tx2_3v2\"": "${INPUTDIR}/lev-2025-09-12.nc" }, "MAX_LAYER_THICKNESS_CONFIG": { - "$MOM6_VERTICAL_GRID == \"hycom1\" and $OCN_GRID in [\"tx2_3v2\"]": "${DIN_LOC_ROOT}/ocn/mom/tx2_3v2/dz_max-2025-09-12.nc" + "$OCN_GRID == \"tx2_3v2\"": "${INPUTDIR}/dz_max-2025-09-12.nc" }, "DIAG_COORD_DEF_Z": { "$OCN_GRID == \"tx0.25v1\"": "${INPUTDIR}/interpolate_zgrid_40L.nc" diff --git a/tests/check_duplicate_params.py b/tests/check_duplicate_params.py new file mode 100755 index 0000000..3bcd74a --- /dev/null +++ b/tests/check_duplicate_params.py @@ -0,0 +1,255 @@ +#!/usr/bin/env python3 +""" +Function to find duplicate entries at the same level/scope in a YAML file. +This version catches duplicates during parsing before they are overwritten. 
# Duplicate-key checker for YAML parameter templates.
#
# The file is scanned as raw text instead of being loaded with a YAML parser,
# because a parser keeps only the last occurrence of a repeated key and the
# duplicate would be lost before it could be reported.

import argparse
import re
import sys
from collections import defaultdict

try:
    # The line-based scanner below does not use PyYAML. The import is kept so
    # the module namespace is unchanged, but it is optional so the check also
    # runs on a bare CI runner where PyYAML is not installed.
    import yaml  # noqa: F401
except ImportError:
    yaml = None

# A mapping key wrapped in double or single quotes, e.g. `"a: b": 1`.
_DQ_KEY_RE = re.compile(r'^"((?:[^"\\]|\\.)*)"\s*:(?:\s|$)')
_SQ_KEY_RE = re.compile(r"^'((?:[^']|'')*)'\s*:(?:\s|$)")
# A plain key: everything up to the first ':' that is followed by whitespace
# or end of line (a ':' inside `"x:y"` or `12:30` does not end the key).
_PLAIN_KEY_RE = re.compile(r'^(.+?)\s*:(?:\s|$)')
# A line whose value is a block scalar header: `|` or `>` with optional
# chomping (+/-) and indentation (1-9) indicators and an optional comment.
_BLOCK_SCALAR_RE = re.compile(
    r':\s+[|>](?:[+-][1-9]?|[1-9][+-]?)?(?:\s+#.*)?$'
)


def _indent_of(line):
    """Return the number of leading whitespace characters of *line*."""
    return len(line) - len(line.lstrip())


def _extract_key(line):
    """Return the mapping key on *line*, or None if the line has no key."""
    stripped = line.strip()
    if not stripped or stripped.startswith('#'):
        return None
    for pattern in (_DQ_KEY_RE, _SQ_KEY_RE, _PLAIN_KEY_RE):
        match = pattern.match(stripped)
        if match:
            return match.group(1)
    return None


def _starts_block_scalar(line):
    """Return True if *line* is a key whose value is a block scalar."""
    return _BLOCK_SCALAR_RE.search(line.strip()) is not None


def _read_lines(path):
    """Return the lines of the text file at *path* (raises on I/O errors)."""
    with open(path, 'r', encoding='utf-8') as handle:
        return handle.readlines()


def _find_duplicates_in_lines(lines):
    """Return ``{scope: {key: [line numbers]}}`` for keys repeated in a scope.

    A scope is named by the dot-joined path of its parent keys; the top level
    is reported as ``"root"``. Line numbers are 1-based.
    """
    keys_by_scope = defaultdict(lambda: defaultdict(list))
    scope_stack = []     # (key, indent) of the currently open parent keys
    block_indent = None  # indent of the key owning the open block scalar

    for line_num, line in enumerate(lines, 1):
        stripped = line.strip()
        if not stripped or stripped.startswith('#'):
            continue

        indent = _indent_of(line)
        key = _extract_key(line)

        if block_indent is not None:
            # Block scalar text is indented deeper than its key. A shallower
            # line only ends the block if it actually carries a key.
            if indent > block_indent or key is None:
                continue
            block_indent = None

        if key is None:
            continue

        opens_block = _starts_block_scalar(line)
        if opens_block:
            block_indent = indent

        # Close every scope that sits at the same or a deeper indentation.
        while scope_stack and scope_stack[-1][1] >= indent:
            scope_stack.pop()

        scope = '.'.join(name for name, _ in scope_stack)
        keys_by_scope[scope][key].append(line_num)

        # A key holding a block scalar cannot have child keys.
        if not opens_block:
            scope_stack.append((key, indent))

    duplicates = {}
    for scope, keys in keys_by_scope.items():
        repeated = {k: nums for k, nums in keys.items() if len(nums) > 1}
        if repeated:
            duplicates[scope or "root"] = repeated
    return duplicates


def find_yaml_duplicates_raw(yaml_file_path):
    """
    Find duplicate keys by parsing the raw YAML text line by line.

    Parameters
    ----------
    yaml_file_path : str
        Path to the YAML file to check

    Returns
    -------
    dict
        Dictionary with scope/path as key and duplicate info as value
        (``{scope: {key: [line numbers]}}``). An empty dict is returned both
        when there are no duplicates and when the file cannot be read; in the
        latter case an error message is printed. Callers that must tell the
        two apart (such as ``main``) should check readability themselves.
    """
    try:
        lines = _read_lines(yaml_file_path)
    except FileNotFoundError:
        print(f"Error: File '{yaml_file_path}' not found.")
        return {}
    except (OSError, UnicodeDecodeError) as err:
        print(f"Unexpected error: {err}")
        return {}
    return _find_duplicates_in_lines(lines)


def print_duplicates_detailed(duplicates, yaml_file_path=None, show_lines=True):
    """
    Pretty print the duplicates found with line numbers.

    Parameters
    ----------
    duplicates : dict
        Dictionary returned by find_yaml_duplicates_raw()
    yaml_file_path : str, optional
        Path to YAML file to show actual lines
    show_lines : bool
        Whether to show actual line content
    """
    if not duplicates:
        print("No duplicate keys found at any level.")
        return

    # Showing the offending lines is best effort: if the file cannot be read
    # again, the report falls back to line numbers only.
    file_lines = []
    if yaml_file_path and show_lines:
        try:
            file_lines = _read_lines(yaml_file_path)
        except (OSError, UnicodeDecodeError):
            file_lines = []

    print("Duplicate keys found:")
    print("=" * 60)

    for scope, scope_duplicates in duplicates.items():
        print(f"Scope: {scope}")
        print("-" * 40)

        for key, line_numbers in scope_duplicates.items():
            print(f"  Duplicate key: '{key}'")
            print(f"    Found on lines: {', '.join(map(str, line_numbers))}")

            if file_lines and show_lines:
                for line_num in line_numbers:
                    if 1 <= line_num <= len(file_lines):
                        line_content = file_lines[line_num - 1].rstrip()
                        print(f"      Line {line_num}: {line_content}")
            print()

        print()


def main(argv=None):
    """Main function with argparse CLI.

    Parameters
    ----------
    argv : list of str, optional
        Command-line arguments; defaults to ``sys.argv[1:]``.

    Returns
    -------
    int
        0 if no duplicates were found, 1 if duplicates were found, and 2 if
        the file could not be read (so a missing file fails a CI check
        instead of passing silently).
    """
    parser = argparse.ArgumentParser(
        description='Find duplicate keys at the same scope/level in YAML files',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s MOM_input.yaml              # Check MOM_input.yaml
  %(prog)s -f input.yaml               # Check input.yaml
  %(prog)s --no-lines MOM_input.yaml   # Don't show line content
  %(prog)s --quiet MOM_input.yaml      # Only show summary
        """
    )

    parser.add_argument(
        'file',
        nargs='?',
        default='MOM_input.yaml',
        help='YAML file to check for duplicates (default: MOM_input.yaml)'
    )

    parser.add_argument(
        '-f', '--file',
        dest='yaml_file',
        help='YAML file to check (alternative to positional argument)'
    )

    parser.add_argument(
        '--no-lines',
        action='store_true',
        help='Do not show actual line content, only line numbers'
    )

    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help='Only show summary count of duplicates found'
    )

    parser.add_argument(
        '--version',
        action='version',
        version='%(prog)s 1.1'
    )

    args = parser.parse_args(argv)

    # The -f/--file option takes precedence over the positional argument.
    yaml_file = args.yaml_file if args.yaml_file else args.file

    if not args.quiet:
        print(f"Checking for duplicates in: {yaml_file}")
        print()

    # Read the file here rather than through find_yaml_duplicates_raw(), which
    # reports an unreadable file as "no duplicates".
    try:
        lines = _read_lines(yaml_file)
    except (OSError, UnicodeDecodeError) as err:
        print(f"Error: cannot read '{yaml_file}': {err}", file=sys.stderr)
        return 2

    duplicates = _find_duplicates_in_lines(lines)

    if args.quiet:
        total_duplicates = sum(len(scope_dups) for scope_dups in duplicates.values())
        if total_duplicates > 0:
            print(f"Found {total_duplicates} duplicate keys in {len(duplicates)} scopes")
            return 1
        print("No duplicates found")
        return 0

    print_duplicates_detailed(duplicates, yaml_file, not args.no_lines)
    return 1 if duplicates else 0


if __name__ == "__main__":
    sys.exit(main())
Expected files with pattern {rpointer_pattern}.", ) - # check if the restart file is present in rundir + # check if the restart file is present in rundir for fresh branch or hybrid runs if run_type in ["branch", "hybrid"] and not continue_run and not get_refcase: - restart_file = os.path.join(rundir, f'./{run_refcase}.mom6.r.{run_refdate}-{run_reftod}.nc') - assert os.path.exists(restart_file), f"Missing restart file {run_refcase}.mom6.r.{run_refdate}-{run_reftod}.nc in rundir." + restart_file_pattern = run_refcase + r".mom6.*.r." + run_refdate + "-" + run_reftod + r".*nc$" + restart_files = [ + f for f in os.listdir(rundir) if re.match(restart_file_pattern, f) + ] + assert len(restart_files) > 0, f"Missing restart file in rundir. Expected file matching pattern {restart_file_pattern}." def postchecks(case, MOM_input_final): """Performs checks after input files are generated. To be called within prep_input() as a final step."""