From bc1ce37de3036a904babddd2a1a7b44e24604096 Mon Sep 17 00:00:00 2001
From: alperaltuntas <alperaltuntas@gmail.com>
Date: Wed, 22 Oct 2025 08:48:12 -0600
Subject: [PATCH 1/2] remove duplicate params from yaml/json param files and
 add a CI check to detect future duplicate entries

---
 .github/workflows/general-ci-tests.yml    |  17 ++
 param_templates/MOM_input.yaml            |  26 +--
 param_templates/input_data_list.yaml      |   3 -
 param_templates/json/MOM_input.json       |  20 +-
 param_templates/json/input_data_list.json |   2 +-
 tests/check_duplicate_params.py           | 255 ++++++++++++++++++++++
 6 files changed, 289 insertions(+), 34 deletions(-)
 create mode 100755 tests/check_duplicate_params.py

diff --git a/.github/workflows/general-ci-tests.yml b/.github/workflows/general-ci-tests.yml
index ebdd8d1..b49dbe3 100644
--- a/.github/workflows/general-ci-tests.yml
+++ b/.github/workflows/general-ci-tests.yml
@@ -94,6 +94,23 @@ jobs:
       - name: Run the check_default_params script
         run: python tests/check_default_params.py
   
+  # Job to check if duplicate parameters exist in param template files
+  check_duplicate_params:
+    
+    runs-on: ubuntu-latest
+    
+    steps:
+      # Checkout the repo
+      - uses: actions/checkout@v4
+      
+      # Run the test
+      - name: Run the check_duplicate_params script
+        run: |
+          python tests/check_duplicate_params.py param_templates/MOM_input.yaml
+          python tests/check_duplicate_params.py param_templates/input_data_list.yaml
+          python tests/check_duplicate_params.py param_templates/input_nml.yaml
+          python tests/check_duplicate_params.py param_templates/diag_table.yaml
+  
   # Job to run check_input_data_list script
   check_input_data_list:
     
diff --git a/param_templates/MOM_input.yaml b/param_templates/MOM_input.yaml
index 0126e37..6a20b85 100644
--- a/param_templates/MOM_input.yaml
+++ b/param_templates/MOM_input.yaml
@@ -89,13 +89,6 @@ Global:
             $OCN_GRID == "tx2_3v2": 480
             $OCN_GRID == "tx0.25v1": 1080
             $OCN_GRID == "MISOMIP": 40
-    IO_LAYOUT:
-        description: |
-            "default = 0
-            The processor layout to be used, or 0,0 to automatically
-            set the io_layout to be the same as the layout."
-        datatype: string
-        value: 1, 1
     NK:
         description: |
             "[nondim]
@@ -295,7 +288,9 @@ Global:
             "default = WRIGHT_FULL
             EQN_OF_STATE determines which ocean equation of state should be used."
         datatype: string
-        value: "WRIGHT_FULL"
+        value: 
+            $OCN_GRID == "MISOMIP": "LINEAR"
+            else: "WRIGHT_FULL"
     DTFREEZE_DP:
         description: |
             "[deg C Pa-1] default = 0.0
@@ -2946,16 +2941,6 @@ Global:
         units: not defined
         value:
             $OCN_GRID == "MISOMIP": 1.0
-    EQN_OF_STATE:
-        description: |
-            "default = 'WRIGHT'
-            EQN_OF_STATE determines which ocean equation of state
-            should be used.  Currently, the valid choices are
-            'LINEAR', 'UNESCO', and 'WRIGHT'.
-            This is only used if USE_EOS is true."
-        datatype: string
-        value:
-            $OCN_GRID == "MISOMIP": "LINEAR"
     RHO_T0_S0:
         description: |
             "[kg m-3] default = 1000.0
@@ -3148,7 +3133,6 @@ Global:
              target density."
         datatype: logical
         units: Boolean
-        value:
         value:
             $MOM6_VERTICAL_GRID == "hycom1" and $OCN_GRID == "tx2_3v2": True
     ISOMIP_TNUDG:
@@ -3737,8 +3721,8 @@ Global:
             be the same as the layout.
         datatype: list
         value:
-            $OCN_GRID == "tx0.25v1":
-              4, 3
+            $OCN_GRID == "tx0.25v1": 4, 3
+            else: 1, 1
     AUTO_MASKTABLE:
         description: |
             Turn on automatic mask table generation to eliminate land blocks
diff --git a/param_templates/input_data_list.yaml b/param_templates/input_data_list.yaml
index 569db30..e40a821 100644
--- a/param_templates/input_data_list.yaml
+++ b/param_templates/input_data_list.yaml
@@ -28,9 +28,6 @@ mom.input_data_list:
         $OCN_GRID in ["tx2_3v2", "tx0.25v1"]:
             $INIT_LAYERS_FROM_Z_FILE == "True":
                 "${INPUTDIR}/${TEMP_SALT_Z_INIT_FILE}"
-    MAX_LAYER_THICKNESS_CONFIG:
-        $MOM6_VERTICAL_GRID == "hycom1" and $OCN_GRID in ["tx2_3v2"]:
-                  "${DIN_LOC_ROOT}/ocn/mom/tx2_3v2/dz_max-2025-09-12.nc"
     SURFACE_PRESSURE_FILE:
         $OCN_GRID == "MISOMIP": "${INPUTDIR}/MISOMIP_181108.nc"
     SALT_RESTORE_FILE:
diff --git a/param_templates/json/MOM_input.json b/param_templates/json/MOM_input.json
index c637b05..d04cfc6 100644
--- a/param_templates/json/MOM_input.json
+++ b/param_templates/json/MOM_input.json
@@ -43,13 +43,6 @@
             "$OCN_GRID == \"MISOMIP\"": 40
          }
       },
-      "IO_LAYOUT": {
-         "description": "The processor layout to be used, or 0,0 to automatically set the io_layout to\nbe the same as the layout.\n",
-         "datatype": "list",
-         "value": {
-            "$OCN_GRID == \"tx0.25v1\"": "4, 3"
-         }
-      },
       "NK": {
          "description": "\"[nondim]\nThe number of model layers.\"\n",
          "datatype": "integer",
@@ -200,10 +193,11 @@
          }
       },
       "EQN_OF_STATE": {
-         "description": "\"default = 'WRIGHT'\nEQN_OF_STATE determines which ocean equation of state\nshould be used.  Currently, the valid choices are\n'LINEAR', 'UNESCO', and 'WRIGHT'.\nThis is only used if USE_EOS is true.\"\n",
+         "description": "\"default = WRIGHT_FULL\nEQN_OF_STATE determines which ocean equation of state should be used.\"\n",
          "datatype": "string",
          "value": {
-            "$OCN_GRID == \"MISOMIP\"": "LINEAR"
+            "$OCN_GRID == \"MISOMIP\"": "LINEAR",
+            "else": "WRIGHT_FULL"
          }
       },
       "DTFREEZE_DP": {
@@ -3028,6 +3022,14 @@
             "$COMP_WAV == \"ww3\" and $MOM6_WW3_CPL_METHOD == \"most\"": "0.04, 0.11, 0.33"
          }
       },
+      "IO_LAYOUT": {
+         "description": "The processor layout to be used, or 0,0 to automatically set the io_layout to\nbe the same as the layout.\n",
+         "datatype": "list",
+         "value": {
+            "$OCN_GRID == \"tx0.25v1\"": "4, 3",
+            "else": "1, 1"
+         }
+      },
       "AUTO_MASKTABLE": {
          "description": "Turn on automatic mask table generation to eliminate land blocks\n",
          "datatype": "list",
diff --git a/param_templates/json/input_data_list.json b/param_templates/json/input_data_list.json
index f8494ef..c055c69 100644
--- a/param_templates/json/input_data_list.json
+++ b/param_templates/json/input_data_list.json
@@ -15,7 +15,7 @@
          "$OCN_GRID == \"tx2_3v2\"": "${INPUTDIR}/lev-2025-09-12.nc"
       },
       "MAX_LAYER_THICKNESS_CONFIG": {
-         "$MOM6_VERTICAL_GRID == \"hycom1\" and $OCN_GRID in [\"tx2_3v2\"]": "${DIN_LOC_ROOT}/ocn/mom/tx2_3v2/dz_max-2025-09-12.nc"
+         "$OCN_GRID == \"tx2_3v2\"": "${INPUTDIR}/dz_max-2025-09-12.nc"
       },
       "DIAG_COORD_DEF_Z": {
          "$OCN_GRID == \"tx0.25v1\"": "${INPUTDIR}/interpolate_zgrid_40L.nc"
diff --git a/tests/check_duplicate_params.py b/tests/check_duplicate_params.py
new file mode 100755
index 0000000..3bcd74a
--- /dev/null
+++ b/tests/check_duplicate_params.py
@@ -0,0 +1,255 @@
+#!/usr/bin/env python3
+"""
+Script to find duplicate keys at the same level/scope in a YAML file.
+It scans the raw text, so duplicates are caught before a YAML loader would overwrite them.
+"""
+
+import yaml
+import re
+import argparse
+from collections import defaultdict
+
+
+def find_yaml_duplicates_raw(yaml_file_path):
+    """
+    Find duplicate keys by parsing the raw YAML text line by line.
+
+    Parameters
+    ----------
+    yaml_file_path : str
+        Path to the YAML file to check
+
+    Returns
+    -------
+    dict
+        {scope: {key: [line_numbers]}} for every key seen more than once in
+        the same scope; the top-level scope is named "root".
+
+    Raises
+    ------
+    Exception
+        Any error hit while reading or scanning the file (FileNotFoundError
+        included) is printed and re-raised instead of returning an empty dict.
+    """
+
+    def get_indentation_level(line):
+        """Get the indentation level of a line."""
+        return len(line) - len(line.lstrip())
+
+    def extract_key_from_line(line):
+        """Extract YAML key from a line."""
+        stripped = line.strip()
+        if ':' in stripped and not stripped.startswith('#'):
+            # Handle different YAML key formats
+            key_part = stripped.split(':')[0].strip()
+            # Remove quotes if present
+            key_part = key_part.strip('"\'')
+            return key_part
+        return None
+
+    def is_multiline_string_start(line):
+        """Check if this line starts a multiline string (| or >)."""
+        stripped = line.strip()
+        return ':' in stripped and (stripped.endswith('|') or stripped.endswith('>'))
+
+    try:
+        with open(yaml_file_path, 'r') as file:
+            lines = file.readlines()
+
+        # Track keys at each indentation level
+        keys_by_level = defaultdict(lambda: defaultdict(list))  # {scope: {key: [line_numbers]}}
+        scope_stack = []  # Track nested scope path
+
+        # State tracking for multiline strings
+        in_multiline_string = False
+        multiline_string_indent = 0
+
+        for line_num, line in enumerate(lines, 1):
+            # Skip empty lines and comments
+            if not line.strip() or line.strip().startswith('#'):
+                continue
+
+            indent = get_indentation_level(line)
+
+            # While inside a multiline string, skip its content: any line that is
+            # indented deeper than the line that opened it, or that has no key.
+            if in_multiline_string:
+                if indent > multiline_string_indent or extract_key_from_line(line) is None:
+                    continue
+                # A key at the same or a shallower indent ends the multiline string
+                in_multiline_string = False
+
+            # If we reach here, we're not in a multiline string (or just exited one)
+            key = extract_key_from_line(line)
+
+            if key is None:
+                continue
+
+            # Check if this line starts a multiline string
+            if is_multiline_string_start(line):
+                in_multiline_string = True
+                multiline_string_indent = indent
+
+            # Update scope stack based on indentation
+            # Remove scopes that are at same or deeper level
+            while scope_stack and scope_stack[-1][1] >= indent:
+                scope_stack.pop()
+
+            # Add current key to scope
+            current_scope = '.'.join([item[0] for item in scope_stack])
+
+            # Track this key at current scope
+            keys_by_level[current_scope][key].append(line_num)
+
+            # Add to scope stack for nested items (only if not a multiline string value)
+            if not is_multiline_string_start(line):
+                scope_stack.append((key, indent))
+
+        # Find duplicates
+        duplicates = {}
+        for scope, keys_dict in keys_by_level.items():
+            scope_duplicates = {}
+            for key, line_numbers in keys_dict.items():
+                if len(line_numbers) > 1:
+                    scope_duplicates[key] = line_numbers
+
+            if scope_duplicates:
+                scope_name = scope if scope else "root"
+                duplicates[scope_name] = scope_duplicates
+
+        return duplicates
+
+    except FileNotFoundError:
+        print(f"Error: File '{yaml_file_path}' not found.")
+        # Re-raise: a missing file must fail the check instead of passing as clean
+        raise
+    except Exception as e:
+        print(f"Unexpected error: {e}")
+        # Re-raise: an aborted scan must not be reported as "no duplicates"
+        raise
+
+
+def print_duplicates_detailed(duplicates, yaml_file_path=None, show_lines=True):
+    """
+    Pretty print the duplicates found with line numbers.
+
+    Parameters
+    ----------
+    duplicates : dict
+        Dictionary returned by find_yaml_duplicates_raw()
+    yaml_file_path : str, optional
+        Path to YAML file to show actual lines; if unreadable, only line numbers are printed
+    show_lines : bool
+        Whether to show actual line content
+    """
+    if not duplicates:
+        print("No duplicate keys found at any level.")
+        return
+
+    # Read file content if path provided and lines should be shown
+    file_lines = []
+    if yaml_file_path and show_lines:
+        try:
+            with open(yaml_file_path, 'r') as f:
+                file_lines = f.readlines()
+        except (OSError, UnicodeDecodeError):
+            pass  # best effort: fall back to printing line numbers only
+
+    print("Duplicate keys found:")
+    print("=" * 60)
+
+    for scope, scope_duplicates in duplicates.items():
+        print(f"Scope: {scope}")
+        print("-" * 40)
+
+        for key, line_numbers in scope_duplicates.items():
+            print(f"  Duplicate key: '{key}'")
+            print(f"  Found on lines: {', '.join(map(str, line_numbers))}")
+
+            # Show actual lines if file content available and requested
+            if file_lines and show_lines:
+                for line_num in line_numbers:
+                    if 1 <= line_num <= len(file_lines):
+                        line_content = file_lines[line_num - 1].rstrip()
+                        print(f"    Line {line_num}: {line_content}")
+            print()
+
+        print()
+
+
+def main():
+    """Run the duplicate-key check from the command line; return 1 if duplicates were found, else 0."""
+    parser = argparse.ArgumentParser(
+        description='Find duplicate keys at the same scope/level in YAML files',
+        formatter_class=argparse.RawDescriptionHelpFormatter,  # print the epilog exactly as laid out below
+        epilog="""
+Examples:
+  %(prog)s MOM_input.yaml                    # Check MOM_input.yaml
+  %(prog)s -f input.yaml                     # Check input.yaml
+  %(prog)s --no-lines MOM_input.yaml         # Don't show line content
+  %(prog)s --quiet MOM_input.yaml            # Only show summary
+        """
+    )
+    
+    parser.add_argument(
+        'file',
+        nargs='?',  # the positional file is optional; the default below is used when omitted
+        default='MOM_input.yaml',
+        help='YAML file to check for duplicates (default: MOM_input.yaml)'
+    )
+    
+    parser.add_argument(
+        '-f', '--file',
+        dest='yaml_file',  # stored separately so it does not clash with the positional 'file'
+        help='YAML file to check (alternative to positional argument)'
+    )
+    
+    parser.add_argument(
+        '--no-lines',
+        action='store_true',
+        help='Do not show actual line content, only line numbers'
+    )
+    
+    parser.add_argument(
+        '-q', '--quiet',
+        action='store_true',
+        help='Only show summary count of duplicates found'
+    )
+    
+    parser.add_argument(
+        '--version',
+        action='version',
+        version='%(prog)s 1.1'
+    )
+    
+    args = parser.parse_args()
+    
+    # -f/--file takes precedence over the positional argument when both are given
+    yaml_file = args.yaml_file if args.yaml_file else args.file
+    
+    if not args.quiet:
+        print(f"Checking for duplicates in: {yaml_file}")
+        print()
+    
+    duplicates = find_yaml_duplicates_raw(yaml_file)
+    
+    if args.quiet:
+        # Summary only: count duplicated keys across all scopes
+        total_duplicates = sum(len(scope_dups) for scope_dups in duplicates.values())
+        if total_duplicates > 0:
+            print(f"Found {total_duplicates} duplicate keys in {len(duplicates)} scopes")
+            return 1  # Exit code 1 indicates duplicates found
+        else:
+            print("No duplicates found")
+            return 0
+    else:
+        # Detailed output: per-scope listing, with line content unless --no-lines was given
+        show_lines = not args.no_lines
+        print_duplicates_detailed(duplicates, yaml_file, show_lines)
+        
+        # Return 1 when any duplicate was reported, otherwise 0
+        return 1 if duplicates else 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # exit() comes from the site module and is meant for interactive use

From 28bd2de4cb7d079d5ec27f58a16a8f0ab3dff278 Mon Sep 17 00:00:00 2001
From: alperaltuntas <alperaltuntas@gmail.com>
Date: Wed, 22 Oct 2025 08:48:59 -0600
Subject: [PATCH 2/2] fix restart file existence check for multi instance runs

---
 cime_config/buildnml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/cime_config/buildnml b/cime_config/buildnml
index 16dcdfb..3202381 100755
--- a/cime_config/buildnml
+++ b/cime_config/buildnml
@@ -399,10 +399,13 @@ def prechecks(case, inst_suffixes):
                 f"Missing rpointer files in rundir. Expected files with pattern {rpointer_pattern}.",
             )
         
-    # check if the restart file is present in rundir
+    # check if the restart file is present in rundir for fresh branch or hybrid runs
     if run_type in ["branch", "hybrid"] and not continue_run and not get_refcase:
-        restart_file = os.path.join(rundir, f'./{run_refcase}.mom6.r.{run_refdate}-{run_reftod}.nc')
-        assert os.path.exists(restart_file), f"Missing restart file {run_refcase}.mom6.r.{run_refdate}-{run_reftod}.nc in rundir."
+        restart_file_pattern = run_refcase + r".mom6.*.r." + run_refdate + "-" + run_reftod + r".*nc$"
+        restart_files = [
+            f for f in os.listdir(rundir) if re.match(restart_file_pattern, f)
+        ]
+        assert len(restart_files) > 0, f"Missing restart file in rundir. Expected file matching pattern {restart_file_pattern}."
 
 def postchecks(case,  MOM_input_final):
     """Performs checks after input files are generated. To be called within prep_input() as a final step."""