NVIDIA · ko3n1g · Nov 20, 2024 · Nov 22, 2024 · Nov 22, 2024 · Nov 22, 2024
diff --git a/Dockerfile b/Dockerfile
@@ -13,8 +13,8 @@ ARG MAX_JOBS=8
 # Git refs for dependencies
 ARG TE_TAG=7d576ed25266a17a7b651f2c12e8498f67e0baea
 ARG PYTRITON_VERSION=0.5.10
-ARG NEMO_TAG=19668e5320a2e2af0199b6d5e0b841993be3a634  # On: main
-ARG MLM_TAG=25059d3bbf68be0751800f3644731df12a88f3f3   # On: main
+ARG NEMO_TAG=ko3n1g/build/move-to-req  # On: main
+ARG MCORE_TAG=25059d3bbf68be0751800f3644731df12a88f3f3   # On: main
 ARG ALIGNER_COMMIT=main
 ARG TRTLLM_VERSION=v0.13.0
 ARG PROTOBUF_VERSION=4.24.4
@@ -34,8 +34,6 @@ git checkout -f $ALIGNER_COMMIT
 # case 1: ALIGNER_COMMIT is a local branch so we have to apply remote changes to it
 # case 2: ALIGNER_COMMIT is a commit, so git-pull is expected to fail
 git pull --rebase || true
-
-pip install --no-cache-dir --no-deps -e .
 EOF
 
 FROM ${BASE_IMAGE} as final
@@ -44,31 +42,31 @@ WORKDIR /opt
 # needed in case git complains that it can't detect a valid email, this email is fake but works
 RUN git config --global user.email "worker@nvidia.com"
 # install latest apex
-ARG APEX_TAG
-RUN pip uninstall -y apex && \
-    git clone https://github.com/NVIDIA/apex && \
-    cd apex && \
-    if [ ! -z $APEX_TAG ]; then \
-        git fetch origin $APEX_TAG && \
-        git checkout FETCH_HEAD; \
-    fi && \
-    pip install -v --no-build-isolation --disable-pip-version-check --no-cache-dir --config-settings "--build-option=--cpp_ext --cuda_ext --fast_layer_norm --distributed_adam --deprecated_fused_adam" ./
+# ARG APEX_TAG
+# RUN pip uninstall -y apex && \
+#     git clone https://github.com/NVIDIA/apex && \
+#     cd apex && \
+#     if [ ! -z $APEX_TAG ]; then \
+#     git fetch origin $APEX_TAG && \
+#     git checkout FETCH_HEAD; \
+#     fi && \
+#     pip install -v --no-build-isolation --disable-pip-version-check --no-cache-dir --config-settings "--build-option=--cpp_ext --cuda_ext --fast_layer_norm --distributed_adam --deprecated_fused_adam" ./
 
 # Git LFS
 RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash && \
     apt-get install git-lfs && \
     git lfs install && \
     apt-get clean
 
-# TRTLLM
-ARG TRTLLM_VERSION
-RUN git clone https://github.com/NVIDIA/TensorRT-LLM.git && \
-    cd TensorRT-LLM && \
-    git checkout ${TRTLLM_VERSION} && \
-    . docker/common/install_tensorrt.sh && \
-    python3 ./scripts/build_wheel.py --job_count $(nproc) --trt_root /usr/local/tensorrt  --python_bindings --benchmarks && \
-    pip install -e .
-ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12/compat/lib.real/
+# # TRTLLM
+# ARG TRTLLM_VERSION
+# RUN git clone https://github.com/NVIDIA/TensorRT-LLM.git && \
+#     cd TensorRT-LLM && \
+#     git checkout ${TRTLLM_VERSION} && \
+#     . docker/common/install_tensorrt.sh && \
+#     python3 ./scripts/build_wheel.py --job_count $(nproc) --trt_root /usr/local/tensorrt  --python_bindings --benchmarks && \
+#     pip install -e .
+# ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12/compat/lib.real/
 
 # install TransformerEngine
 ARG MAX_JOBS
@@ -77,47 +75,37 @@ RUN pip uninstall -y transformer-engine && \
     git clone https://github.com/NVIDIA/TransformerEngine.git && \
     cd TransformerEngine && \
     if [ ! -z $TE_TAG ]; then \
-        git fetch origin $TE_TAG && \
-        git checkout FETCH_HEAD; \
+    git fetch origin $TE_TAG && \
+    git checkout FETCH_HEAD; \
     fi && \
-    git submodule init && git submodule update && \
-    NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi pip install .
-
-# place any util pkgs here
-ARG PYTRITON_VERSION
-RUN pip install --upgrade-strategy only-if-needed nvidia-pytriton==$PYTRITON_VERSION
-ARG PROTOBUF_VERSION
-RUN pip install -U --no-deps protobuf==$PROTOBUF_VERSION
-RUN pip install --upgrade-strategy only-if-needed jsonlines
+    git submodule init && git submodule update
 
 # NeMo
 ARG NEMO_TAG
 RUN git clone https://github.com/NVIDIA/NeMo.git && \
     cd NeMo && \
     git pull && \
     if [ ! -z $NEMO_TAG ]; then \
-        git fetch origin $NEMO_TAG && \
-        git checkout FETCH_HEAD; \
-    fi && \
-    pip uninstall -y nemo_toolkit sacrebleu && \
-    pip install -e ".[nlp]" && \
-    cd nemo/collections/nlp/data/language_modeling/megatron && make
+    git fetch origin $NEMO_TAG && \
+    git checkout FETCH_HEAD; \
+    fi
 
 # MLM
-ARG MLM_TAG
+ARG MCORE_TAG
 RUN pip uninstall -y megatron-core && \
     git clone https://github.com/NVIDIA/Megatron-LM.git && \
     cd Megatron-LM && \
     git pull && \
-    if [ ! -z $MLM_TAG ]; then \
-        git fetch origin $MLM_TAG && \
-        git checkout FETCH_HEAD; \
-    fi && \
-    pip install -e .
+    if [ ! -z $MCORE_TAG ]; then \
+    git fetch origin $MCORE_TAG && \
+    git checkout FETCH_HEAD; \
+    fi
 
 COPY --from=aligner-bump /opt/NeMo-Aligner /opt/NeMo-Aligner
+ARG PYTRITON_VERSION
+ARG PROTOBUF_VERSION
 RUN cd /opt/NeMo-Aligner && \
-    pip install --no-deps -e .
+    NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi pip install .
 
 RUN cd TensorRT-LLM && patch -p1 < ../NeMo-Aligner/setup/trtllm.patch
 

diff --git a/nemo_aligner/utils/trt_llm.py b/nemo_aligner/utils/trt_llm.py
@@ -44,8 +44,9 @@ def append_and_repad_list(list_of_items, item_to_append, pad_id):
 
 
 class GPTGenerateTRTLLM:
-    # If a tokenizer does not have a pad_id, we use a large negative number and replace
-    # with self.eos_id after generation.
+    # Always use a reserved negative number as the pad_id, since tokenizers vary: they may
+    #  (1) have a pad_id, (2) not have a pad_id, or (3) have None as the pad_id.
+    #  This pad_id is replaced with eos_id after generation.
     DEFAULT_PAD_ID = -42
 
     def __init__(
@@ -72,12 +73,6 @@ def __init__(
                 "You are trying to use NeMo-Aligner's TensorRT-LLM acceleration for LLM generation. Please build the dockerfile to enable this feature: https://github.com/NVIDIA/NeMo-Aligner/blob/main/Dockerfile"
             )
 
-        # If this assert turns out to be a blocker with some tokenizers, potential workarounds could be to:
-        #   - add a config option to allow specifying which token we pass as `end_id` to TRT-LLM (should
-        #     be a token that the model is guaranteed to never generate)
-        assert (
-            tokenizer.pad_id != tokenizer.eos_id
-        ), f"We require tokenizers to have a different {tokenizer.pad_id=} than {tokenizer.eos_id=} when using TRT-LLM. This is to make sure all code goes into the same path and include the eos_id when the response lengths are computed"
         assert max_input_len > 0
         assert max_generation_length > 0
         assert (
@@ -104,7 +99,7 @@ def __init__(
         rng_generator.manual_seed(seed)
         self.rng_generator = rng_generator
 
-        self.pad_id = tokenizer.pad_id if tokenizer.pad_id is not None else GPTGenerateTRTLLM.DEFAULT_PAD_ID
+        self.pad_id = GPTGenerateTRTLLM.DEFAULT_PAD_ID
         self.eos_id = tokenizer.eos_id
         end_strings = list(end_strings)
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -16,7 +16,8 @@
 profile = "black"  # black-compatible
 line_length = 119  # should match black parameters
 ignore_whitespace = true  # ignore whitespace for compatibility with the initial style
-py_version = 38  # python 3.8 as a target version
+py_version = 310  # python 3.10 as a target version
+requires-python = ">=3.10"
 known_first_party = ["nemo", "nemo_aligner"]  # FIRSTPARTY section
 known_third_party = ["examples"]  # THIRDPARTY section
 sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]

diff --git a/setup.py b/setup.py
@@ -23,7 +23,7 @@
 import subprocess
 from distutils import cmd as distutils_cmd
 from distutils import log as distutils_log
-
+import re
 import setuptools
 
 spec = importlib.util.spec_from_file_location("package_info", "nemo_aligner/package_info.py")
@@ -62,13 +62,30 @@
 #                             Dependency Loading                              #
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
 
+# Replace each ${VAR} or ${VAR:-default_value} placeholder with the environment variable's value or the default
+def replace_env_vars(text):
+    # Regex to match ${VAR} or ${VAR:-default_value}
+    pattern = re.compile(r"\$\{(\w+)(:-([^}]*))?\}")
+
+    def replace_var(match):
+        var_name = match.group(1)  # The environment variable name
+        default_value = match.group(3)  # The default value if provided
+
+        # Return the environment variable's value, else the default (if provided), else the placeholder unchanged
+        return os.environ.get(var_name, default_value if default_value is not None else f"${{{var_name}}}")
+
+    # Substitute all patterns in the text
+    return pattern.sub(replace_var, text)
+
 
 def req_file(filename, folder="requirements"):
     with open(os.path.join(folder, filename), encoding="utf-8") as f:
         content = f.readlines()
-    # you may also want to remove whitespace characters
-    # Example: `\n` at the end of each line
-    return [x.strip() for x in content if x.strip()]
+    requirements = [x.strip() for x in content]
+    requirements = [
+        replace_env_vars(line.strip()) for line in requirements if line.strip() and not line.startswith("#")
+    ]
+    return requirements
 
 
 install_requires = req_file("requirements.txt", folder="setup")

diff --git a/setup/requirements.txt b/setup/requirements.txt
@@ -1,4 +1,5 @@
+datasets>=3.0.1
 jsonlines
-megatron_core>=0.8
-nemo_toolkit[nlp]
-nvidia-pytriton
+nemo_toolkit[nlp] @ git+https://github.com/NVIDIA/NeMo.git@${NEMO_TAG}#egg=nemo_toolkit[nlp]
+nvidia-pytriton #==${PYTRITON_VERSION:-0.5.10}
+protobuf==${PROTOBUF_VERSION:-4.24.4}