Skip to content
This repository was archived by the owner on Nov 19, 2025. It is now read-only.
Draft
80 changes: 34 additions & 46 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ ARG MAX_JOBS=8
# Git refs for dependencies
ARG TE_TAG=7d576ed25266a17a7b651f2c12e8498f67e0baea
ARG PYTRITON_VERSION=0.5.10
ARG NEMO_TAG=19668e5320a2e2af0199b6d5e0b841993be3a634 # On: main
ARG MLM_TAG=25059d3bbf68be0751800f3644731df12a88f3f3 # On: main
ARG NEMO_TAG=ko3n1g/build/move-to-req # On: main
ARG MCORE_TAG=25059d3bbf68be0751800f3644731df12a88f3f3 # On: main
ARG ALIGNER_COMMIT=main
ARG TRTLLM_VERSION=v0.13.0
ARG PROTOBUF_VERSION=4.24.4
Expand All @@ -34,8 +34,6 @@ git checkout -f $ALIGNER_COMMIT
# case 1: ALIGNER_COMMIT is a local branch so we have to apply remote changes to it
# case 2: ALIGNER_COMMIT is a commit, so git-pull is expected to fail
git pull --rebase || true

pip install --no-cache-dir --no-deps -e .
EOF

FROM ${BASE_IMAGE} as final
Expand All @@ -44,31 +42,31 @@ WORKDIR /opt
# needed in case git complains that it can't detect a valid email, this email is fake but works
RUN git config --global user.email "worker@nvidia.com"
# install latest apex
ARG APEX_TAG
RUN pip uninstall -y apex && \
git clone https://github.com/NVIDIA/apex && \
cd apex && \
if [ ! -z $APEX_TAG ]; then \
git fetch origin $APEX_TAG && \
git checkout FETCH_HEAD; \
fi && \
pip install -v --no-build-isolation --disable-pip-version-check --no-cache-dir --config-settings "--build-option=--cpp_ext --cuda_ext --fast_layer_norm --distributed_adam --deprecated_fused_adam" ./
# ARG APEX_TAG
# RUN pip uninstall -y apex && \
# git clone https://github.com/NVIDIA/apex && \
# cd apex && \
# if [ ! -z $APEX_TAG ]; then \
# git fetch origin $APEX_TAG && \
# git checkout FETCH_HEAD; \
# fi && \
# pip install -v --no-build-isolation --disable-pip-version-check --no-cache-dir --config-settings "--build-option=--cpp_ext --cuda_ext --fast_layer_norm --distributed_adam --deprecated_fused_adam" ./

# Git LFS
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash && \
apt-get install git-lfs && \
git lfs install && \
apt-get clean

# TRTLLM
ARG TRTLLM_VERSION
RUN git clone https://github.com/NVIDIA/TensorRT-LLM.git && \
cd TensorRT-LLM && \
git checkout ${TRTLLM_VERSION} && \
. docker/common/install_tensorrt.sh && \
python3 ./scripts/build_wheel.py --job_count $(nproc) --trt_root /usr/local/tensorrt --python_bindings --benchmarks && \
pip install -e .
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12/compat/lib.real/
# # TRTLLM
# ARG TRTLLM_VERSION
# RUN git clone https://github.com/NVIDIA/TensorRT-LLM.git && \
# cd TensorRT-LLM && \
# git checkout ${TRTLLM_VERSION} && \
# . docker/common/install_tensorrt.sh && \
# python3 ./scripts/build_wheel.py --job_count $(nproc) --trt_root /usr/local/tensorrt --python_bindings --benchmarks && \
# pip install -e .
# ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12/compat/lib.real/

# install TransformerEngine
ARG MAX_JOBS
Expand All @@ -77,47 +75,37 @@ RUN pip uninstall -y transformer-engine && \
git clone https://github.com/NVIDIA/TransformerEngine.git && \
cd TransformerEngine && \
if [ ! -z $TE_TAG ]; then \
git fetch origin $TE_TAG && \
git checkout FETCH_HEAD; \
git fetch origin $TE_TAG && \
git checkout FETCH_HEAD; \
fi && \
git submodule init && git submodule update && \
NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi pip install .

# place any util pkgs here
ARG PYTRITON_VERSION
RUN pip install --upgrade-strategy only-if-needed nvidia-pytriton==$PYTRITON_VERSION
ARG PROTOBUF_VERSION
RUN pip install -U --no-deps protobuf==$PROTOBUF_VERSION
RUN pip install --upgrade-strategy only-if-needed jsonlines
git submodule init && git submodule update

# NeMo
ARG NEMO_TAG
RUN git clone https://github.com/NVIDIA/NeMo.git && \
cd NeMo && \
git pull && \
if [ ! -z $NEMO_TAG ]; then \
git fetch origin $NEMO_TAG && \
git checkout FETCH_HEAD; \
fi && \
pip uninstall -y nemo_toolkit sacrebleu && \
pip install -e ".[nlp]" && \
cd nemo/collections/nlp/data/language_modeling/megatron && make
git fetch origin $NEMO_TAG && \
git checkout FETCH_HEAD; \
fi

# MLM
ARG MLM_TAG
ARG MCORE_TAG
RUN pip uninstall -y megatron-core && \
git clone https://github.com/NVIDIA/Megatron-LM.git && \
cd Megatron-LM && \
git pull && \
if [ ! -z $MLM_TAG ]; then \
git fetch origin $MLM_TAG && \
git checkout FETCH_HEAD; \
fi && \
pip install -e .
if [ ! -z $MCORE_TAG ]; then \
git fetch origin $MCORE_TAG && \
git checkout FETCH_HEAD; \
fi

COPY --from=aligner-bump /opt/NeMo-Aligner /opt/NeMo-Aligner
ARG PYTRITON_VERSION
ARG PROTOBUF_VERSION
RUN cd /opt/NeMo-Aligner && \
pip install --no-deps -e .
NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi pip install .

RUN cd TensorRT-LLM && patch -p1 < ../NeMo-Aligner/setup/trtllm.patch

Expand Down
13 changes: 4 additions & 9 deletions nemo_aligner/utils/trt_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,9 @@ def append_and_repad_list(list_of_items, item_to_append, pad_id):


class GPTGenerateTRTLLM:
# If a tokenizer does not have a pad_id, we use a large negative number and replace
# with self.eos_id after generation.
# Always use a reserved negative number as the pad_id, because tokenizers vary in
# whether they (1) define a pad_id, (2) do not define a pad_id, or (3) define it as None.
# This pad_id is replaced with eos_id after generation.
DEFAULT_PAD_ID = -42

def __init__(
Expand All @@ -72,12 +73,6 @@ def __init__(
"You are trying to use NeMo-Aligner's TensorRT-LLM acceleration for LLM generation. Please build the dockerfile to enable this feature: https://github.com/NVIDIA/NeMo-Aligner/blob/main/Dockerfile"
)

# If this assert turns out to be a blocker with some tokenizers, potential workarounds could be to:
# - add a config option to allow specifying which token we pass as `end_id` to TRT-LLM (should
# be a token that the model is guaranteed to never generate)
assert (
tokenizer.pad_id != tokenizer.eos_id
), f"We require tokenizers to have a different {tokenizer.pad_id=} than {tokenizer.eos_id=} when using TRT-LLM. This is to make sure all code goes into the same path and include the eos_id when the response lengths are computed"
assert max_input_len > 0
assert max_generation_length > 0
assert (
Expand All @@ -104,7 +99,7 @@ def __init__(
rng_generator.manual_seed(seed)
self.rng_generator = rng_generator

self.pad_id = tokenizer.pad_id if tokenizer.pad_id is not None else GPTGenerateTRTLLM.DEFAULT_PAD_ID
self.pad_id = GPTGenerateTRTLLM.DEFAULT_PAD_ID
self.eos_id = tokenizer.eos_id
end_strings = list(end_strings)

Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
profile = "black" # black-compatible
line_length = 119 # should match black parameters
ignore_whitespace = true # ignore whitespace for compatibility with the initial style
py_version = 38 # python 3.8 as a target version
py_version = 310 # python 3.10 as a target version
requires-python = ">=3.10"
known_first_party = ["nemo", "nemo_aligner"] # FIRSTPARTY section
known_third_party = ["examples"] # THIRDPARTY section
sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
Expand Down
25 changes: 21 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import subprocess
from distutils import cmd as distutils_cmd
from distutils import log as distutils_log

import re
import setuptools

spec = importlib.util.spec_from_file_location("package_info", "nemo_aligner/package_info.py")
Expand Down Expand Up @@ -62,13 +62,30 @@
# Dependency Loading #
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #

# Matches ``${VAR}`` and ``${VAR:-default}``. Group 1 is the variable name and
# group 3 is the default text (``None`` when no ``:-`` part is present).
# Compiled once at import time instead of on every call.
_ENV_VAR_PATTERN = re.compile(r"\$\{(\w+)(:-([^}]*))?\}")


def replace_env_vars(text):
    """Expand ``${VAR}`` and ``${VAR:-default}`` placeholders in ``text``.

    Substitution rules:
      * ``${VAR:-default}`` becomes the value of ``VAR`` when it is set and
        non-empty, otherwise ``default``. This mirrors the shell ``:-``
        operator, so an exported-but-empty variable no longer produces an
        empty (and therefore invalid) version pin.
      * ``${VAR}`` becomes the value of ``VAR`` when it is set (even if empty).
        When ``VAR`` is not set the placeholder is left in the text unchanged.

    Args:
        text: The string that may contain placeholders.

    Returns:
        ``text`` with every recognised placeholder substituted.
    """

    def replace_var(match):
        var_name = match.group(1)  # The environment variable name
        default_value = match.group(3)  # The default value, or None if not provided
        value = os.environ.get(var_name)

        if default_value is not None:
            # ``:-`` semantics: unset *or empty* falls back to the default.
            return value if value else default_value
        if value is not None:
            return value
        # No value and no default: keep the original ``${VAR}`` text as-is.
        return match.group(0)

    # Substitute all patterns in the text
    return _ENV_VAR_PATTERN.sub(replace_var, text)


def req_file(filename, folder="requirements"):
    """Read a requirements file and return its requirement strings.

    Each line is stripped of surrounding whitespace (including the trailing
    newline). Blank lines and lines that start with ``#`` are dropped. Every
    remaining line is passed through ``replace_env_vars`` so that ``${VAR}``
    and ``${VAR:-default}`` placeholders are expanded.

    Args:
        filename: Name of the requirements file inside ``folder``.
        folder: Directory that contains the file.

    Returns:
        A list of requirement strings in file order.
    """
    with open(os.path.join(folder, filename), encoding="utf-8") as f:
        stripped_lines = [line.strip() for line in f]

    # Filter and expand in one pass. No early return precedes this step, so
    # the comment filtering and env-var expansion are always applied.
    return [replace_env_vars(line) for line in stripped_lines if line and not line.startswith("#")]


install_requires = req_file("requirements.txt", folder="setup")
Expand Down
7 changes: 4 additions & 3 deletions setup/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
datasets>=3.0.1
jsonlines
megatron_core>=0.8
nemo_toolkit[nlp]
nvidia-pytriton
nemo_toolkit[nlp] @ git+https://github.com/NVIDIA/NeMo.git@${NEMO_TAG}#egg=nemo_toolkit[nlp]
nvidia-pytriton #==${PYTRITON_VERSION:-0.5.10}
protobuf==${PROTOBUF_VERSION:-4.24.4}
Loading