44 changes: 44 additions & 0 deletions models/bamf_nnunet_ct_lungnodules/config/default.yml
@@ -0,0 +1,44 @@
general:
  data_base_dir: /app/data
  version: 1.0
  description: default configuration for Bamf NNUnet Lung and Nodules segmentation (dicom to dicom)

execute:
- DicomImporter
- NiftiConverter
- module: NNUnetRunner
  nnunet_task: Task777_CT_Nodules
  roi: LUNG+NODULE
- module: NNUnetRunner
  nnunet_task: Task775_CT_NSCLC_RG
  roi: LUNG,LUNG+FDG_AVID_TUMOR
- LungPostProcessor
- DsegConverter
- DataOrganizer

modules:
  DicomImporter:
    source_dir: input_data
    import_dir: sorted_data
    sort_data: true
    meta:
      mod: '%Modality'

  NNUnetRunner:
    in_data: nifti:mod=ct
    nnunet_model: 3d_fullres
    export_prob_maps: False

  LungPostProcessor:
    in_rg_data: nifti:mod=seg:nnunet_task=Task775_CT_NSCLC_RG
    in_nodules_data: nifti:mod=seg:nnunet_task=Task777_CT_Nodules

  DsegConverter:
    model_name: Bamf CT Lung and Nodule Segmentation
    target_dicom: dicom:mod=ct
    source_segs: nifti:mod=seg:processor=bamf
    skip_empty_slices: True

  DataOrganizer:
    targets:
    - dicomseg-->[i:sid]/bamf_nnunet_ct_lungnodules.seg.dcm
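
For context, a minimal sketch of how the built image might be invoked with this default configuration. The image tag, the GPU flag, and the /app/data/output_data mount point follow common MHub conventions and are assumptions here rather than part of this diff:

import subprocess
from pathlib import Path

def run_pipeline(dicom_in: str, out_dir: str,
                 image: str = "mhubai/bamf_nnunet_ct_lungnodules") -> None:
    # mount the DICOM series where DicomImporter expects it (source_dir: input_data)
    # and a writable folder where DataOrganizer places the DICOM SEG output
    Path(out_dir).mkdir(parents=True, exist_ok=True)
    subprocess.run([
        "docker", "run", "--rm", "--gpus", "all",
        "-v", f"{Path(dicom_in).resolve()}:/app/data/input_data:ro",
        "-v", f"{Path(out_dir).resolve()}:/app/data/output_data",
        image,
    ], check=True)

# run_pipeline("/path/to/ct_dicom", "/path/to/output")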
39 changes: 39 additions & 0 deletions models/bamf_nnunet_ct_lungnodules/dockerfiles/Dockerfile
@@ -0,0 +1,39 @@
FROM mhubai/base:latest

# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build
# by pulling sklearn instead of scikit-learn
# N.B. this is a known issue:
# https://github.com/MIC-DKFZ/nnUNet/issues/1281
# https://github.com/MIC-DKFZ/nnUNet/pull/1209
ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True

# Install nnunet
RUN pip3 install --no-cache-dir nnunet==1.7.1

# Clone the main branch of MHubAI/models
ARG MHUB_MODELS_REPO
RUN buildutils/import_mhub_model.sh bamf_nnunet_ct_lungnodules ${MHUB_MODELS_REPO}

# Pull nnUNet model weights into the container for Task777_CT_Nodules
ENV WEIGHTS_DIR=/root/.nnunet/nnUNet_models/nnUNet/
RUN mkdir -p $WEIGHTS_DIR
ENV WEIGHTS_FN=Task777_CT_Nodules.zip
ENV WEIGHTS_URL=https://zenodo.org/record/8290147/files/$WEIGHTS_FN
RUN wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL}
RUN unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR}
RUN rm ${WEIGHTS_DIR}${WEIGHTS_FN}

# Pull nnUNet model weights into the container for Task775_CT_NSCLC_RG
ENV TASK_NAME_NSCLC_RG=Task775_CT_NSCLC_RG
ENV WEIGHTS_FN=$TASK_NAME_NSCLC_RG.zip
ENV WEIGHTS_URL=https://zenodo.org/record/8290169/files/$WEIGHTS_FN
RUN wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL}
RUN unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR}
RUN rm ${WEIGHTS_DIR}${WEIGHTS_FN}

# Specify nnUNet-specific environment variables
ENV WEIGHTS_FOLDER=$WEIGHTS_DIR

# Default run script
ENTRYPOINT ["mhub.run"]
CMD ["--config", "/app/models/bamf_nnunet_ct_lungnodules/config/default.yml"]
177 changes: 177 additions & 0 deletions models/bamf_nnunet_ct_lungnodules/meta.json
@@ -0,0 +1,177 @@
{
"id": "",
"name": "bamf_nnunet_ct_lungnodules",
"title": "AIMI CT Lung and Nodules",
"summary": {
"description": "An nnU-Net based model to segment Lung and Nodules (3mm-30mm) from CT scans",
"inputs": [
{
"label": "Input Image",
"description": "The CT scan of a patient.",
"format": "DICOM",
"modality": "CT",
"bodypartexamined": "LUNG",
"slicethickness": "10mm",
"non-contrast": true,
"contrast": false
}
],
"outputs": [
{
"label": "Segmentation",
"type": "Segmentation",
"description": "Lung and Nodules (3mm-30mm) from CT scans",
"classes": [
"LUNG",
"LUNG+NODULE"
]
}
],
"model": {
"architecture": "U-net",
"training": "supervised",
"cmpapproach": "3D"
},
"data": {
"training": {
"vol_samples": 1405
},
"evaluation": {
"vol_samples": 47
},
"public": true,
"external": true
}
},
"details": {
"name": "AIMI CT Lung and Nodule",
"version": "1.0.0",
"devteam": "BAMF Health",
"authors": [
"Soni, Rahul",
"McCrumb, Diana",
"Murugesan, Gowtham Krishnan",
"Van Oss, Jeff"
],
"type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)",
"date": {
"code": "17.10.2023",
"weights": "28.08.2023",
"pub": "23.10.2023"
},
"cite": "Murugesan, Gowtham Krishnan, Diana McCrumb, Mariam Aboian, Tej Verma, Rahul Soni, Fatima Memon, and Jeff Van Oss. The AIMI Initiative: AI-Generated Annotations for Imaging Data Commons Collections. arXiv preprint arXiv:2310.14897 (2023).",
"license": {
"code": "MIT",
"weights": "CC BY-NC 4.0"
},
"publications": [
{
"title": "The AIMI Initiative: AI-Generated Annotations in IDC Collections",
"uri": "https://arxiv.org/abs/2310.14897"
}
],
"github": "https://github.com/bamf-health/aimi-lung-ct"
},
"info": {
"use": {
"title": "Intended Use",
"text": "This model is intended to perform lung and nodules segmentation in CT scans. The model has been trained and tested on scans acquired during clinical care of patients, so it might not be suited for a healthy population. The generalization capabilities of the model on a range of ages, genders, and ethnicities are unknown."
},
"analyses": {
"title": "Quantitative Analyses",
"text": "The model's performance was assessed using the Dice Coefficient and 95% Hausdorff (mm). ",
"tables": [
{
"label": "Segmentation Metric - Lung - Expert 1",
"entries": {
"DSC": "0.99 (0.02)",
"95% Hausdorff (mm)": "2.34 (5.89)"
}
},
{
"label": "Segmentation Metric - Nodule - Expert 1",
"entries": {
"DSC": "0.60 (0.42)",
"95% Hausdorff (mm)": "56.72 (64.36)"
}
},
{
"label": "Segmentation Metric - Lung - Expert 2",
"entries": {
"DSC": "1.00 (0.00)",
"95% Hausdorff (mm)": "0.30 (1.70)"
}
},
{
"label": "Segmentation Metric - Nodule - Expert 2",
"entries": {
"DSC": "0.78 (0.34)",
"95% Hausdorff (mm)": "26.06 (48.63)"
}
}
],
"references": [
{
"label": "The AIMI Initiative: AI-Generated Annotations for Imaging Data Commons Collections",
"uri": "https://arxiv.org/abs/2310.14897"
}
]
},
"evaluation": {
"title": "Evaluation Data",
"text": "The model was used to segment cases from the IDC [1] collection of ACRIN-NSCLC-FDG-PET [2], Anti-PD-1-Lung [3], LUNG-PET-CT-Dx[4], NSCLC Radiogenomics[5], RIDER Lung PET-CT[6], TCGA-LUAD[7], TCGA-LUSC[8] . Approximately 47 of those cases were randomly selected to be reviewed and corrected by a board-certified radiologist. The model predictions, and radiologist corrections are published on zenodo [9]",
"references": [
{
"label": "Imaging Data Collections (IDC)",
"uri": "https://datacommons.cancer.gov/repository/imaging-data-commons"
},
{
"label": "ACRIN-NSCLC-FDG-PET",
"uri": "https://www.cancerimagingarchive.net/collection/acrin-nsclc-fdg-pet/"
},
{
"label": "Anti-PD-1-Lung",
"uri": "https://www.cancerimagingarchive.net/collection/anti-pd-1_lung/"
},
{
"label": "LUNG-PET-CT-Dx",
"uri": "https://www.cancerimagingarchive.net/collection/lung-pet-ct-dx/"
},
{
"label": "NSCLC Radiogenomics",
"uri": "https://www.cancerimagingarchive.net/collection/nsclc-radiogenomics/"
},
{
"label": "RIDER Lung PET-CT",
"uri": "https://wiki.cancerimagingarchive.net/display/Public/RIDER+Collections"
},
{
"label": "TCGA-LUAD",
"uri": "https://www.cancerimagingarchive.net/collection/tcga-luad/"
},
{
"label": "TCGA-LUSC",
"uri": "https://www.cancerimagingarchive.net/collection/tcga-lusc/"
},
{
"label": "Image segmentations produced by the AIMI Annotations initiative",
"uri": "https://zenodo.org/records/10009368"
}
]
},
"training": {
"title": "Training Data",
"text": "The DICOM-LIDC-IDRI-Nodules[1] dataset was used to train an AI model to annotate lung nodules. This dataset included 883 studies with annotated nodules from 875 patients. Within the dataset only nodules that were identified by all four of their radiologists (size condition: 3mm ≤ diameter ≤ 30mm), were considered for AI model training for this task. The lung annotations AI model was trained on 411 and 111 lung CT data from NSCLC Radiomics and NSCLC Radiogenomics respectively[2].",
"references": [
{
"label": "DICOM-LIDC-IDRI-Nodule",
"uri": "https://doi.org/10.7937/TCIA.2018.h7umfurq"
},
{
"label": "NSCLC Radiomics",
"uri": "https://doi.org/10.7937/K9/TCIA.2017.7hs46erv"
}
]
}
}
}
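
The tables above report each metric as mean (std) across the evaluated cases. For reference, a minimal illustrative implementation of the Dice coefficient used there (not part of this PR):

import numpy as np

def dice(a: np.ndarray, b: np.ndarray) -> float:
    """Dice similarity coefficient between two binary masks."""
    a, b = a.astype(bool), b.astype(bool)
    denom = a.sum() + b.sum()
    # by a common convention, two empty masks are treated as a perfect match
    return 1.0 if denom == 0 else 2.0 * np.logical_and(a, b).sum() / denom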
100 changes: 100 additions & 0 deletions models/bamf_nnunet_ct_lungnodules/utils/LungPostProcessor.py
@@ -0,0 +1,100 @@
"""
-------------------------------------------------
MHub - Run module to perform postprocessing logic on segmentations.
-------------------------------------------------
-------------------------------------------------
Author: Jithendra Kumar
Email: jithendra.kumar@bamfhealth.com
-------------------------------------------------
"""
from mhubio.core import IO
from mhubio.core import Module, Instance, InstanceData
import SimpleITK as sitk
import numpy as np
from skimage import measure


class LungPostProcessor(Module):

    def perform_binary_threshold_segmentation(self, ip_path):
        """
        Perform binary threshold segmentation on the input image.

        Args:
        - ip_path (str): Path to the input image file.

        Returns:
        - numpy.ndarray: Segmented binary mask where non-zero values represent the segmented region.
        """
        seg_data = sitk.GetArrayFromImage(sitk.ReadImage(ip_path))
        seg_data[seg_data > 0] = 1
        return seg_data

    def extract_largest_connected_component(self, img_data):
        """
        Retain only the two largest connected components in the segmentation
        image data (the left and right lungs), discarding smaller spurious blobs.

        Note: the size-sorted label list is expected to start with the background
        (label 0) as its largest entry, so the components at positions 1 and 2 are kept.

        Args:
        - img_data (numpy.ndarray): Segmentation image data where connected components are to be identified.

        Returns:
        - numpy.ndarray: Image data with only the two largest connected components retained.
        """
        img_data_mask = np.zeros(img_data.shape)
        img_data_mask[img_data >= 1] = 1
        img_filtered = np.zeros(img_data_mask.shape)
        # label connected foreground blobs and count the voxels in each
        blobs_labels = measure.label(img_data_mask, background=0)
        lbl, counts = np.unique(blobs_labels, return_counts=True)
        lbl_dict = dict(zip(lbl, counts))
        # sort labels by voxel count, largest first (position 0 is assumed to be background)
        sorted_dict = dict(sorted(lbl_dict.items(), key=lambda x: x[1], reverse=True))
        count = 0
        for key, value in sorted_dict.items():
            # keep the two largest foreground components (positions 1 and 2)
            if 1 <= count <= 2:
                img_filtered[blobs_labels == key] = 1
            count += 1

        img_data[img_filtered != 1] = 0
        return img_data

    def create_segmentation_image(self, lungs, nodules, ct_path):
        """
        Create a segmentation image combining lung and nodule segmentations.

        Args:
        - lungs (numpy.ndarray): Binary mask of lung segmentation.
        - nodules (numpy.ndarray): Binary mask of nodule segmentation.
        - ct_path (str): Path to the original CT image used as reference.

        Returns:
        - SimpleITK.Image: Segmentation image where lung and nodule regions are labeled as 1 and 2, respectively.
        """
        seg_data = np.zeros(lungs.shape)
        seg_data[lungs == 1] = 1
        seg_data[nodules == 1] = 2
        # copy origin, spacing, and direction from the reference CT
        ref = sitk.ReadImage(ct_path)
        seg_img = sitk.GetImageFromArray(seg_data)
        seg_img.CopyInformation(ref)
        return seg_img

    @IO.Instance()
    @IO.Input('in_rg_data', 'nifti:mod=seg:nnunet_task=Task775_CT_NSCLC_RG', the='input data from the lung nnunet module')
    @IO.Input('in_nodules_data', 'nifti:mod=seg:nnunet_task=Task777_CT_Nodules', the='input data from the nodules nnunet module')
    @IO.Input('in_ct_data', 'nifti:mod=ct', the='input ct data')
    @IO.Output('out_data', 'bamf_processed.nii.gz', 'nifti:mod=seg:processor=bamf:roi=LUNG,LUNG+NODULE',
               data='in_rg_data', the="lung and lung nodule segmentation file")
    def task(self, instance: Instance, in_rg_data: InstanceData, in_nodules_data: InstanceData,
             in_ct_data: InstanceData, out_data: InstanceData):

        self.v('running LungPostProcessor')

        # binarize the lung model output and keep only the two largest components (the lungs)
        seg_data = self.perform_binary_threshold_segmentation(in_rg_data.abspath)
        lungs = self.extract_largest_connected_component(seg_data)

        # binarize the nodule model output and discard nodule voxels outside the lungs
        nodules = self.perform_binary_threshold_segmentation(in_nodules_data.abspath)
        nodules[lungs == 0] = 0

        # combine into a single label map (1 = lung, 2 = nodule) and write the result
        final_seg_img = self.create_segmentation_image(np.copy(lungs), np.copy(nodules), in_ct_data.abspath)
        sitk.WriteImage(final_seg_img, out_data.abspath)
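
To make the component-filtering step above concrete, here is a self-contained sketch on synthetic data: two large "lung" blobs plus a small noise blob, of which only the two largest foreground components survive. The shapes and values are arbitrary test data; the explicit exclusion of label 0 makes the background assumption from extract_largest_connected_component visible:

import numpy as np
from skimage import measure

vol = np.zeros((1, 64, 64), dtype=np.uint8)
vol[0, 5:30, 5:25] = 1    # left "lung"
vol[0, 5:30, 35:55] = 1   # right "lung"
vol[0, 50:52, 50:52] = 1  # small noise blob that should be removed

labels = measure.label(vol, background=0)
lbls, counts = np.unique(labels, return_counts=True)
order = lbls[np.argsort(counts)[::-1]]   # labels sorted by voxel count, descending
keep = [l for l in order if l != 0][:2]  # the two largest foreground blobs
filtered = np.isin(labels, keep).astype(np.uint8)
print(int(filtered.sum()), "voxels kept;", int(vol.sum() - filtered.sum()), "removed")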