44 changes: 44 additions & 0 deletions models/bamf_nnunet_ct_lungnodules/config/default.yml
@@ -0,0 +1,44 @@
general:
  data_base_dir: /app/data
  version: 1.0
  description: default configuration for Bamf NNUnet Lung and Nodules segmentation (dicom to dicom)

execute:
- DicomImporter
- NiftiConverter
- module: NNUnetRunner
  nnunet_task: Task777_CT_Nodules
  roi: LUNG+NODULE
- module: NNUnetRunner
  nnunet_task: Task775_CT_NSCLC_RG
  roi: LUNG,LUNG+FDG_AVID_TUMOR
- LungPostProcessor
- DsegConverter
- DataOrganizer

modules:
  DicomImporter:
    source_dir: input_data
    import_dir: sorted_data
    sort_data: true
    meta:
      mod: '%Modality'

  NNUnetRunner:
    in_data: nifti:mod=ct
    nnunet_model: 3d_fullres
    export_prob_maps: False

  LungPostProcessor:
    in_rg_data: nifti:mod=seg:nnunet_task=Task775_CT_NSCLC_RG
    in_nodules_data: nifti:mod=seg:nnunet_task=Task777_CT_Nodules

  DsegConverter:
    model_name: Bamf CT Lung and Nodule Segmentation
    target_dicom: dicom:mod=ct
    source_segs: nifti:mod=seg:processor=bamf
    skip_empty_slices: True

  DataOrganizer:
    targets:
    - dicomseg-->[i:sid]/bamf_nnunet_ct_lungnodules.seg.dcm
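
For context, a minimal sketch of how the built image might be invoked with this default configuration. The image tag, the GPU flag, and the /app/data/output_data mount point follow common MHub conventions and are assumptions here rather than part of this diff:

import subprocess
from pathlib import Path

def run_pipeline(dicom_in: str, out_dir: str,
                 image: str = "mhubai/bamf_nnunet_ct_lungnodules") -> None:
    # mount the DICOM series where DicomImporter expects it (source_dir: input_data)
    # and a writable folder where DataOrganizer places the DICOM SEG output
    Path(out_dir).mkdir(parents=True, exist_ok=True)
    subprocess.run([
        "docker", "run", "--rm", "--gpus", "all",
        "-v", f"{Path(dicom_in).resolve()}:/app/data/input_data:ro",
        "-v", f"{Path(out_dir).resolve()}:/app/data/output_data",
        image,
    ], check=True)

# run_pipeline("/path/to/ct_dicom", "/path/to/output")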
39 changes: 39 additions & 0 deletions models/bamf_nnunet_ct_lungnodules/dockerfiles/Dockerfile
@@ -0,0 +1,39 @@
FROM mhubai/base:latest

# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build
# by pulling sklearn instead of scikit-learn
# N.B. this is a known issue:
# https://github.com/MIC-DKFZ/nnUNet/issues/1281
# https://github.com/MIC-DKFZ/nnUNet/pull/1209
ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True

# Install nnunet
RUN pip3 install --no-cache-dir nnunet==1.7.1

# Clone the main branch of MHubAI/models
ARG MHUB_MODELS_REPO
RUN buildutils/import_mhub_model.sh bamf_nnunet_ct_lungnodules ${MHUB_MODELS_REPO}

# Pull nnUNet model weights into the container for Task777_CT_Nodules
ENV WEIGHTS_DIR=/root/.nnunet/nnUNet_models/nnUNet/
RUN mkdir -p $WEIGHTS_DIR
ENV WEIGHTS_FN=Task777_CT_Nodules.zip
ENV WEIGHTS_URL=https://zenodo.org/record/8290147/files/$WEIGHTS_FN
RUN wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL}
RUN unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR}
RUN rm ${WEIGHTS_DIR}${WEIGHTS_FN}

# Pull nnUNet model weights into the container for Task775_CT_NSCLC_RG
ENV TASK_NAME_NSCLC_RG=Task775_CT_NSCLC_RG
ENV WEIGHTS_FN=$TASK_NAME_NSCLC_RG.zip
ENV WEIGHTS_URL=https://zenodo.org/record/8290169/files/$WEIGHTS_FN
RUN wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL}
RUN unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR}
RUN rm ${WEIGHTS_DIR}${WEIGHTS_FN}

# Specify nnUNet-specific environment variables
ENV WEIGHTS_FOLDER=$WEIGHTS_DIR

# Default run script
ENTRYPOINT ["mhub.run"]
CMD ["--config", "/app/models/bamf_nnunet_ct_lungnodules/config/default.yml"]
177 changes: 177 additions & 0 deletions models/bamf_nnunet_ct_lungnodules/meta.json
@@ -0,0 +1,177 @@
{
"id": "",
"name": "bamf_nnunet_ct_lungnodules",
"title": "AIMI CT Lung and Nodules",
"summary": {
"description": "An nnU-Net based model to segment Lung and Nodules (3mm-30mm) from CT scans",
"inputs": [
{
"label": "Input Image",
"description": "The CT scan of a patient.",
"format": "DICOM",
"modality": "CT",
"bodypartexamined": "LUNG",
"slicethickness": "10mm",
"non-contrast": true,
"contrast": false
}
],
"outputs": [
{
"label": "Segmentation",
"type": "Segmentation",
"description": "Lung and Nodules (3mm-30mm) from CT scans",
"classes": [
"LUNG",
"LUNG+NODULE"
]
}
],
"model": {
"architecture": "U-net",
"training": "supervised",
"cmpapproach": "3D"
},
"data": {
"training": {
"vol_samples": 1405
},
"evaluation": {
"vol_samples": 47
},
"public": true,
"external": true
}
},
"details": {
"name": "AIMI CT Lung and Nodule",
"version": "1.0.0",
"devteam": "BAMF Health",
"authors": [
"Soni, Rahul",
"McCrumb, Diana",
"Murugesan, Gowtham Krishnan",
"Van Oss, Jeff"
],
"type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)",
"date": {
"code": "17.10.2023",
"weights": "28.08.2023",
"pub": "23.10.2023"
},
"cite": "Murugesan, Gowtham Krishnan, Diana McCrumb, Mariam Aboian, Tej Verma, Rahul Soni, Fatima Memon, and Jeff Van Oss. The AIMI Initiative: AI-Generated Annotations for Imaging Data Commons Collections. arXiv preprint arXiv:2310.14897 (2023).",
"license": {
"code": "MIT",
"weights": "CC BY-NC 4.0"
},
"publications": [
{
"title": "The AIMI Initiative: AI-Generated Annotations in IDC Collections",
"uri": "https://arxiv.org/abs/2310.14897"
}
],
"github": "https://github.com/bamf-health/aimi-lung-ct"
},
"info": {
"use": {
"title": "Intended Use",
"text": "This model is intended to perform lung and nodules segmentation in CT scans. The model has been trained and tested on scans acquired during clinical care of patients, so it might not be suited for a healthy population. The generalization capabilities of the model on a range of ages, genders, and ethnicities are unknown."
},
"analyses": {
"title": "Quantitative Analyses",
"text": "The model's performance was assessed using the Dice Coefficient and 95% Hausdorff (mm). ",
"tables": [
{
"label": "Segmentation Metric - Lung - Expert 1",
"entries": {
"DSC": "0.99 (0.02)",
"95% Hausdorff (mm)": "2.34 (5.89)"
}
},
{
"label": "Segmentation Metric - Nodule - Expert 1",
"entries": {
"DSC": "0.60 (0.42)",
"95% Hausdorff (mm)": "56.72 (64.36)"
}
},
{
"label": "Segmentation Metric - Lung - Expert 2",
"entries": {
"DSC": "1.00 (0.00)",
"95% Hausdorff (mm)": "0.30 (1.70)"
}
},
{
"label": "Segmentation Metric - Nodule - Expert 2",
"entries": {
"DSC": "0.78 (0.34)",
"95% Hausdorff (mm)": "26.06 (48.63)"
}
}
],
"references": [
{
"label": "The AIMI Initiative: AI-Generated Annotations for Imaging Data Commons Collections",
"uri": "https://arxiv.org/abs/2310.14897"
}
]
},
"evaluation": {
"title": "Evaluation Data",
"text": "The model was used to segment cases from the IDC [1] collection of ACRIN-NSCLC-FDG-PET [2], Anti-PD-1-Lung [3], LUNG-PET-CT-Dx[4], NSCLC Radiogenomics[5], RIDER Lung PET-CT[6], TCGA-LUAD[7], TCGA-LUSC[8] . Approximately 47 of those cases were randomly selected to be reviewed and corrected by a board-certified radiologist. The model predictions, and radiologist corrections are published on zenodo [9]",
"references": [
{
"label": "Imaging Data Collections (IDC)",
"uri": "https://datacommons.cancer.gov/repository/imaging-data-commons"
},
{
"label": "ACRIN-NSCLC-FDG-PET",
"uri": "https://www.cancerimagingarchive.net/collection/acrin-nsclc-fdg-pet/"
},
{
"label": "Anti-PD-1-Lung",
"uri": "https://www.cancerimagingarchive.net/collection/anti-pd-1_lung/"
},
{
"label": "LUNG-PET-CT-Dx",
"uri": "https://www.cancerimagingarchive.net/collection/lung-pet-ct-dx/"
},
{
"label": "NSCLC Radiogenomics",
"uri": "https://www.cancerimagingarchive.net/collection/nsclc-radiogenomics/"
},
{
"label": "RIDER Lung PET-CT",
"uri": "https://wiki.cancerimagingarchive.net/display/Public/RIDER+Collections"
},
{
"label": "TCGA-LUAD",
"uri": "https://www.cancerimagingarchive.net/collection/tcga-luad/"
},
{
"label": "TCGA-LUSC",
"uri": "https://www.cancerimagingarchive.net/collection/tcga-lusc/"
},
{
"label": "Image segmentations produced by the AIMI Annotations initiative",
"uri": "https://zenodo.org/records/10009368"
}
]
},
"training": {
"title": "Training Data",
"text": "The DICOM-LIDC-IDRI-Nodules[1] dataset was used to train an AI model to annotate lung nodules. This dataset included 883 studies with annotated nodules from 875 patients. Within the dataset only nodules that were identified by all four of their radiologists (size condition: 3mm ≤ diameter ≤ 30mm), were considered for AI model training for this task. The lung annotations AI model was trained on 411 and 111 lung CT data from NSCLC Radiomics and NSCLC Radiogenomics respectively[2].",
"references": [
{
"label": "DICOM-LIDC-IDRI-Nodule",
"uri": "https://doi.org/10.7937/TCIA.2018.h7umfurq"
},
{
"label": "NSCLC Radiomics",
"uri": "https://doi.org/10.7937/K9/TCIA.2017.7hs46erv"
}
]
}
}
}
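
The tables above report each metric as mean (std) across the evaluated cases. For reference, a minimal illustrative implementation of the Dice coefficient used there (not part of this PR):

import numpy as np

def dice(a: np.ndarray, b: np.ndarray) -> float:
    """Dice similarity coefficient between two binary masks."""
    a, b = a.astype(bool), b.astype(bool)
    denom = a.sum() + b.sum()
    # by a common convention, two empty masks are treated as a perfect match
    return 1.0 if denom == 0 else 2.0 * np.logical_and(a, b).sum() / denom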
100 changes: 100 additions & 0 deletions models/bamf_nnunet_ct_lungnodules/utils/LungPostProcessor.py
@@ -0,0 +1,100 @@
"""
-------------------------------------------------
MHub - Run module to perform postprocessing logic on segmentations.
-------------------------------------------------
-------------------------------------------------
Author: Jithendra Kumar
Email: jithendra.kumar@bamfhealth.com
-------------------------------------------------
"""
from mhubio.core import IO
from mhubio.core import Module, Instance, InstanceData
import SimpleITK as sitk
import numpy as np
from skimage import measure


class LungPostProcessor(Module):

    def perform_binary_threshold_segmentation(self, ip_path):
        """
        Perform binary threshold segmentation on the input image.

        Args:
        - ip_path (str): Path to the input image file.

        Returns:
        - numpy.ndarray: Segmented binary mask where non-zero values represent the segmented region.
        """
        seg_data = sitk.GetArrayFromImage(sitk.ReadImage(ip_path))
        seg_data[seg_data > 0] = 1
        return seg_data

    def extract_largest_connected_component(self, img_data):
        """
        Retain only the two largest connected components in the segmentation
        image data (the left and right lungs), discarding smaller spurious blobs.

        Note: the size-sorted label list is expected to start with the background
        (label 0) as its largest entry, so the components at positions 1 and 2 are kept.

        Args:
        - img_data (numpy.ndarray): Segmentation image data where connected components are to be identified.

        Returns:
        - numpy.ndarray: Image data with only the two largest connected components retained.
        """
        img_data_mask = np.zeros(img_data.shape)
        img_data_mask[img_data >= 1] = 1
        img_filtered = np.zeros(img_data_mask.shape)
        # label connected foreground blobs and count the voxels in each
        blobs_labels = measure.label(img_data_mask, background=0)
        lbl, counts = np.unique(blobs_labels, return_counts=True)
        lbl_dict = dict(zip(lbl, counts))
        # sort labels by voxel count, largest first (position 0 is assumed to be background)
        sorted_dict = dict(sorted(lbl_dict.items(), key=lambda x: x[1], reverse=True))
        count = 0
        for key, value in sorted_dict.items():
            # keep the two largest foreground components (positions 1 and 2)
            if 1 <= count <= 2:
                img_filtered[blobs_labels == key] = 1
            count += 1

        img_data[img_filtered != 1] = 0
        return img_data

    def create_segmentation_image(self, lungs, nodules, ct_path):
        """
        Create a segmentation image combining lung and nodule segmentations.

        Args:
        - lungs (numpy.ndarray): Binary mask of lung segmentation.
        - nodules (numpy.ndarray): Binary mask of nodule segmentation.
        - ct_path (str): Path to the original CT image used as reference.

        Returns:
        - SimpleITK.Image: Segmentation image where lung and nodule regions are labeled as 1 and 2, respectively.
        """
        seg_data = np.zeros(lungs.shape)
        seg_data[lungs == 1] = 1
        seg_data[nodules == 1] = 2
        # copy origin, spacing, and direction from the reference CT
        ref = sitk.ReadImage(ct_path)
        seg_img = sitk.GetImageFromArray(seg_data)
        seg_img.CopyInformation(ref)
        return seg_img

    @IO.Instance()
    @IO.Input('in_rg_data', 'nifti:mod=seg:nnunet_task=Task775_CT_NSCLC_RG', the='input data from the lung nnunet module')
    @IO.Input('in_nodules_data', 'nifti:mod=seg:nnunet_task=Task777_CT_Nodules', the='input data from the nodules nnunet module')
    @IO.Input('in_ct_data', 'nifti:mod=ct', the='input ct data')
    @IO.Output('out_data', 'bamf_processed.nii.gz', 'nifti:mod=seg:processor=bamf:roi=LUNG,LUNG+NODULE',
               data='in_rg_data', the="lung and lung nodule segmentation file")
    def task(self, instance: Instance, in_rg_data: InstanceData, in_nodules_data: InstanceData,
             in_ct_data: InstanceData, out_data: InstanceData):

        self.v('running LungPostProcessor')

        # binarize the lung model output and keep only the two largest components (the lungs)
        seg_data = self.perform_binary_threshold_segmentation(in_rg_data.abspath)
        lungs = self.extract_largest_connected_component(seg_data)

        # binarize the nodule model output and discard nodule voxels outside the lungs
        nodules = self.perform_binary_threshold_segmentation(in_nodules_data.abspath)
        nodules[lungs == 0] = 0

        # combine into a single label map (1 = lung, 2 = nodule) and write the result
        final_seg_img = self.create_segmentation_image(np.copy(lungs), np.copy(nodules), in_ct_data.abspath)
        sitk.WriteImage(final_seg_img, out_data.abspath)
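
To make the component-filtering step above concrete, here is a self-contained sketch on synthetic data: two large "lung" blobs plus a small noise blob, of which only the two largest foreground components survive. The shapes and values are arbitrary test data; the explicit exclusion of label 0 makes the background assumption from extract_largest_connected_component visible:

import numpy as np
from skimage import measure

vol = np.zeros((1, 64, 64), dtype=np.uint8)
vol[0, 5:30, 5:25] = 1    # left "lung"
vol[0, 5:30, 35:55] = 1   # right "lung"
vol[0, 50:52, 50:52] = 1  # small noise blob that should be removed

labels = measure.label(vol, background=0)
lbls, counts = np.unique(labels, return_counts=True)
order = lbls[np.argsort(counts)[::-1]]   # labels sorted by voxel count, descending
keep = [l for l in order if l != 0][:2]  # the two largest foreground blobs
filtered = np.isin(labels, keep).astype(np.uint8)
print(int(filtered.sum()), "voxels kept;", int(vol.sum() - filtered.sum()), "removed")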