From 7bbaac8de3d9844c7f02d805794173eeffc82811 Mon Sep 17 00:00:00 2001 From: jreadey Date: Sun, 31 Mar 2024 15:13:41 -0700 Subject: [PATCH 01/25] setup docker-compose for HSDS --- .bashrc | 17 ++++++++++ .devcontainer/docker-compose.yaml | 54 +++++++++++++++++++++++++++++++ hsds/data/readme.txt | 1 + 3 files changed, 72 insertions(+) create mode 100644 .bashrc create mode 100644 .devcontainer/docker-compose.yaml create mode 100644 hsds/data/readme.txt diff --git a/.bashrc b/.bashrc new file mode 100644 index 0000000..23037c4 --- /dev/null +++ b/.bashrc @@ -0,0 +1,17 @@ +# setup env variables for HSDS +export ROOT_DIR=${HOME}/hsds/data +export BUCKET_NAME=hsdstest +export HEAD_PORT=5100 +export HEAD_RAM=512m +export SN_PORT=5101 +export SN_RAM=3g +export SN_CORES=1 +export DN_PORT=6101 +export DN_RAM=3g +export DN_CORES=4 +export LOG_LEVEL=DEBUG +export HSDS_ENDPOINT=http://localhost:${SN_PORT} + +# environment variables for HSDS clients +export HS_USERNAME=${USER} +export HS_PASSWORD=${USER} \ No newline at end of file diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml new file mode 100644 index 0000000..2ab964d --- /dev/null +++ b/.devcontainer/docker-compose.yaml @@ -0,0 +1,54 @@ +version: "2" +services: + head: + image: hdfgroup/hsds:master + restart: ${RESTART_POLICY} + mem_limit: ${HEAD_RAM} + environment: + - TARGET_SN_COUNT=${SN_CORES} + - TARGET_DN_COUNT=${DN_CORES} + - NODE_TYPE=head_node + - ROOT_DIR=/data + - BUCKET_NAME=${BUCKET_NAME} + ports: + - ${HEAD_PORT} + volumes: + - ${ROOT_DIR}:/data + - ${PWD}/admin/config/:/config/ + + dn: + image: hdfgroup/hsds + restart: ${RESTART_POLICY} + mem_limit: ${DN_RAM} + environment: + - NODE_TYPE=dn + - ROOT_DIR=/data + - BUCKET_NAME=${BUCKET_NAME} + ports: + - ${DN_PORT} + depends_on: + - head + volumes: + - ${ROOT_DIR}:/data + - ${PWD}/admin/config/:/config/ + links: + - head + sn: + image: hdfgroup/hsds + restart: ${RESTART_POLICY} + mem_limit: ${SN_RAM} + environment: + - SN_PORT=${SN_PORT} + - NODE_TYPE=sn + - ROOT_DIR=/data + - BUCKET_NAME=${BUCKET_NAME} + - HSDS_ENDPOINT=${HSDS_ENDPOINT} + ports: + - ${SN_PORT_RANGE}:${SN_PORT} + depends_on: + - head + volumes: + - ${ROOT_DIR}:/data + - ${PWD}/admin/config/:/config/ + links: + - head diff --git a/hsds/data/readme.txt b/hsds/data/readme.txt new file mode 100644 index 0000000..3d18e5d --- /dev/null +++ b/hsds/data/readme.txt @@ -0,0 +1 @@ +This is the root directory for HSDS data From 8da338bbf3ce286b52f58bf43278917d8efc2757 Mon Sep 17 00:00:00 2001 From: jreadey Date: Sun, 31 Mar 2024 15:35:53 -0700 Subject: [PATCH 02/25] set base image --- .devcontainer/devcontainer.json | 30 +++++++++++++----------------- .devcontainer/docker-compose.yaml | 8 +++----- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index e3af3ee..7f1236b 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,26 +1,22 @@ { + "image": "mcr.microsoft.com/devcontainers/universal:2", "name": "HDF5 Tutorial", - "build": { - "context": "..", - "dockerfile": "Dockerfile" + "hostRequirements": { + "cpus": 4 }, - "postCreateCommand": "conda init", + "waitFor": "onCreateCommand", + "updateContentCommand": "python3 -m pip install -r requirements.txt", + "postCreateCommand": "", "customizations": { + "codespaces": { + "openFiles": [] + }, "vscode": { "extensions": [ - "ms-python.python", - "ms-toolsai.jupyter", - "ms-vscode.cpptools", - "h5web.vscode-h5web", - 
"davidanson.vscode-markdownlint" - ], - "settings": { - "C_Cpp.default.cppStandard": "c++17", - "C_Cpp.default.cStandard": "c99", - "python.defaultInterpreterPath": "/opt/conda/envs/hdf5-tutorial/python", - "python.languageServer": "Default", - "terminal.integrated.shell.linux": "/bin/bash" - } + "ms-toolsai.jupyter", + "ms-python.python" + ] } } } + diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml index 2ab964d..b8f9e9b 100644 --- a/.devcontainer/docker-compose.yaml +++ b/.devcontainer/docker-compose.yaml @@ -2,7 +2,7 @@ version: "2" services: head: image: hdfgroup/hsds:master - restart: ${RESTART_POLICY} + restart: on-failure mem_limit: ${HEAD_RAM} environment: - TARGET_SN_COUNT=${SN_CORES} @@ -18,7 +18,7 @@ services: dn: image: hdfgroup/hsds - restart: ${RESTART_POLICY} + restart: on-failure mem_limit: ${DN_RAM} environment: - NODE_TYPE=dn @@ -30,12 +30,11 @@ services: - head volumes: - ${ROOT_DIR}:/data - - ${PWD}/admin/config/:/config/ links: - head sn: image: hdfgroup/hsds - restart: ${RESTART_POLICY} + restart: on-failure mem_limit: ${SN_RAM} environment: - SN_PORT=${SN_PORT} @@ -49,6 +48,5 @@ services: - head volumes: - ${ROOT_DIR}:/data - - ${PWD}/admin/config/:/config/ links: - head From e72424da6bd3cece0203b36a61d7df34d96375c6 Mon Sep 17 00:00:00 2001 From: jreadey Date: Sun, 31 Mar 2024 15:49:23 -0700 Subject: [PATCH 03/25] set hsds env in Dockerfile --- .bashrc | 18 +----------------- .devcontainer/Dockerfile | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/.bashrc b/.bashrc index 23037c4..10486fa 100644 --- a/.bashrc +++ b/.bashrc @@ -1,17 +1 @@ -# setup env variables for HSDS -export ROOT_DIR=${HOME}/hsds/data -export BUCKET_NAME=hsdstest -export HEAD_PORT=5100 -export HEAD_RAM=512m -export SN_PORT=5101 -export SN_RAM=3g -export SN_CORES=1 -export DN_PORT=6101 -export DN_RAM=3g -export DN_CORES=4 -export LOG_LEVEL=DEBUG -export HSDS_ENDPOINT=http://localhost:${SN_PORT} - -# environment variables for HSDS clients -export HS_USERNAME=${USER} -export HS_PASSWORD=${USER} \ No newline at end of file +# add any special user configuration settings here \ No newline at end of file diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 602f49e..13e0045 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -12,3 +12,21 @@ RUN if [ -f "/tmp/conda-tmp/environment.yml" ]; then umask 0002 && /opt/conda/bi RUN apt-get update && apt-get -y install --no-install-recommends \ build-essential cmake libtool-bin git less wget \ libhdf5-dev libhdf5-mpi-dev hdf5-tools libyajl-dev + +# setup env variables for HSDS +ENV ROOT_DIR=/home/codespace/hsds/data +ENV BUCKET_NAME=hsdstest +ENV HEAD_PORT=5100 +ENV HEAD_RAM=512m +ENV SN_PORT=5101 +ENV SN_RAM=3g +ENV SN_CORES=1 +ENV DN_PORT=6101 +ENV DN_RAM=3g +ENV DN_CORES=4 +ENV LOG_LEVEL=DEBUG +ENV HSDS_ENDPOINT=http://localhost:${SN_PORT} +# environment variables for HSDS clients +ENV HS_USERNAME=${USER} +ENV HS_PASSWORD=${USER} + From b7ee0ee86d872968da5c2cc365ed241dc0e746ab Mon Sep 17 00:00:00 2001 From: jreadey Date: Mon, 1 Apr 2024 05:11:20 -0700 Subject: [PATCH 04/25] set dockerComposeFile --- .devcontainer/devcontainer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 7f1236b..406bbe8 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,6 +1,6 @@ { - "image": "mcr.microsoft.com/devcontainers/universal:2", "name": "HDF5 
Tutorial", + "dockerComposeFile": "docker-compose.yaml", "hostRequirements": { "cpus": 4 }, From 8a032538c203cc574f88bcdc620b9be4cc43f5fe Mon Sep 17 00:00:00 2001 From: jreadey Date: Mon, 1 Apr 2024 05:20:51 -0700 Subject: [PATCH 05/25] fix for docker-compose.yaml --- .devcontainer/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 13e0045..2d0f935 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -25,8 +25,8 @@ ENV DN_PORT=6101 ENV DN_RAM=3g ENV DN_CORES=4 ENV LOG_LEVEL=DEBUG -ENV HSDS_ENDPOINT=http://localhost:${SN_PORT} +ENV HSDS_ENDPOINT=http://localhost:5101 # environment variables for HSDS clients -ENV HS_USERNAME=${USER} -ENV HS_PASSWORD=${USER} +ENV HS_USERNAME=vscode +ENV HS_PASSWORD=vscode From 4dc3559f07c0881fc4e68023b833efbf69550af6 Mon Sep 17 00:00:00 2001 From: jreadey Date: Mon, 1 Apr 2024 05:32:47 -0700 Subject: [PATCH 06/25] remove env from docker-compose --- .devcontainer/Dockerfile | 18 ---------------- .devcontainer/docker-compose.yaml | 34 +++++++++++++++---------------- 2 files changed, 16 insertions(+), 36 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 2d0f935..602f49e 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -12,21 +12,3 @@ RUN if [ -f "/tmp/conda-tmp/environment.yml" ]; then umask 0002 && /opt/conda/bi RUN apt-get update && apt-get -y install --no-install-recommends \ build-essential cmake libtool-bin git less wget \ libhdf5-dev libhdf5-mpi-dev hdf5-tools libyajl-dev - -# setup env variables for HSDS -ENV ROOT_DIR=/home/codespace/hsds/data -ENV BUCKET_NAME=hsdstest -ENV HEAD_PORT=5100 -ENV HEAD_RAM=512m -ENV SN_PORT=5101 -ENV SN_RAM=3g -ENV SN_CORES=1 -ENV DN_PORT=6101 -ENV DN_RAM=3g -ENV DN_CORES=4 -ENV LOG_LEVEL=DEBUG -ENV HSDS_ENDPOINT=http://localhost:5101 -# environment variables for HSDS clients -ENV HS_USERNAME=vscode -ENV HS_PASSWORD=vscode - diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml index b8f9e9b..cc6a4ab 100644 --- a/.devcontainer/docker-compose.yaml +++ b/.devcontainer/docker-compose.yaml @@ -3,50 +3,48 @@ services: head: image: hdfgroup/hsds:master restart: on-failure - mem_limit: ${HEAD_RAM} + mem_limit: 512m environment: - - TARGET_SN_COUNT=${SN_CORES} - - TARGET_DN_COUNT=${DN_CORES} + - TARGET_SN_COUNT=4 + - TARGET_DN_COUNT=4 - NODE_TYPE=head_node - ROOT_DIR=/data - - BUCKET_NAME=${BUCKET_NAME} + - BUCKET_NAME=hsdstest ports: - - ${HEAD_PORT} + - 5100 volumes: - - ${ROOT_DIR}:/data - - ${PWD}/admin/config/:/config/ - + - /home/codespace/hsds/data:/data dn: image: hdfgroup/hsds restart: on-failure - mem_limit: ${DN_RAM} + mem_limit: 1g environment: - NODE_TYPE=dn - ROOT_DIR=/data - - BUCKET_NAME=${BUCKET_NAME} + - BUCKET_NAME=hsdstest ports: - - ${DN_PORT} + - 6101 depends_on: - head volumes: - - ${ROOT_DIR}:/data + - /home/codespace/hsds/data:/data links: - head sn: image: hdfgroup/hsds restart: on-failure - mem_limit: ${SN_RAM} + mem_limit: 1g environment: - - SN_PORT=${SN_PORT} + - SN_PORT=510 - NODE_TYPE=sn - ROOT_DIR=/data - - BUCKET_NAME=${BUCKET_NAME} - - HSDS_ENDPOINT=${HSDS_ENDPOINT} + - BUCKET_NAME=hsdstest + - HSDS_ENDPOINT=http://localhost:5101 ports: - - ${SN_PORT_RANGE}:${SN_PORT} + - 5101:5101 depends_on: - head volumes: - - ${ROOT_DIR}:/data + - /home/codespace/hsds/data:/data links: - head From 79dd4caaa9798157985e8da3d7bea0c2d0005ec5 Mon Sep 17 00:00:00 2001 From: jreadey Date: Mon, 1 Apr 2024 05:38:12 -0700 Subject: 
[PATCH 07/25] fix docker-compose options --- .devcontainer/docker-compose.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml index cc6a4ab..d620890 100644 --- a/.devcontainer/docker-compose.yaml +++ b/.devcontainer/docker-compose.yaml @@ -15,7 +15,7 @@ services: volumes: - /home/codespace/hsds/data:/data dn: - image: hdfgroup/hsds + image: hdfgroup/hsds:master restart: on-failure mem_limit: 1g environment: @@ -31,11 +31,11 @@ services: links: - head sn: - image: hdfgroup/hsds + image: hdfgroup/hsds:master restart: on-failure mem_limit: 1g environment: - - SN_PORT=510 + - SN_PORT=5101 - NODE_TYPE=sn - ROOT_DIR=/data - BUCKET_NAME=hsdstest From 1d165ff01bb054fd8b4564ead1ac1a50a0cada65 Mon Sep 17 00:00:00 2001 From: jreadey Date: Mon, 1 Apr 2024 06:21:58 -0700 Subject: [PATCH 08/25] add app to docker-compose --- .devcontainer/devcontainer.json | 35 ++++++++++++++++++------------- .devcontainer/docker-compose.yaml | 14 ++++++++++++- 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 406bbe8..eecfcce 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,22 +1,29 @@ { "name": "HDF5 Tutorial", "dockerComposeFile": "docker-compose.yaml", - "hostRequirements": { - "cpus": 4 - }, - "waitFor": "onCreateCommand", - "updateContentCommand": "python3 -m pip install -r requirements.txt", - "postCreateCommand": "", + "service": "app", + "workspaceFolder": "/workspace", + "forwardPorts": [5101], + "portsAttributes": { + "5101": {"label": "HSDS port", "onAutoForward": "silent"} + }, + "customizations": { - "codespaces": { - "openFiles": [] - }, "vscode": { "extensions": [ - "ms-toolsai.jupyter", - "ms-python.python" - ] + "ms-python.python", + "ms-toolsai.jupyter", + "ms-vscode.cpptools", + "h5web.vscode-h5web", + "davidanson.vscode-markdownlint" + ], + "settings": { + "C_Cpp.default.cppStandard": "c++17", + "C_Cpp.default.cStandard": "c99", + "python.defaultInterpreterPath": "/opt/conda/envs/hdf5-tutorial/python", + "python.languageServer": "Default", + "terminal.integrated.shell.linux": "/bin/bash" + } } } -} - +} \ No newline at end of file diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml index d620890..ca25e40 100644 --- a/.devcontainer/docker-compose.yaml +++ b/.devcontainer/docker-compose.yaml @@ -1,5 +1,17 @@ -version: "2" +version: "3" services: + app: + build: + context: .. + dockerfile: .devcontainer/Dockerfile + volumes: + - ..:/workspace:cached + + # Overrides default command so things don't shut down after the process ends. + command: sleep infinity + + # Runs app on the same network as the SN container, allows "forwardPorts" in devcontainer.json function. 
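+      # i.e., the app container reaches the HSDS sn node at http://localhost:5101 with no extra port mapping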
+ network_mode: service:sn head: image: hdfgroup/hsds:master restart: on-failure From 4a964926f658b7c6391cbeb7cfbc94b51b3cddf1 Mon Sep 17 00:00:00 2001 From: jreadey Date: Mon, 1 Apr 2024 08:31:22 -0700 Subject: [PATCH 09/25] added hsds config --- .devcontainer/docker-compose.yaml | 7 +++++-- .hscfg | 4 ++++ hsds/config/override.yml | 8 ++++++++ hsds/config/passwd.txt | 2 ++ hsds/data/hsdstest/readme.txt | 1 + 5 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 .hscfg create mode 100644 hsds/config/override.yml create mode 100644 hsds/config/passwd.txt create mode 100644 hsds/data/hsdstest/readme.txt diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml index ca25e40..c811e0e 100644 --- a/.devcontainer/docker-compose.yaml +++ b/.devcontainer/docker-compose.yaml @@ -17,7 +17,7 @@ services: restart: on-failure mem_limit: 512m environment: - - TARGET_SN_COUNT=4 + - TARGET_SN_COUNT=1 - TARGET_DN_COUNT=4 - NODE_TYPE=head_node - ROOT_DIR=/data @@ -26,6 +26,7 @@ services: - 5100 volumes: - /home/codespace/hsds/data:/data + - /home/hsds/config/:/config/ dn: image: hdfgroup/hsds:master restart: on-failure @@ -40,6 +41,7 @@ services: - head volumes: - /home/codespace/hsds/data:/data + - /home/hsds/config/:/config/ links: - head sn: @@ -53,10 +55,11 @@ services: - BUCKET_NAME=hsdstest - HSDS_ENDPOINT=http://localhost:5101 ports: - - 5101:5101 + - 5101-5104:5101 depends_on: - head volumes: - /home/codespace/hsds/data:/data + - /home/hsds/config/:/config/ links: - head diff --git a/.hscfg b/.hscfg new file mode 100644 index 0000000..504d281 --- /dev/null +++ b/.hscfg @@ -0,0 +1,4 @@ +hs_endpoint = http://localhost:5101 +hs_username = vscode +hs_password = vscode + diff --git a/hsds/config/override.yml b/hsds/config/override.yml new file mode 100644 index 0000000..5f66abc --- /dev/null +++ b/hsds/config/override.yml @@ -0,0 +1,8 @@ +log_level: DEBUG +bucket_name: hsdstest +server_name: HSDS for Github codespaces +chunk_mem_cache_size: 256m +root_dir: /workspace/hsds/data +head_sleep_time: 60 # max sleep time between health checks for head node +node_sleep_time: 60 # max sleep time between health checks for SN/DN nodes + diff --git a/hsds/config/passwd.txt b/hsds/config/passwd.txt new file mode 100644 index 0000000..63a9849 --- /dev/null +++ b/hsds/config/passwd.txt @@ -0,0 +1,2 @@ +# username/passwords for HSDS... 
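+# format: one <username>:<password> entry per line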
+vscode:vscode diff --git a/hsds/data/hsdstest/readme.txt b/hsds/data/hsdstest/readme.txt new file mode 100644 index 0000000..d43d778 --- /dev/null +++ b/hsds/data/hsdstest/readme.txt @@ -0,0 +1 @@ +This is the default bucket for HSDS From 12ee75a11bd37cd5c89dcdba1f591b5f936c7257 Mon Sep 17 00:00:00 2001 From: jreadey Date: Mon, 1 Apr 2024 09:02:20 -0700 Subject: [PATCH 10/25] add env var to app container --- .bashrc | 1 - .devcontainer/Dockerfile | 1 + .devcontainer/docker-compose.yaml | 4 ++++ .hscfg | 4 ---- 4 files changed, 5 insertions(+), 5 deletions(-) delete mode 100644 .bashrc delete mode 100644 .hscfg diff --git a/.bashrc b/.bashrc deleted file mode 100644 index 10486fa..0000000 --- a/.bashrc +++ /dev/null @@ -1 +0,0 @@ -# add any special user configuration settings here \ No newline at end of file diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 602f49e..1ab16c0 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -12,3 +12,4 @@ RUN if [ -f "/tmp/conda-tmp/environment.yml" ]; then umask 0002 && /opt/conda/bi RUN apt-get update && apt-get -y install --no-install-recommends \ build-essential cmake libtool-bin git less wget \ libhdf5-dev libhdf5-mpi-dev hdf5-tools libyajl-dev +RUN conda install -n base h5py, h5pyd diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml index c811e0e..ddc1a34 100644 --- a/.devcontainer/docker-compose.yaml +++ b/.devcontainer/docker-compose.yaml @@ -4,6 +4,10 @@ services: build: context: .. dockerfile: .devcontainer/Dockerfile + environment: + - HS_ENDPOINT=http://localhost:5101 + - HS_USERNAME=vscode + - HS_PASSWORD=vscode volumes: - ..:/workspace:cached diff --git a/.hscfg b/.hscfg deleted file mode 100644 index 504d281..0000000 --- a/.hscfg +++ /dev/null @@ -1,4 +0,0 @@ -hs_endpoint = http://localhost:5101 -hs_username = vscode -hs_password = vscode - From a623add66d641620863105e828d82136a62f872b Mon Sep 17 00:00:00 2001 From: jreadey Date: Mon, 1 Apr 2024 09:38:08 -0700 Subject: [PATCH 11/25] install pip3 --- .devcontainer/Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 1ab16c0..33b63ea 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -11,5 +11,6 @@ RUN if [ -f "/tmp/conda-tmp/environment.yml" ]; then umask 0002 && /opt/conda/bi RUN apt-get update && apt-get -y install --no-install-recommends \ build-essential cmake libtool-bin git less wget \ - libhdf5-dev libhdf5-mpi-dev hdf5-tools libyajl-dev -RUN conda install -n base h5py, h5pyd + libhdf5-dev libhdf5-mpi-dev hdf5-tools libyajl-dev python3-pip +RUN conda install -n base h5py +RUN pip3 install h5pyd From 308fd28a4830c158096c4f8db9ca5695970711c5 Mon Sep 17 00:00:00 2001 From: jreadey Date: Mon, 1 Apr 2024 10:02:47 -0700 Subject: [PATCH 12/25] fix mount pts --- .devcontainer/docker-compose.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml index ddc1a34..708aabb 100644 --- a/.devcontainer/docker-compose.yaml +++ b/.devcontainer/docker-compose.yaml @@ -29,8 +29,8 @@ services: ports: - 5100 volumes: - - /home/codespace/hsds/data:/data - - /home/hsds/config/:/config/ + - ../hsds/data:/data + - ../hsds/config/:/config/ dn: image: hdfgroup/hsds:master restart: on-failure @@ -44,8 +44,8 @@ services: depends_on: - head volumes: - - /home/codespace/hsds/data:/data - - /home/hsds/config/:/config/ + - ../hsds/data:/data + - 
../hsds/config/:/config/ links: - head sn: @@ -63,7 +63,7 @@ services: depends_on: - head volumes: - - /home/codespace/hsds/data:/data - - /home/hsds/config/:/config/ + - ../hsds/data:/data + - ../hsds/config/:/config/ links: - head From 33085ce5ea9bf3964f24e4266e5c282574727d0d Mon Sep 17 00:00:00 2001 From: jreadey Date: Mon, 1 Apr 2024 10:33:11 -0700 Subject: [PATCH 13/25] add default hsds folders --- hsds/data/hsdstest/home/.domain.json | 1 + hsds/data/hsdstest/home/vscode/.domain.json | 1 + 2 files changed, 2 insertions(+) create mode 100644 hsds/data/hsdstest/home/.domain.json create mode 100644 hsds/data/hsdstest/home/vscode/.domain.json diff --git a/hsds/data/hsdstest/home/.domain.json b/hsds/data/hsdstest/home/.domain.json new file mode 100644 index 0000000..d6f9199 --- /dev/null +++ b/hsds/data/hsdstest/home/.domain.json @@ -0,0 +1 @@ +{"owner": "admin", "acls": {"admin": {"create": true, "read": true, "update": true, "delete": true, "readACL": true, "updateACL": true}, "default": {"create": false, "read": true, "update": false, "delete": false, "readACL": false, "updateACL": false}}, "created": 1708897646.0599918, "lastModified": 1708897646.0599918} \ No newline at end of file diff --git a/hsds/data/hsdstest/home/vscode/.domain.json b/hsds/data/hsdstest/home/vscode/.domain.json new file mode 100644 index 0000000..8a27da6 --- /dev/null +++ b/hsds/data/hsdstest/home/vscode/.domain.json @@ -0,0 +1 @@ +{"owner": "vscode", "acls": {"vscode": {"create": true, "read": true, "update": true, "delete": true, "readACL": true, "updateACL": true}, "default": {"create": false, "read": true, "update": false, "delete": false, "readACL": false, "updateACL": false}}, "created": 1711992550.3733413, "lastModified": 1711992550.3733413} \ No newline at end of file From dd3ec356cd89461fa0762ba5455aedbb091577e7 Mon Sep 17 00:00:00 2001 From: jreadey Date: Thu, 4 Apr 2024 05:55:30 -0700 Subject: [PATCH 14/25] updated hsds image --- .devcontainer/docker-compose.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml index 708aabb..6329dfe 100644 --- a/.devcontainer/docker-compose.yaml +++ b/.devcontainer/docker-compose.yaml @@ -17,7 +17,7 @@ services: # Runs app on the same network as the SN container, allows "forwardPorts" in devcontainer.json function. 
network_mode: service:sn head: - image: hdfgroup/hsds:master + image: hdfgroup/hsds:sha-ed38987 restart: on-failure mem_limit: 512m environment: @@ -32,7 +32,7 @@ services: - ../hsds/data:/data - ../hsds/config/:/config/ dn: - image: hdfgroup/hsds:master + image: hdfgroup/hsds:sha-ed38987 restart: on-failure mem_limit: 1g environment: @@ -49,7 +49,7 @@ services: links: - head sn: - image: hdfgroup/hsds:master + image: hdfgroup/hsds:sha-ed38987 restart: on-failure mem_limit: 1g environment: From 2e5cb2361963f8b457347bba7f9cfed63fcc315c Mon Sep 17 00:00:00 2001 From: jreadey Date: Thu, 4 Apr 2024 09:06:19 -0700 Subject: [PATCH 15/25] add s3fs to dockerfile --- .devcontainer/Dockerfile | 2 +- hsds/data/hsdstest/db/.gitignore | 0 hsds/data/hsdstest/home/vscode/.gitignore | 0 3 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 hsds/data/hsdstest/db/.gitignore create mode 100644 hsds/data/hsdstest/home/vscode/.gitignore diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 33b63ea..6dbde4d 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -13,4 +13,4 @@ RUN apt-get update && apt-get -y install --no-install-recommends \ build-essential cmake libtool-bin git less wget \ libhdf5-dev libhdf5-mpi-dev hdf5-tools libyajl-dev python3-pip RUN conda install -n base h5py -RUN pip3 install h5pyd +RUN pip3 install h5pyd, s3fs diff --git a/hsds/data/hsdstest/db/.gitignore b/hsds/data/hsdstest/db/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/hsds/data/hsdstest/home/vscode/.gitignore b/hsds/data/hsdstest/home/vscode/.gitignore new file mode 100644 index 0000000..e69de29 From 848f5474ac30b082957d8d2432727aec5abdde3b Mon Sep 17 00:00:00 2001 From: jreadey Date: Thu, 4 Apr 2024 09:31:27 -0700 Subject: [PATCH 16/25] updates for first two tutorials --- python/01-Intro.ipynb | 101 ++++++-- python/02-Command Line Tools.ipynb | 392 +++++++++++------------------ 2 files changed, 234 insertions(+), 259 deletions(-) diff --git a/python/01-Intro.ipynb b/python/01-Intro.ipynb index 6550dce..6873666 100644 --- a/python/01-Intro.ipynb +++ b/python/01-Intro.ipynb @@ -20,17 +20,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "USE_H5PY = True # set to False to use HSDS instead\n", + "USE_H5PY = False # set to False to use HSDS instead\n", "if USE_H5PY:\n", " import h5py\n", - " WORK_DIR=\".\" # this directory\n", + " WORK_DIR = \".\"\n", "else:\n", " import h5pyd as h5py\n", - " WORK_DIR=\"hdf5://home/test_user1/\"\n", + " WORK_DIR = \"hdf5://home/vscode/\"\n", "import os.path as op" ] }, @@ -56,14 +56,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "creating HDF5 file here: hdf5://home/vscode/01.h5\n" + ] + } + ], "source": [ "# create a new file\n", "filepath = op.join(WORK_DIR, \"01.h5\")\n", @@ -73,14 +81,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'g-b161d3a2-0eb5b8e5-39e9-5b2a86-3d306d'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# get file id (equivalent to the id of the root group)\n", "f.id.id\n" @@ -88,23 +107,45 @@ }, 
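Aside: the `USE_H5PY` switch above works because h5pyd mirrors the h5py API. A condensed sketch of the pattern these notebooks use (same names as the surrounding cells; assumes an HSDS endpoint is reachable when `USE_H5PY` is `False`):

```python
import os.path as op

USE_H5PY = False  # True -> local HDF5 via h5py; False -> HSDS via h5pyd
if USE_H5PY:
    import h5py
    WORK_DIR = "."  # files are written to the local directory
else:
    import h5pyd as h5py  # drop-in replacement exposing the h5py API
    WORK_DIR = "hdf5://home/vscode/"  # an HSDS folder, not a filesystem path

# the same code then works against either backend
with h5py.File(op.join(WORK_DIR, "01.h5"), "w") as f:
    f.create_group("g1")
    print(f.id.id)  # an int under h5py, a UUID string under HSDS
```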
{ "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'/'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "f.name" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# no other objects in the file currently\n", "len(f)" @@ -112,9 +153,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# create a group in the file\n", "f.create_group(\"g1\")\n", @@ -123,7 +175,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "collapsed": false, "jupyter": { @@ -140,14 +192,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'g-b161d3a2-0eb5b8e5-39e9-5b2a86-3d306d'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "f.id.id" ] @@ -162,7 +225,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -187,7 +250,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.9.15" } }, "nbformat": 4, diff --git a/python/02-Command Line Tools.ipynb b/python/02-Command Line Tools.ipynb index b777c65..b939d6d 100644 --- a/python/02-Command Line Tools.ipynb +++ b/python/02-Command Line Tools.ipynb @@ -19,56 +19,46 @@ " " ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using HDF5 Library Tools\n", + "------------------------\n", + "\n", + "There are several command line tools that are included with the HDF5 library.\n", + "The most commonly used ones are: \n", + "\n", + "* `h5ls` - list contents of an HDF5 file\n", + "* `h5dump` - dump out the contents of an HDF5 file\n", + "* `h5diff` - compare two HDF5 files\n", + "* `h5stat` - get detailed statistics on an HDF5 file\n", + "\n", + "We'll explore each of these below..." + ] + }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0] Downloading 'https://s3.amazonaws.com/hdfgroup/data/hdf5test/tall.h5' ...\n", - "Saving 'tall.h5'\n", - "HTTP response 200 OK [https://s3.amazonaws.com/hdfgroup/data/hdf5test/tall.h5]\n", - "\u001b[m\u001b[m\u001b[m\u001b[m" - ] - } - ], + "outputs": [], "source": [ - "# get a sample HDF5 file\n", + "# To start with, let's grab an HDF5 file to work with...\n", + "# The exclamation sign indicates to jupyter to execute the following cell in the shell\n", + "# Alternatively you use the codespace terminal tool and run wget there.\n", "! 
wget https://s3.amazonaws.com/hdfgroup/data/hdf5test/tall.h5" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/ Group\n", - "/g1 Group\n", - "/g1/g1.1 Group\n", - "/g1/g1.1/dset1.1.1 Dataset {10, 10}\n", - "/g1/g1.1/dset1.1.2 Dataset {20}\n", - "/g1/g1.2 Group\n", - "/g1/g1.2/extlink External Link {somefile//somepath}\n", - "/g1/g1.2/g1.2.1 Group\n", - "/g1/g1.2/g1.2.1/slink Soft Link {somevalue}\n", - "/g2 Group\n", - "/g2/dset2.1 Dataset {10}\n", - "/g2/dset2.2 Dataset {3, 5}\n" - ] - } - ], + "outputs": [], "source": [ "# Display objects with a file. Use -r for recursive mode\n", "! h5ls -r tall.h5" @@ -76,111 +66,14 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "HDF5 \"tall.h5\" {\n", - "GROUP \"/\" {\n", - " ATTRIBUTE \"attr1\" {\n", - " DATATYPE H5T_STD_I8BE\n", - " DATASPACE SIMPLE { ( 10 ) / ( 10 ) }\n", - " DATA {\n", - " \"abcdefghi\\000\"\n", - " }\n", - " }\n", - " ATTRIBUTE \"attr2\" {\n", - " DATATYPE H5T_STD_I32BE\n", - " DATASPACE SIMPLE { ( 2, 2 ) / ( 2, 2 ) }\n", - " DATA {\n", - " (0,0): 0, 1,\n", - " (1,0): 2, 3\n", - " }\n", - " }\n", - " GROUP \"g1\" {\n", - " GROUP \"g1.1\" {\n", - " DATASET \"dset1.1.1\" {\n", - " DATATYPE H5T_STD_I32BE\n", - " DATASPACE SIMPLE { ( 10, 10 ) / ( 10, 10 ) }\n", - " DATA {\n", - " (0,0): 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " (1,0): 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,\n", - " (2,0): 0, 2, 4, 6, 8, 10, 12, 14, 16, 18,\n", - " (3,0): 0, 3, 6, 9, 12, 15, 18, 21, 24, 27,\n", - " (4,0): 0, 4, 8, 12, 16, 20, 24, 28, 32, 36,\n", - " (5,0): 0, 5, 10, 15, 20, 25, 30, 35, 40, 45,\n", - " (6,0): 0, 6, 12, 18, 24, 30, 36, 42, 48, 54,\n", - " (7,0): 0, 7, 14, 21, 28, 35, 42, 49, 56, 63,\n", - " (8,0): 0, 8, 16, 24, 32, 40, 48, 56, 64, 72,\n", - " (9,0): 0, 9, 18, 27, 36, 45, 54, 63, 72, 81\n", - " }\n", - " ATTRIBUTE \"attr1\" {\n", - " DATATYPE H5T_STD_I8BE\n", - " DATASPACE SIMPLE { ( 27 ) / ( 27 ) }\n", - " DATA {\n", - " \"1st attribute of dset1.1.1\\000\"\n", - " }\n", - " }\n", - " ATTRIBUTE \"attr2\" {\n", - " DATATYPE H5T_STD_I8BE\n", - " DATASPACE SIMPLE { ( 27 ) / ( 27 ) }\n", - " DATA {\n", - " \"2nd attribute of dset1.1.1\\000\"\n", - " }\n", - " }\n", - " }\n", - " DATASET \"dset1.1.2\" {\n", - " DATATYPE H5T_STD_I32BE\n", - " DATASPACE SIMPLE { ( 20 ) / ( 20 ) }\n", - " DATA {\n", - " (0): 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", - " (17): 17, 18, 19\n", - " }\n", - " }\n", - " }\n", - " GROUP \"g1.2\" {\n", - " EXTERNAL_LINK \"extlink\" {\n", - " TARGETFILE \"somefile\"\n", - " TARGETPATH \"somepath\"\n", - " }\n", - " GROUP \"g1.2.1\" {\n", - " SOFTLINK \"slink\" {\n", - " LINKTARGET \"somevalue\"\n", - " }\n", - " }\n", - " }\n", - " }\n", - " GROUP \"g2\" {\n", - " DATASET \"dset2.1\" {\n", - " DATATYPE H5T_IEEE_F32BE\n", - " DATASPACE SIMPLE { ( 10 ) / ( 10 ) }\n", - " DATA {\n", - " (0): 1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9\n", - " }\n", - " }\n", - " DATASET \"dset2.2\" {\n", - " DATATYPE H5T_IEEE_F32BE\n", - " DATASPACE SIMPLE { ( 3, 5 ) / ( 3, 5 ) }\n", - " DATA {\n", - " (0,0): 0, 0.1, 0.2, 0.3, 0.4,\n", - " (1,0): 0, 0.2, 0.4, 0.6, 0.8,\n", - " (2,0): 0, 0.3, 0.6, 0.9, 1.2\n", - " }\n", - " }\n", - " }\n", - "}\n", - "}\n" - ] - 
} - ], + "outputs": [], "source": [ "# h5dump will show the dataset contents by default\n", "! h5dump -r tall.h5" @@ -188,146 +81,165 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# h5dump will display not just the objects in the file, but (by default) print\n", + "# the dataset data as well\n", + "! h5dump -r tall.h5" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# h5stat will show many detailed statitistics about the file\n", + "! h5stat tall.h5" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using h5pyd Tools\n", + "------------------------\n", + "\n", + "The h5pyd Python package provides a Python interface for accessing HSDS. \n", + "It's based on the h5py API, so most programs should be easily converted from using\n", + "h5py to h5pyd. \n", + "The h5pyd package also include a set of command line tools for working with HSDS content.\n", + "There are analogs to the library tools (`hsls` rather than `h5ls`) plus some additional tools\n", + "that serve as standins for the common Linux command line tools (e.g. `hsrm` rather than `rm`).\n", + "There are also tools for uploading an HDF5 file to an HSDS domain (`hsload`) and \n", + "downloading an HSDS domain to an HDF5 file (hopefully not the worse for wear).\n", + "The tools linclude: hsconfigure, hsload, hsget, hsls, hstouch, hsrm, hsacl, hsdiff,\n", + "and hsstat: \n", + "\n", + "* `hsconfigure` - setup a connection to an HSDS server\n", + "* `hsload` - copy an HDF5 file to an HSDS domain\n", + "* `hsget` - copy an HSDS domain to an HDF5 file\n", + "* `hsls` - list the contents of an HSDS domain (or HSDS folders)\n", + "* `hstouch` - create a new domain or folder\n", + "* `hsrm` - remove a domain or folder\n", + "* `hsacl` - view or edit HSDS folder or domain ACLs (permission settings)\n", + "* `hsdiff` - compare an HDF5 file with an HSDS domain\n", + "* `hsstat` - get detailed statistics on an HSDS domain\n", + "\n", + "Running any of these with `--help` will provide usage info.\n", + "\n", + "In addition we'll try out some of these below..." + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "server name: hsds on devone\n", - "server state: READY\n", - "endpoint: http://hsds.hdf.test:5101\n", - "username: test_user1 \n", - "password: ****\n", - "home: /home/test_user1/\n", - "server version: 0.8.1\n", - "node count: 1\n", - "up: 59 min 48 sec\n", - "h5pyd version: 0.15.1\n" - ] - } - ], + "outputs": [], "source": [ - "# On the HSDS side, hsinfo will show the current server status\n", + "# A dedicated instance of HSDS should be running as part of this \n", + "# codespace.\n", + "# You can verify this by using the hsinfo command. \n", + "# It will show the current server status\n", "! hsinfo" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# you can use hsload to copy a local file to HSDS\n", - "! hsload tall.h5 hdf5://home/test_user1/" + "# When you first create the codespace, there are no domains loaded in HSDS,\n", + "# but you can use hsload any HDF5 to HSDS.\n", + "# Let's try it with the file we downloaded earlier.\n", + "! 
hsload tall.h5 hdf5://home/vscode/" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/ Group\n", - "/g1 Group\n", - "/g1/g1.1 Group\n", - "/g1/g1.1/dset1.1.1 Dataset {10, 10}\n", - "/g1/g1.1/dset1.1.2 Dataset {20}\n", - "/g1/g1.2 Group\n", - "/g1/g1.2/g1.2.1 Group\n", - "/g1/g1.2/g1.2.1/slink SoftLink {somevalue}\n", - "/g1/g1.2/extlink ExternalLink {somepath//somefile}\n", - "/g2 Group\n", - "/g2/dset2.1 Dataset {10}\n", - "/g2/dset2.2 Dataset {3, 5}\n" - ] - } - ], + "outputs": [], "source": [ "# hsls works like h5ls but with content managed by the server\n", - "! hsls -r hdf5://home/test_user1/tall.h5" + "! hsls -r hdf5://home/vscode/tall.h5" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "test_user1 folder 2023-03-10 17:20:29 hdf5://home/test_user1/\n", - "test_user1 domain 2023-08-09 16:59:50 hdf5://home/test_user1/01.h5\n", - "test_user1 domain 2023-08-09 16:39:16 hdf5://home/test_user1/03.h5\n", - "test_user1 domain 2023-08-09 18:39:25 hdf5://home/test_user1/04.h5\n", - "test_user1 domain 2023-08-10 17:21:23 hdf5://home/test_user1/05.h5\n", - "test_user1 domain 2023-08-10 17:37:10 hdf5://home/test_user1/06.h5\n", - "test_user1 domain 2023-04-11 16:47:20 hdf5://home/test_user1/ascii_attr.h5\n", - "test_user1 domain 2023-04-10 12:43:53 hdf5://home/test_user1/d_nullref.h5\n", - "test_user1 domain 2023-03-22 21:37:59 hdf5://home/test_user1/d_objref.h5\n", - "test_user1 folder 2023-03-31 13:05:11 hdf5://home/test_user1/equinor\n", - "test_user1 domain 2023-04-10 14:55:33 hdf5://home/test_user1/fletcher32.h5\n", - "test_user1 domain 2023-04-26 20:21:05 hdf5://home/test_user1/g1.h5\n", - "test_user1 folder 2023-03-20 22:04:21 hdf5://home/test_user1/h5pyd_test\n", - "test_user1 folder 2023-03-11 20:42:12 hdf5://home/test_user1/hsds_test\n", - "test_user1 folder 2023-06-21 12:39:00 hdf5://home/test_user1/icesat2\n", - "test_user1 domain 2023-04-10 14:55:52 hdf5://home/test_user1/scale_offset.h5\n", - "test_user1 domain 2023-03-30 19:57:15 hdf5://home/test_user1/scaleoffset.h5\n", - "test_user1 domain 2023-04-10 15:32:54 hdf5://home/test_user1/shuffle_compress.h5\n", - "test_user1 domain 2023-03-23 17:27:25 hdf5://home/test_user1/simpleattr.h5\n", - "test_user1 domain 2023-04-28 16:33:47 hdf5://home/test_user1/snp500_link.h5\n", - "test_user1 folder 2023-04-05 22:21:07 hdf5://home/test_user1/stream\n", - "test_user1 domain 2023-08-10 17:54:20 hdf5://home/test_user1/tall.h5\n", - "test_user1 domain 2023-03-11 20:39:53 hdf5://home/test_user1/tall2.h5\n", - "test_user1 domain 2023-03-13 13:10:12 hdf5://home/test_user1/tall3.h5\n", - "test_user1 domain 2023-04-10 16:24:25 hdf5://home/test_user1/tall_compress.h5\n", - "test_user1 folder 2023-03-10 17:21:28 hdf5://home/test_user1/test\n", - "test_user1 folder 2023-07-16 22:25:44 hdf5://home/test_user1/tmp\n", - "test_user1 domain 2023-05-09 18:41:51 hdf5://home/test_user1/wordmap.h5\n", - "28 items\n" - ] - } - ], + "outputs": [], "source": [ - "# hsls can also be used to display contents of an HSDS folder\n", + "# hsls can also be used to display contents of an HSDS folder.\n", + "# HSDS folders are similar in concept to directories. They allow you\n", + "# to organize collections of domains and sub-folders\n", "# Note: trailing slash is required\n", - "! hsls hdf5://home/test_user1/" + "\n", + "! 
hsls hdf5://home/vscode/" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "domain: hdf5://home/test_user1/tall.h5\n", - " owner: test_user1\n", - " id: g-d761f590-972a9c10-5fe9-7d181f-a21498\n", - " last modified: 2023-08-10 17:54:20\n", - " total_size: 0\n", - " allocated_bytes: 0\n", - " num objects: 0\n", - " num chunks: 0\n" - ] - } - ], + "outputs": [], + "source": [ + "# hsstat can be used to see statistics of the domain\n", + "! hsstat hdf5://home/vscode/tall.h5" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# and hsget allows you to create an hdf5 file from an HSDS domain\n", + "! hsget hdf5://home/vscode/tall.h5 tall2.h5" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# compare this to the original. No output indicates that the two are equivalent\n", + "! h5diff tall.h5 tall2.h5" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "HDF5 File Linking\n", + "-----------------\n", + "\n", + "If you would like to load a HDF5 file in the cloud (with s3 or azure blob storage), you can *link* to it rather\n", + "than copying all the data into the limited storage include with your codespace.\n", + "Linking we just copy the HDF5 file metadata (typically a small fraction of the over file size) to your\n", + "local HSDS store. The HDF5 \"chunks\" (where dataset data is stored) are accessed on demand from the cloud provider.\n", + "Since your vscode space is also in the cloud, this should be quite fast compared with accessing directly from your \n", + "laptop." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "# hsstat provides information about storage used for HSDS objects similar to how h5stat works\n", - "# Note: it may take a minute before the information displayed by hsstat is current\n", - "! hsstat hdf5://home/test_user1/tall.h5" + "# Use the --link option to link to an existing file.\n", + "! 
hsload --link s3://hdf5.sample/data/hdf5test/snp500.h5 hdf5://home/vsode/snp500.h5" ] }, { @@ -458,7 +370,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.15" + "version": "3.9.18" } }, "nbformat": 4, From 47cd46306fa5e30903e751340e3a82db198f3756 Mon Sep 17 00:00:00 2001 From: jreadey Date: Thu, 4 Apr 2024 10:10:06 -0700 Subject: [PATCH 17/25] fix dockerfile --- .devcontainer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 6dbde4d..825aebf 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -13,4 +13,4 @@ RUN apt-get update && apt-get -y install --no-install-recommends \ build-essential cmake libtool-bin git less wget \ libhdf5-dev libhdf5-mpi-dev hdf5-tools libyajl-dev python3-pip RUN conda install -n base h5py -RUN pip3 install h5pyd, s3fs +RUN pip3 install h5pyd s3fs From f4ca38ad54d09bb1ef22b36ff52cc8fdc68b2450 Mon Sep 17 00:00:00 2001 From: jreadey Date: Thu, 4 Apr 2024 10:21:17 -0700 Subject: [PATCH 18/25] updated hsds config --- hsds/config/override.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hsds/config/override.yml b/hsds/config/override.yml index 5f66abc..c43b590 100644 --- a/hsds/config/override.yml +++ b/hsds/config/override.yml @@ -1,8 +1,9 @@ -log_level: DEBUG +log_level: ERROR bucket_name: hsdstest server_name: HSDS for Github codespaces chunk_mem_cache_size: 256m root_dir: /workspace/hsds/data +default_public: True head_sleep_time: 60 # max sleep time between health checks for head node node_sleep_time: 60 # max sleep time between health checks for SN/DN nodes From 6dcbb0d191a6dfdaf9ca20e8da7fa2ae846953be Mon Sep 17 00:00:00 2001 From: jreadey Date: Fri, 5 Apr 2024 06:07:56 -0700 Subject: [PATCH 19/25] add docker-outside-of-docker feature --- .devcontainer/devcontainer.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index eecfcce..57c80df 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -7,7 +7,9 @@ "portsAttributes": { "5101": {"label": "HSDS port", "onAutoForward": "silent"} }, - + "features": { + "ghcr.io/devcontainers/features/docker-outside-of-docker": {} + }, "customizations": { "vscode": { "extensions": [ From 2d25895b79964153da782ce33701cd794765237d Mon Sep 17 00:00:00 2001 From: jreadey Date: Fri, 5 Apr 2024 06:43:32 -0700 Subject: [PATCH 20/25] updated config settings --- .devcontainer/docker-compose.yaml | 2 +- hsds/config/override.yml | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml index 6329dfe..47a258d 100644 --- a/.devcontainer/docker-compose.yaml +++ b/.devcontainer/docker-compose.yaml @@ -59,7 +59,7 @@ services: - BUCKET_NAME=hsdstest - HSDS_ENDPOINT=http://localhost:5101 ports: - - 5101-5104:5101 + - 5101:5101 depends_on: - head volumes: diff --git a/hsds/config/override.yml b/hsds/config/override.yml index c43b590..bd5ab55 100644 --- a/hsds/config/override.yml +++ b/hsds/config/override.yml @@ -1,9 +1,13 @@ -log_level: ERROR -bucket_name: hsdstest -server_name: HSDS for Github codespaces -chunk_mem_cache_size: 256m -root_dir: /workspace/hsds/data -default_public: True +log_level: ERROR # or DEBUG or INFO or WARNING +bucket_name: hsdstest # default bucket name +server_name: HSDS for Github codespaces # this is displayed by the hsinfo 
command +chunk_mem_cache_size: 256m # chunk_cache memory per DN container +root_dir: /workspace/hsds/data # this is where HSDS data files will be stored +default_public: True # New domains will be created as default read +aws_region: us-west-2 # The majority of public data files are in us-west-2 +aws_s3_gateway: http://s3.us-west-2.amazonaws.com/ # s3 gateway for us-west-2 +aws_s3_no_sign_request: True # Set to False if setting aws_access_key_id and aws_secret_access_key here head_sleep_time: 60 # max sleep time between health checks for head node node_sleep_time: 60 # max sleep time between health checks for SN/DN nodes - +aws_region: us-west-2 # (original was us-east-1) +aws_s3_gateway: http://s3.us-west-2.amazonaws.com/ # (original was null) \ No newline at end of file From c07cc8349641b0ae71f2432c15c83a1edff8dd65 Mon Sep 17 00:00:00 2001 From: jreadey Date: Fri, 5 Apr 2024 08:00:03 -0700 Subject: [PATCH 21/25] updates for hsds examples --- .devcontainer/Dockerfile | 2 +- 05-HSDS.ipynb | 56 ++---- python/01-Intro.ipynb | 93 ++------- python/03-Datasets.ipynb | 6 +- python/04-Compression.ipynb | 6 +- python/05-GroupsAndLinks.ipynb | 6 +- python/06-Attributes.ipynb | 6 +- python/07-Types.ipynb | 6 +- python/nrel_wtk_example.ipynb | 334 +++++++++++++++++++++++++++++++++ 9 files changed, 377 insertions(+), 138 deletions(-) create mode 100644 python/nrel_wtk_example.ipynb diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 825aebf..02737aa 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -12,5 +12,5 @@ RUN if [ -f "/tmp/conda-tmp/environment.yml" ]; then umask 0002 && /opt/conda/bi RUN apt-get update && apt-get -y install --no-install-recommends \ build-essential cmake libtool-bin git less wget \ libhdf5-dev libhdf5-mpi-dev hdf5-tools libyajl-dev python3-pip -RUN conda install -n base h5py +RUN conda install -n base h5py matplotlib h5netcdf xarray RUN pip3 install h5pyd s3fs diff --git a/05-HSDS.ipynb b/05-HSDS.ipynb index 145fa9c..7f90800 100644 --- a/05-HSDS.ipynb +++ b/05-HSDS.ipynb @@ -10,45 +10,13 @@ "\n", "## Installation\n", "\n", - "For this simplistic setup, the installation is easy:\n", + "This code space is already configured to run HSDS, so no special setup is needed. \n", + "Refer to the files under .vscode in this project to see how it is done.\n", "\n", - "1. Create a directory for the HSDS data files (`~/hsds_data`).\n", - "2. Use the user name `vscode` and password `vscode` to authenticate to HSDS.\n", - "3. Launch the service.\n", - "\n", - "For more sophisticated setups (e.g., Kubernetes), please refer to the [HSDS documentation](https://github.com/HDFGroup/hsds/tree/master/docs)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%bash\n", - "export HS_ENDPOINT=http://localhost:5101\n", - "export HS_USERNAME=$USER\n", - "export HS_PASSWORD=$USER\n", - "mkdir ~/hsds_data\n", - "hsds --root_dir ~/hsds_data --hs_username $USER --hs_password $USER >~/hs.log 2>&1 &\n", - "sleep 5" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We create a configuration file for HSDS: `~/.hscfg`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%bash\n", - "hsconfigure <<< $'http://localhost:5101\\nvscode\\nvscode\\n\\nY\\n'" + "Here HSDS is running in the cloud as part of Github Code Spaces. 
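For a local setup the service can also be started directly: for example `pip install hsds`, then `hsds --root_dir ~/hsds_data --hs_username $USER --hs_password $USER` (the launch command an earlier revision of this notebook used).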
You can also installed \n", + "on your Mac/Windows/or Linux desktop.\n", + "Please refer to the [HSDS documentation](https://github.com/HDFGroup/hsds/tree/master/docs)\n", + "for detailed instructions." ] }, { @@ -72,7 +40,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Create the top-level domain and a user \"directory\" for the user `vscode`:" + "When you first run the codespace, HSDS will have just a hdf5:/home/ and \n", + "hdf5:/home/vscode/ folders." ] }, { @@ -82,8 +51,7 @@ "outputs": [], "source": [ "%%bash\n", - "hstouch /home/ && hstouch /home/$USER/\n", - "hsinfo" + "hsls hdf5://home/" ] }, { @@ -140,7 +108,7 @@ "outputs": [], "source": [ "%%bash\n", - "curl -s -u vscode:vscode http://localhost:5101/?domain=/home/vscode/foo.h5 | jq" + "curl -s http://localhost:5101/?domain=/home/vscode/foo.h5 | jq" ] }, { @@ -150,7 +118,7 @@ "outputs": [], "source": [ "%%bash\n", - "find ~/hsds_data" + "find ~/hsds/data/" ] }, { @@ -407,7 +375,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.0" + "version": "3.9.18" } }, "nbformat": 4, diff --git a/python/01-Intro.ipynb b/python/01-Intro.ipynb index 6873666..b59d0d5 100644 --- a/python/01-Intro.ipynb +++ b/python/01-Intro.ipynb @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -56,22 +56,14 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "creating HDF5 file here: hdf5://home/vscode/01.h5\n" - ] - } - ], + "outputs": [], "source": [ "# create a new file\n", "filepath = op.join(WORK_DIR, \"01.h5\")\n", @@ -81,25 +73,14 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, - "outputs": [ - { - "data": { - "text/plain": [ - "'g-b161d3a2-0eb5b8e5-39e9-5b2a86-3d306d'" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# get file id (equivalent to the id of the root group)\n", "f.id.id\n" @@ -107,45 +88,23 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/'" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "f.name" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, - "outputs": [ - { - "data": { - "text/plain": [ - "0" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# no other objects in the file currently\n", "len(f)" @@ -153,20 +112,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# create a group in the file\n", "f.create_group(\"g1\")\n", @@ -175,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": { "collapsed": false, "jupyter": { @@ -192,25 +140,14 @@ }, { "cell_type": "code", - "execution_count": 12, + 
"execution_count": null, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, - "outputs": [ - { - "data": { - "text/plain": [ - "'g-b161d3a2-0eb5b8e5-39e9-5b2a86-3d306d'" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "f.id.id" ] @@ -225,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ diff --git a/python/03-Datasets.ipynb b/python/03-Datasets.ipynb index b88bfd1..9bab2c1 100644 --- a/python/03-Datasets.ipynb +++ b/python/03-Datasets.ipynb @@ -27,13 +27,13 @@ "metadata": {}, "outputs": [], "source": [ - "USE_H5PY = True # set to False to use HSDS instead\n", + "USE_H5PY = False # set to True to use h5py/hdf5lib instead\n", "if USE_H5PY:\n", " import h5py\n", " WORK_DIR=\".\" # this directory\n", "else:\n", " import h5pyd as h5py\n", - " WORK_DIR=\"hdf5://home/test_user1/\"\n", + " WORK_DIR=\"hdf5://home/vscode/\"\n", "import os.path as op" ] }, @@ -271,7 +271,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.9.18" } }, "nbformat": 4, diff --git a/python/04-Compression.ipynb b/python/04-Compression.ipynb index ce01e39..e3a9c1b 100644 --- a/python/04-Compression.ipynb +++ b/python/04-Compression.ipynb @@ -25,13 +25,13 @@ "metadata": {}, "outputs": [], "source": [ - "USE_H5PY = True # set to False to use HSDS instead\n", + "USE_H5PY = False # set to True to use h5py/hdf5lib instead\n", "if USE_H5PY:\n", " import h5py\n", " WORK_DIR=\".\" # this directory\n", "else:\n", " import h5pyd as h5py\n", - " WORK_DIR=\"hdf5://home/test_user1/\"\n", + " WORK_DIR=\"hdf5://home/vscode/\"\n", "import os.path as op\n", "import random" ] @@ -252,7 +252,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.9.18" } }, "nbformat": 4, diff --git a/python/05-GroupsAndLinks.ipynb b/python/05-GroupsAndLinks.ipynb index 18a8a76..0da8f22 100644 --- a/python/05-GroupsAndLinks.ipynb +++ b/python/05-GroupsAndLinks.ipynb @@ -25,13 +25,13 @@ "metadata": {}, "outputs": [], "source": [ - "USE_H5PY = True # set to False to use HSDS instead\n", + "USE_H5PY = False # set to False to use h5py/hdf5lib instead\n", "if USE_H5PY:\n", " import h5py\n", " WORK_DIR=\".\" # this directory\n", "else:\n", " import h5pyd as h5py\n", - " WORK_DIR=\"hdf5://home/test_user1/\"\n", + " WORK_DIR=\"hdf5://home/vscode/\"\n", "import os.path as op" ] }, @@ -375,7 +375,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.9.18" } }, "nbformat": 4, diff --git a/python/06-Attributes.ipynb b/python/06-Attributes.ipynb index 1e4309e..88d6d4e 100644 --- a/python/06-Attributes.ipynb +++ b/python/06-Attributes.ipynb @@ -25,13 +25,13 @@ "metadata": {}, "outputs": [], "source": [ - "USE_H5PY = True # set to False to use HSDS instead\n", + "USE_H5PY = False # set to True to use h5py/hdf5lib instead\n", "if USE_H5PY:\n", " import h5py\n", " WORK_DIR=\".\" # this directory\n", "else:\n", " import h5pyd as h5py\n", - " WORK_DIR=\"hdf5://home/test_user1/\"\n", + " WORK_DIR=\"hdf5://home/vscode/\"\n", "import os.path as op" ] }, @@ -174,7 +174,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.9.18" } }, "nbformat": 4, diff --git a/python/07-Types.ipynb b/python/07-Types.ipynb index d420f32..923feb9 
100644 --- a/python/07-Types.ipynb +++ b/python/07-Types.ipynb @@ -25,13 +25,13 @@ "metadata": {}, "outputs": [], "source": [ - "USE_H5PY = True # set to False to use HSDS instead\n", + "USE_H5PY = False # set to True to use h5py/hdf5lib instead\n", "if USE_H5PY:\n", " import h5py\n", " WORK_DIR=\".\" # this directory\n", "else:\n", " import h5pyd as h5py\n", - " WORK_DIR=\"hdf5://home/test_user1/\"\n", + " WORK_DIR=\"hdf5://home/vscode/\"\n", "import os.path as op\n", "import numpy as np" ] @@ -184,7 +184,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.9.18" } }, "nbformat": 4, diff --git a/python/nrel_wtk_example.ipynb b/python/nrel_wtk_example.ipynb new file mode 100644 index 0000000..3c5bef1 --- /dev/null +++ b/python/nrel_wtk_example.ipynb @@ -0,0 +1,334 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## NREL WIND Toolkit Example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook demonstrates basic usage of the National Renewable Energy Laboratory (NREL) Wind Integration National Dataset (WIND) Toolkit data. More complete examples can be found here: https://github.com/NREL/hsds-examples. \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import h5pyd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.image as mpimg" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "h5pyd.version.version # should be >= 0.4.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! hsinfo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# In the shell, use the --bucket option to list files from NREL's S3 bucket \n", + "! hsls --bucket s3://nrel-pds-hsds /nrel/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "# Open the wind data \"file\". Use the bucket param to get data from NREL's S3 bucket\n", + "%time f = h5pyd.File(\"/nrel/wtk-us.h5\", 'r', bucket=\"s3://nrel-pds-hsds\") " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "%time f.attrs['history'] # attributes can be used to provide desriptions of the content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "list(f) # list the datasets in the file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get the windspeed at 80 meters\n", + "dset = f['windspeed_80m']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "dset.id.id # if this is an int, then you are using h5py!" 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "dset.shape # shape is three-dimensional time x lat x lon"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "dset.dtype # type is four byte floats"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "dset.chunks # chunks describe how the dataset data is stored"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "dset.shape[0] * dset.shape[1] * dset.shape[2] * 4 # ~1 TB per dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# read one slice of the data\n",
+    "%time data = dset[522,::,::]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "plt.imshow(data, origin=\"lower\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Get all the values for a given geographic point\n",
+    "# this may take up to a minute\n",
+    "%time tseries = dset[:, 290, 201]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "len(tseries) # 7 years * 365 days * 24 hours"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tseries.min(), tseries.max(), tseries.mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "x = range(len(tseries))\n",
+    "plt.plot(x, tseries)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# get just one month of values\n",
+    "start = 25000 # any start value from 0 to 60648 leaves room for a full month\n",
+    "end = start + 30*24\n",
+    "%time tseries = dset[start:end, 1292, 601]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tseries.min(), tseries.max(), tseries.mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x = range(len(tseries))\n",
+    "plt.plot(x, tseries)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
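The timing contrast in the notebook above, a single time-step map read versus a full time series for one grid point, comes down to how many chunks each selection crosses. A back-of-the-envelope sketch of that arithmetic; the dataset extents and the (48, 64, 64) chunk shape below are assumed example figures, not read from the actual file (the dset.chunks cell reports the real layout):

def chunks_touched(chunks, start, end):
    # Number of chunks a hyperslab [start, end) intersects on each axis;
    # the product is how many chunk reads the server must service.
    n = 1
    for s, e, c in zip(start, end, chunks):
        n *= (e - 1) // c - s // c + 1
    return n

# Substitute the real dset.shape and dset.chunks values when running.
chunks = (48, 64, 64)
one_map = chunks_touched(chunks, (522, 0, 0), (523, 1602, 2976))      # one time step
one_point = chunks_touched(chunks, (0, 290, 201), (61368, 291, 202))  # full time series
print(one_map, one_point)

Either way the selection touches on the order of a thousand chunks, but the time-series read pulls a chunk for every stretch of the time axis, which is why it can take up to a minute.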
"3.9.15" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 817b66f9c330a99c96032bc94d0fd9a2f81379b4 Mon Sep 17 00:00:00 2001 From: jreadey Date: Mon, 8 Apr 2024 04:43:35 -0700 Subject: [PATCH 22/25] run 4 DN nodes --- .devcontainer/docker-compose.yaml | 53 ++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml index 47a258d..9db219c 100644 --- a/.devcontainer/docker-compose.yaml +++ b/.devcontainer/docker-compose.yaml @@ -31,7 +31,7 @@ services: volumes: - ../hsds/data:/data - ../hsds/config/:/config/ - dn: + dn1: image: hdfgroup/hsds:sha-ed38987 restart: on-failure mem_limit: 1g @@ -48,6 +48,57 @@ services: - ../hsds/config/:/config/ links: - head + dn2: + image: hdfgroup/hsds:sha-ed38987 + restart: on-failure + mem_limit: 1g + environment: + - NODE_TYPE=dn + - ROOT_DIR=/data + - BUCKET_NAME=hsdstest + ports: + - 6102 + depends_on: + - head + volumes: + - ../hsds/data:/data + - ../hsds/config/:/config/ + links: + - head + dn3: + image: hdfgroup/hsds:sha-ed38987 + restart: on-failure + mem_limit: 1g + environment: + - NODE_TYPE=dn + - ROOT_DIR=/data + - BUCKET_NAME=hsdstest + ports: + - 6103 + depends_on: + - head + volumes: + - ../hsds/data:/data + - ../hsds/config/:/config/ + links: + - head + dn4: + image: hdfgroup/hsds:sha-ed38987 + restart: on-failure + mem_limit: 1g + environment: + - NODE_TYPE=dn + - ROOT_DIR=/data + - BUCKET_NAME=hsdstest + ports: + - 6104 + depends_on: + - head + volumes: + - ../hsds/data:/data + - ../hsds/config/:/config/ + links: + - head sn: image: hdfgroup/hsds:sha-ed38987 restart: on-failure From 0c86b2544cdae506fe972a5b6e0796df510de75f Mon Sep 17 00:00:00 2001 From: jreadey Date: Mon, 8 Apr 2024 06:40:09 -0700 Subject: [PATCH 23/25] added nrel nsrdb example --- python/nrel_nsrdb_example.ipynb | 192 ++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 python/nrel_nsrdb_example.ipynb diff --git a/python/nrel_nsrdb_example.ipynb b/python/nrel_nsrdb_example.ipynb new file mode 100644 index 0000000..bc29bfd --- /dev/null +++ b/python/nrel_nsrdb_example.ipynb @@ -0,0 +1,192 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## NREL NSRDB Example\n", + "\n", + "This notebook illustrates accessing the NREL NSRDB (National Solar Radiation Database) using both h5pyd with HSDS and h5py with the HDF5 library" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "USE_H5PY = False # set to True to use h5py/hdf5lib instead\n", + "if USE_H5PY:\n", + " import h5py\n", + " import s3fs\n", + "else:\n", + " import h5pyd as h5py\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# In the shell, use the --bucket option to list files from NREL's S3 bucket \n", + "# run with \"-r\" option to see all domains\n", + "! hsls --bucket s3://nrel-pds-hsds /nrel/nsrdb/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "# Open the nsrdb file. 
+    "# Open the nsrdb file. Use the bucket param to get the data from NREL's S3 bucket\n",
+    "if USE_H5PY:\n",
+    "    s3 = s3fs.S3FileSystem()\n",
+    "    f = h5py.File(s3.open(\"s3://nrel-pds-nsrdb/conus/nsrdb_conus_pv_2022.h5\", \"rb\"), \"r\")\n",
+    "else:\n",
+    "    f = h5py.File(\"/nrel/nsrdb/conus/nsrdb_conus_2022.h5\", bucket=\"s3://nrel-pds-hsds\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# attributes can be used to provide descriptions of the content\n",
+    "%time f.attrs['version'] "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dset.id.id"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dset.shape # two-dimensional time x station_index"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dset.chunks"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# read one year of measurements for a given station_index\n",
+    "%time tseries = dset[::,1234567]\n",
+    "tseries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# get min, max, and mean values\n",
+    "tseries.min(), tseries.max(), tseries.mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# plot the data\n",
+    "x = range(len(tseries))\n",
+    "plt.plot(x, tseries)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This dataset is actually linked from an HDF5 file in a different bucket\n",
+    "if USE_H5PY:\n",
+    "    # this property doesn't exist for h5py\n",
+    "    layout = None\n",
+    "else:\n",
+    "    layout = dset.id.layout\n",
+    "layout"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The HSDS domain actually maps to several different HDF5 files\n",
+    "# compile a list of all the files\n",
+    "hdf5_files = set()\n",
+    "if not USE_H5PY:\n",
+    "    for k in f:\n",
+    "        dset = f[k]\n",
+    "        layout = dset.id.layout\n",
+    "        if \"file_uri\" in layout:\n",
+    "            hdf5_files.add(layout[\"file_uri\"])\n",
+    "hdf5_files"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "py39",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.9.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
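A possible follow-on to the layout cells at the end of the notebook above: group every dataset in the domain by the HDF5 file that backs it. This is only a sketch; it assumes, as the notebook's own cell does, that dset.id.layout behaves like a dict and carries a "file_uri" key for linked datasets:

from collections import defaultdict
import h5pyd

with h5pyd.File("/nrel/nsrdb/conus/nsrdb_conus_2022.h5", "r",
                bucket="s3://nrel-pds-hsds") as f:
    by_file = defaultdict(list)
    for name in f:
        # layout is assumed dict-like; linked datasets record the
        # HDF5 file they are mapped from under "file_uri"
        layout = f[name].id.layout or {}
        by_file[layout.get("file_uri", "<not linked>")].append(name)

for uri, names in sorted(by_file.items()):
    print(f"{uri}: {len(names)} dataset(s)")

This makes it easy to see how many source HDF5 files one HSDS domain aggregates.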
From 0dbb604c18d6821094ab2dcdb71c50ac73a60fb5 Mon Sep 17 00:00:00 2001
From: jreadey
Date: Mon, 8 Apr 2024 10:03:55 -0700
Subject: [PATCH 24/25] updated image for hsds

---
 .devcontainer/docker-compose.yaml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml
index 9db219c..7646845 100644
--- a/.devcontainer/docker-compose.yaml
+++ b/.devcontainer/docker-compose.yaml
@@ -17,7 +17,7 @@ services:
     # Runs app on the same network as the SN container, allows "forwardPorts" in devcontainer.json function.
     network_mode: service:sn
   head:
-    image: hdfgroup/hsds:sha-ed38987
+    image: hdfgroup/hsds:master
     restart: on-failure
     mem_limit: 512m
     environment:
@@ -32,7 +32,7 @@ services:
       - ../hsds/data:/data
       - ../hsds/config/:/config/
   dn1:
-    image: hdfgroup/hsds:sha-ed38987
+    image: hdfgroup/hsds:master
     restart: on-failure
     mem_limit: 1g
     environment:
@@ -49,7 +49,7 @@ services:
     links:
       - head
   dn2:
-    image: hdfgroup/hsds:sha-ed38987
+    image: hdfgroup/hsds:master
    restart: on-failure
     mem_limit: 1g
     environment:
@@ -66,7 +66,7 @@ services:
     links:
       - head
   dn3:
-    image: hdfgroup/hsds:sha-ed38987
+    image: hdfgroup/hsds:master
     restart: on-failure
     mem_limit: 1g
     environment:
@@ -83,7 +83,7 @@ services:
     links:
       - head
   dn4:
-    image: hdfgroup/hsds:sha-ed38987
+    image: hdfgroup/hsds:master
     restart: on-failure
     mem_limit: 1g
     environment:
@@ -100,7 +100,7 @@ services:
     links:
       - head
   sn:
-    image: hdfgroup/hsds:sha-ed38987
+    image: hdfgroup/hsds:master
     restart: on-failure
     mem_limit: 1g
     environment:

From 8192050ab798a8ae51a1b92a01424e7c480fcc02 Mon Sep 17 00:00:00 2001
From: jreadey
Date: Tue, 9 Apr 2024 09:12:59 -0700
Subject: [PATCH 25/25] set postCreateCommand

---
 .devcontainer/devcontainer.json |  1 +
 python/nrel_nsrdb_example.ipynb | 45 +++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 57c80df..28283ff 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,6 +1,7 @@
 {
     "name": "HDF5 Tutorial",
     "dockerComposeFile": "docker-compose.yaml",
+    "postCreateCommand": "conda init",
    "service": "app",
     "workspaceFolder": "/workspace",
     "forwardPorts": [5101],
diff --git a/python/nrel_nsrdb_example.ipynb b/python/nrel_nsrdb_example.ipynb
index bc29bfd..9fa75ed 100644
--- a/python/nrel_nsrdb_example.ipynb
+++ b/python/nrel_nsrdb_example.ipynb
@@ -62,6 +62,25 @@
     "%time f.attrs['version'] "
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "list(f) # datasets under root group"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dset = f[\"air_temperature\"]\n",
+    "dset"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -89,6 +108,24 @@
     "dset.chunks"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.prod(dset.chunks) * dset.dtype.itemsize # number of bytes per chunk"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(dset.shape[0] // dset.chunks[0]) * (dset.shape[1] // dset.chunks[1]) # number of chunks in the dataset"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -183,7 +220,15 @@
    "name": "python3"
   },
   "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
    "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
    "version": "3.9.15"
   }
  },