diff --git a/README.md b/README.md index 2034d85..71fe405 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ pip install git+https://github.com/sign-language-processing/pose-evaluation.git ``` Create a metric: + ```python from pose_evaluation.metrics.distance_metric import DistanceMetric from pose_evaluation.metrics.dtw_metric import DTWDTAIImplementationDistanceMeasure @@ -38,18 +39,32 @@ DTWp = DistanceMetric( ``` Evaluate two pose sequences: + ```python from pose_format import Pose with open("hypothesis.pose", "rb") as f: hypothesis = Pose.read(f) - + with open("reference.pose", "rb") as f: reference = Pose.read(f) DTWp.score(hypothesis, reference) ``` +### Demo Notebook + +For a demonstration of how to use the package, see the [demo notebook](examples/Pose_Evaluation_Example.ipynb). + +Open In Colab + + +Demonstrates: + +- How to reconstruct the metrics from our paper. +- How to use them to score poses, with signatures. +- How to score poses with different lengths, missing/undetected keypoints, or different keypoint formats. + ### Contributing Please make sure to run `make format` before submitting a pull request. diff --git a/examples/Pose_Evaluation_Example.ipynb b/examples/Pose_Evaluation_Example.ipynb new file mode 100644 index 0000000..447b16a --- /dev/null +++ b/examples/Pose_Evaluation_Example.ipynb @@ -0,0 +1,1458 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Demonstrating the Pose Evaluation Repo\n", + "\n", + "This notebook shows how to use the _pose evaluation_ toolkit (https://github.com/sign-language-processing/pose-evaluation).\n", + "\n", + "Demonstrates:\n", + "* How to reconstruct the metrics from our paper.\n", + "* How to use them to score poses, with signatures.\n", + "* How to score poses with different lengths, missing/undetected keypoints, or different keypoint formats.\n", + "\n", + "\n", + "```\n", + "@misc{pose-evaluation2025,\n", + " title={Meaningful Pose-Based Sign Language Evaluation},\n", + " author={Zifan Jiang, Colin Leong, Amit Moryossef, Anne Göhring, Annette Rios, Oliver Cory, Maksym Ivashechkin, Neha Tarigopula, Biao Zhang, Rico Sennrich, Sarah Ebling},\n", + " howpublished={\\url{https://github.com/sign-language-processing/pose-evaluation}},\n", + " year={2025}\n", + "}\n", + "```" + ], + "metadata": { + "id": "oPvpbVsiPESx" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Install from source\n", + "\n", + "Colab will likely ask you to restart the kernel. Do so, then skip to the imports."
+ ], + "metadata": { + "id": "zswMbGypT4ib" + } + }, + { + "cell_type": "code", + "source": [ + "!git clone https://github.com/sign-language-processing/pose-evaluation.git" + ], + "metadata": { + "id": "S6WMiN8yoBKd", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "b1620e36-8e88-4de9-9180-467d2bcff848" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "fatal: destination path 'pose-evaluation' already exists and is not an empty directory.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "%cd pose-evaluation" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ch7Ew5gOn2pG", + "outputId": "31799cd5-1a2c-4176-848b-499b8d3f76ab" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/pose-evaluation\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install -e ." + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "RDGj8Zcwn48l", + "outputId": "cb9a96dc-3894-4543-a96c-f213753b7196" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Obtaining file:///content/pose-evaluation\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Checking if build backend supports build_editable ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build editable ... \u001b[?25l\u001b[?25hdone\n", + " Preparing editable metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting sign_language_segmentation@ git+https://github.com/sign-language-processing/segmentation (from pose-evaluation==0.0.1)\n", + " Cloning https://github.com/sign-language-processing/segmentation to /tmp/pip-install-1f5eu698/sign-language-segmentation_f9733d4a7bc448bdb9a89568f43eec91\n", + " Running command git clone --filter=blob:none --quiet https://github.com/sign-language-processing/segmentation /tmp/pip-install-1f5eu698/sign-language-segmentation_f9733d4a7bc448bdb9a89568f43eec91\n", + " Resolved https://github.com/sign-language-processing/segmentation to commit 4ac7b10b9878b6c60bbc14ba8ebe09af386f0cfe\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting spoken-to-signed@ git+https://github.com/ZurichNLP/spoken-to-signed-translation.git (from pose-evaluation==0.0.1)\n", + " Cloning https://github.com/ZurichNLP/spoken-to-signed-translation.git to /tmp/pip-install-1f5eu698/spoken-to-signed_16091e7bff3841779069ace04a7df372\n", + " Running command git clone --filter=blob:none --quiet https://github.com/ZurichNLP/spoken-to-signed-translation.git /tmp/pip-install-1f5eu698/spoken-to-signed_16091e7bff3841779069ace04a7df372\n", + " Resolved https://github.com/ZurichNLP/spoken-to-signed-translation.git to commit 21a34fbb7ae6439eb8ed54b0c4a2a5c4538a7977\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: pose-format in /usr/local/lib/python3.12/dist-packages (from pose-evaluation==0.0.1) (0.10.5)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.12/dist-packages (from pose-evaluation==0.0.1) (1.16.2)\n", + "Requirement already satisfied: torch in /usr/local/lib/python3.12/dist-packages (from pose-evaluation==0.0.1) (2.8.0+cu126)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.12/dist-packages (from pose-evaluation==0.0.1) (1.26.4)\n", + "Requirement already satisfied: sentence-transformers in /usr/local/lib/python3.12/dist-packages (from pose-evaluation==0.0.1) (5.1.1)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.12/dist-packages (from pose-evaluation==0.0.1) (2.2.2)\n", + "Requirement already satisfied: fastdtw in /usr/local/lib/python3.12/dist-packages (from pose-evaluation==0.0.1) (0.3.4)\n", + "Requirement already satisfied: dtaidistance in /usr/local/lib/python3.12/dist-packages (from pose-evaluation==0.0.1) (2.3.13)\n", + "Requirement already satisfied: typer in /usr/local/lib/python3.12/dist-packages (from pose-evaluation==0.0.1) (0.19.2)\n", + "Requirement already satisfied: torchmetrics in /usr/local/lib/python3.12/dist-packages (from pose-evaluation==0.0.1) (1.8.2)\n", + "Requirement already satisfied: kaleido in /usr/local/lib/python3.12/dist-packages (from pose-evaluation==0.0.1) (1.1.0)\n", + "Requirement already satisfied: choreographer>=1.0.10 in /usr/local/lib/python3.12/dist-packages (from kaleido->pose-evaluation==0.0.1) (1.1.2)\n", + "Requirement already satisfied: logistro>=1.0.8 in /usr/local/lib/python3.12/dist-packages (from kaleido->pose-evaluation==0.0.1) (1.1.0)\n", + "Requirement already satisfied: orjson>=3.10.15 in /usr/local/lib/python3.12/dist-packages (from kaleido->pose-evaluation==0.0.1) (3.11.3)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.12/dist-packages (from kaleido->pose-evaluation==0.0.1) (25.0)\n", + "Requirement already satisfied: pytest-timeout>=2.4.0 in /usr/local/lib/python3.12/dist-packages (from kaleido->pose-evaluation==0.0.1) (2.4.0)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas->pose-evaluation==0.0.1) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas->pose-evaluation==0.0.1) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas->pose-evaluation==0.0.1) (2025.2)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.12/dist-packages (from pose-format->pose-evaluation==0.0.1) (4.67.1)\n", + "Requirement already satisfied: transformers<5.0.0,>=4.41.0 in /usr/local/lib/python3.12/dist-packages (from sentence-transformers->pose-evaluation==0.0.1) (4.56.2)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.12/dist-packages (from sentence-transformers->pose-evaluation==0.0.1) (1.6.1)\n", + "Requirement already satisfied: huggingface-hub>=0.20.0 in /usr/local/lib/python3.12/dist-packages (from sentence-transformers->pose-evaluation==0.0.1) (0.35.3)\n", + "Requirement already satisfied: Pillow in /usr/local/lib/python3.12/dist-packages (from sentence-transformers->pose-evaluation==0.0.1) (11.3.0)\n", + "Requirement already satisfied: typing_extensions>=4.5.0 in /usr/local/lib/python3.12/dist-packages (from 
sentence-transformers->pose-evaluation==0.0.1) (4.15.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (3.19.1)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (75.2.0)\n", + "Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (1.13.3)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (3.5)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (3.1.6)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (2025.3.0)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (12.6.77)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (12.6.77)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.6.80 in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (12.6.80)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.10.2.21 in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (9.10.2.21)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.6.4.1 in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (12.6.4.1)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.3.0.4 in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (11.3.0.4)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.7.77 in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (10.3.7.77)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.7.1.2 in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (11.7.1.2)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.5.4.2 in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (12.5.4.2)\n", + "Requirement already satisfied: nvidia-cusparselt-cu12==0.7.1 in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (0.7.1)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.27.3 in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (2.27.3)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (12.6.77)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12==12.6.85 in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (12.6.85)\n", + "Requirement already satisfied: nvidia-cufile-cu12==1.11.1.6 in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (1.11.1.6)\n", + "Requirement already satisfied: triton==3.4.0 in /usr/local/lib/python3.12/dist-packages (from torch->pose-evaluation==0.0.1) (3.4.0)\n", + "Requirement already satisfied: pympi-ling in /usr/local/lib/python3.12/dist-packages (from sign_language_segmentation@ git+https://github.com/sign-language-processing/segmentation->pose-evaluation==0.0.1) (1.71)\n", + "Requirement already satisfied: simplemma>=1.0.0 in /usr/local/lib/python3.12/dist-packages (from 
spoken-to-signed@ git+https://github.com/ZurichNLP/spoken-to-signed-translation.git->pose-evaluation==0.0.1) (1.1.2)\n", + "Requirement already satisfied: lightning-utilities>=0.8.0 in /usr/local/lib/python3.12/dist-packages (from torchmetrics->pose-evaluation==0.0.1) (0.15.2)\n", + "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.12/dist-packages (from typer->pose-evaluation==0.0.1) (8.3.0)\n", + "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.12/dist-packages (from typer->pose-evaluation==0.0.1) (1.5.4)\n", + "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.12/dist-packages (from typer->pose-evaluation==0.0.1) (13.9.4)\n", + "Requirement already satisfied: simplejson>=3.19.3 in /usr/local/lib/python3.12/dist-packages (from choreographer>=1.0.10->kaleido->pose-evaluation==0.0.1) (3.20.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub>=0.20.0->sentence-transformers->pose-evaluation==0.0.1) (6.0.3)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from huggingface-hub>=0.20.0->sentence-transformers->pose-evaluation==0.0.1) (2.32.4)\n", + "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub>=0.20.0->sentence-transformers->pose-evaluation==0.0.1) (1.1.10)\n", + "Requirement already satisfied: pytest>=7.0.0 in /usr/local/lib/python3.12/dist-packages (from pytest-timeout>=2.4.0->kaleido->pose-evaluation==0.0.1) (8.4.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas->pose-evaluation==0.0.1) (1.17.0)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.12/dist-packages (from rich>=10.11.0->typer->pose-evaluation==0.0.1) (4.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.12/dist-packages (from rich>=10.11.0->typer->pose-evaluation==0.0.1) (2.19.2)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from sympy>=1.13.3->torch->pose-evaluation==0.0.1) (1.3.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers<5.0.0,>=4.41.0->sentence-transformers->pose-evaluation==0.0.1) (2024.11.6)\n", + "Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in /usr/local/lib/python3.12/dist-packages (from transformers<5.0.0,>=4.41.0->sentence-transformers->pose-evaluation==0.0.1) (0.22.1)\n", + "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers<5.0.0,>=4.41.0->sentence-transformers->pose-evaluation==0.0.1) (0.6.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2->torch->pose-evaluation==0.0.1) (3.0.3)\n", + "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn->sentence-transformers->pose-evaluation==0.0.1) (1.5.2)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn->sentence-transformers->pose-evaluation==0.0.1) (3.6.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.12/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer->pose-evaluation==0.0.1) (0.1.2)\n", + "Requirement already satisfied: iniconfig>=1 in 
/usr/local/lib/python3.12/dist-packages (from pytest>=7.0.0->pytest-timeout>=2.4.0->kaleido->pose-evaluation==0.0.1) (2.1.0)\n", + "Requirement already satisfied: pluggy<2,>=1.5 in /usr/local/lib/python3.12/dist-packages (from pytest>=7.0.0->pytest-timeout>=2.4.0->kaleido->pose-evaluation==0.0.1) (1.6.0)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface-hub>=0.20.0->sentence-transformers->pose-evaluation==0.0.1) (3.4.3)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface-hub>=0.20.0->sentence-transformers->pose-evaluation==0.0.1) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface-hub>=0.20.0->sentence-transformers->pose-evaluation==0.0.1) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface-hub>=0.20.0->sentence-transformers->pose-evaluation==0.0.1) (2025.8.3)\n", + "Building wheels for collected packages: pose-evaluation\n", + " Building editable for pose-evaluation (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for pose-evaluation: filename=pose_evaluation-0.0.1-0.editable-py3-none-any.whl size=5503 sha256=9d1eb17dccd1ea8a211faf621b212515a83ec719221b31b12cba62cead6e0f61\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-71_q7ye0/wheels/9b/48/5d/a2efbbc8ca2c54697769a785a456d4df3c6e976041602c68eb\n", + "Successfully built pose-evaluation\n", + "Installing collected packages: pose-evaluation\n", + " Attempting uninstall: pose-evaluation\n", + " Found existing installation: pose-evaluation 0.0.1\n", + " Uninstalling pose-evaluation-0.0.1:\n", + " Successfully uninstalled pose-evaluation-0.0.1\n", + "Successfully installed pose-evaluation-0.0.1\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "pose_evaluation" + ] + }, + "id": "31413e565e3c4390800e6d0be5d0c88b" + } + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Imports" + ], + "metadata": { + "id": "UfcEqyyhUDDJ" + } + }, + { + "cell_type": "code", + "source": [ + "from pathlib import Path\n", + "\n", + "from pose_format import Pose\n", + "\n", + "from pose_evaluation.metrics.distance_measure import AggregatedPowerDistance\n", + "from pose_evaluation.metrics.distance_metric import DistanceMetric\n", + "from pose_evaluation.metrics.dtw_metric import DTWDTAIImplementationDistanceMeasure\n", + "from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric\n", + "from pose_evaluation.metrics.pose_processors import (\n", + " HideLegsPosesProcessor,\n", + " NormalizePosesProcessor,\n", + " ReduceHolisticPoseProcessor,\n", + " ZeroPadShorterPosesProcessor,\n", + " get_standard_pose_processors,\n", + ")\n", + "from pose_evaluation.evaluation.create_metrics import construct_metric" + ], + "metadata": { + "id": "KYyuniWfnQ-F" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# DTW$p$\n", + "DTW$p$ is one of two top pose-similarity metrics from the paper.\n", + "\n", + "DTWp=DTW+Trim+Default0.0+Hands-Only\n", + "\n", + "\n", + "(Masked Fill is 10.0 by default, and thus not mentioned)" + ], + "metadata": { + "id": "BX0yW8-rO2lq" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Construct metric 
via convenience function" + ], + "metadata": { + "id": "RvgYydpGr_kq" + } + }, + { + "cell_type": "code", + "source": [ + "\n", + "\n", + "###############################################\n", + "# Construct DTWp via convenience function, aka\n", + "# startendtrimmed_unnormalized_hands_defaultdist0.0_nointerp_dtw_fillmasked10.0_dtaiDTWAggregatedDistanceMetricFast\n", + "DTWp = construct_metric(\n", + " distance_measure=DTWDTAIImplementationDistanceMeasure(name=\"dtaiDTWAggregatedDistanceMeasureFast\", use_fast=True),\n", + " default_distance=0.0,\n", + " trim_meaningless_frames=True,\n", + " normalize=False,\n", + " sequence_alignment=\"dtw\",\n", + " keypoint_selection=\"hands\", # keep only hand keypoints for all poses\n", + " masked_fill_value=10.0, # fill masked values with 10.0\n", + " fps=None, # don't interpolate fps\n", + " name = None, # autogenerate name\n", + " )\n", + "DTWp.name\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 52 + }, + "id": "5633jCdSnheV", + "outputId": "a399aeba-f6bf-4a97-b0a3-2884a2d1d84b" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'startendtrimmed_unnormalized_hands_defaultdist0.0_nointerp_dtw_fillmasked10.0_dtaiDTWAggregatedDistanceMetricFast'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 2 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Print metric signature" + ], + "metadata": { + "id": "uCLs8KkDr9mC" + } + }, + { + "cell_type": "code", + "source": [ + "DTWp.get_signature()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vGujN1OGr1M5", + "outputId": "e41c152d-091c-4fd4-9102-f8fdb6f83a88" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "startendtrimmed_unnormalized_hands_defaultdist0.0_nointerp_dtw_fillmasked10.0_dtaiDTWAggregatedDistanceMetricFast|higher_is_better:no|pose_preprocessors:[trim_pose|start:yes|end:yes,get_hands_only,fill_masked_or_invalid|fill_val:10.0,reduce_poses_to_intersection]|distance_measure:{dtaiDTWAggregatedDistanceMeasureFast|default_distance:0.0|aggregation_strategy:mean|use_fast:yes}" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Construct the same metric from scratch" + ], + "metadata": { + "id": "nR1z8idKry9z" + } + }, + { + "cell_type": "code", + "source": [ + "from pose_evaluation.metrics.pose_processors import (\n", + " AddTOffsetsToZPoseProcessor,\n", + " FillMaskedOrInvalidValuesPoseProcessor,\n", + " FirstFramePadShorterPosesProcessor,\n", + " GetHandsOnlyHolisticPoseProcessor,\n", + " GetYoutubeASLKeypointsPoseProcessor,\n", + " HideLegsPosesProcessor,\n", + " InterpolateAllToSetFPSPoseProcessor,\n", + " MaskInvalidValuesPoseProcessor,\n", + " NormalizePosesProcessor,\n", + " ReduceHolisticPoseProcessor,\n", + " ReducePosesToCommonComponentsProcessor,\n", + " RemoveWorldLandmarksProcessor,\n", + " TrimMeaninglessFramesPoseProcessor,\n", + " ZeroPadShorterPosesProcessor,\n", + ")\n", + "\n", + "# select distance measure with default distance\n", + "measure = DTWDTAIImplementationDistanceMeasure(name=\"dtaiDTWAggregatedDistanceMeasureFast\", use_fast=True, default_distance=0.0)\n", + "\n", + "# create pose preprocessing pipeline\n", + "pose_preprocessors =[]\n", + "pose_preprocessors.append(TrimMeaninglessFramesPoseProcessor())\n", 
+ "# pose_preprocessors.append(NormalizePosesProcessor()) # this metric doesn't do normalization\n", + "pose_preprocessors.append(GetHandsOnlyHolisticPoseProcessor()) # select only the hands\n", + "pose_preprocessors.append(FillMaskedOrInvalidValuesPoseProcessor(masked_fill_value=10.0)) # fill masked values with 10.0\n", + "# pose_preprocessors.append(InterpolateAllToSetFPSPoseProcessor(fps=None)) # don't interpolate\n", + "\n", + "# reduce pairs of poses to common components\n", + "pose_preprocessors.append(ReducePosesToCommonComponentsProcessor())\n", + "\n", + "\n", + "DTWp_from_scratch = DistanceMetric(\n", + " distance_measure=measure,\n", + " name=\"DTWp\",\n", + " pose_preprocessors=pose_preprocessors,\n", + " )\n", + "DTWp_from_scratch.get_signature()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "rw4tAH9_rtmn", + "outputId": "592ae26f-e8e8-4c3c-e616-d2a4c205fdd2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "DTWp|higher_is_better:no|pose_preprocessors:[trim_pose|start:yes|end:yes,get_hands_only,fill_masked_or_invalid|fill_val:10.0,reduce_poses_to_intersection]|distance_measure:{dtaiDTWAggregatedDistanceMeasureFast|default_distance:0.0|aggregation_strategy:mean|use_fast:yes}" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Compare signatures\n", + "Other than the _name_ the two signatures are identical." + ], + "metadata": { + "id": "wlqJ6Xc4uez_" + } + }, + { + "cell_type": "code", + "source": [ + "print(DTWp.get_signature().format())\n", + "print(\"\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \"+DTWp_from_scratch.get_signature().format())" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ABk7Vntssu5E", + "outputId": "576166b7-5825-43e4-bc26-6afb4bce7de7" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "startendtrimmed_unnormalized_hands_defaultdist0.0_nointerp_dtw_fillmasked10.0_dtaiDTWAggregatedDistanceMetricFast|higher_is_better:no|pose_preprocessors:[trim_pose|start:yes|end:yes,get_hands_only,fill_masked_or_invalid|fill_val:10.0,reduce_poses_to_intersection]|distance_measure:{dtaiDTWAggregatedDistanceMeasureFast|default_distance:0.0|aggregation_strategy:mean|use_fast:yes}\n", + "\t\t\t\t\t\t\t\t\t\t\t\t\t DTWp|higher_is_better:no|pose_preprocessors:[trim_pose|start:yes|end:yes,get_hands_only,fill_masked_or_invalid|fill_val:10.0,reduce_poses_to_intersection]|distance_measure:{dtaiDTWAggregatedDistanceMeasureFast|default_distance:0.0|aggregation_strategy:mean|use_fast:yes}\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "DTWp_sig_without_name = DTWp.get_signature().format().replace(DTWp.name,\"\")\n", + "DTWp_from_scratch_without_name = DTWp_from_scratch.get_signature().format().replace(DTWp_from_scratch.name, \"\")\n", + "print(DTWp_sig_without_name)\n", + "print(DTWp_from_scratch_without_name)\n", + "DTWp_sig_without_name == DTWp_from_scratch_without_name" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xcOKWVqMuOKO", + "outputId": "34e7c7b6-0364-4126-a8ff-7dcaf8fcab16" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"|higher_is_better:no|pose_preprocessors:[trim_pose|start:yes|end:yes,get_hands_only,fill_masked_or_invalid|fill_val:10.0,reduce_poses_to_intersection]|distance_measure:{dtaiDTWAggregatedDistanceMeasureFast|default_distance:0.0|aggregation_strategy:mean|use_fast:yes}\n", + "|higher_is_better:no|pose_preprocessors:[trim_pose|start:yes|end:yes,get_hands_only,fill_masked_or_invalid|fill_val:10.0,reduce_poses_to_intersection]|distance_measure:{dtaiDTWAggregatedDistanceMeasureFast|default_distance:0.0|aggregation_strategy:mean|use_fast:yes}\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# nDTWp\n", + "\n", + "aka\n", + "\n", + "DTW+Default1.0+MaskFill1.0+Norm.+Hands-Only" + ], + "metadata": { + "id": "i99FTst8P2vt" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Convenience Function" + ], + "metadata": { + "id": "WAwiDY9lSvrT" + } + }, + { + "cell_type": "code", + "source": [ + "###############################################\n", + "# Construct nDTWp via convenience function, aka\n", + "# DTW +Default1.0 +MaskFill1.0 +Norm. +Hands-Only\n", + "# untrimmed_normalizedbyshoulders_hands_defaultdist1.0_nointerp_dtw_fillmasked1.0_dtaiDTWAggregatedDistanceMetricFast\n", + "\n", + "nDTWp = construct_metric(\n", + " distance_measure=DTWDTAIImplementationDistanceMeasure(name=\"dtaiDTWAggregatedDistanceMeasureFast\", use_fast=True),\n", + " default_distance=1.0,\n", + " trim_meaningless_frames=False,\n", + " normalize=True,\n", + " sequence_alignment=\"dtw\",\n", + " keypoint_selection=\"hands\", # keep only hand keypoints for all poses\n", + " masked_fill_value=1.0, # fill masked values with 10.0\n", + " fps=None, # don't interpolate fps\n", + " name = None, # autogenerate name\n", + " )\n", + "nDTWp.name" + ], + "metadata": { + "id": "Ns9FhhiRveWK", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 52 + }, + "outputId": "57598e90-56ed-43b9-9e02-1fc9ea4685fa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'untrimmed_normalizedbyshoulders_hands_defaultdist1.0_nointerp_dtw_fillmasked1.0_dtaiDTWAggregatedDistanceMetricFast'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## From Scratch" + ], + "metadata": { + "id": "kcKhnfVkTJhL" + } + }, + { + "cell_type": "code", + "source": [ + "from pose_evaluation.metrics.pose_processors import (\n", + " AddTOffsetsToZPoseProcessor,\n", + " FillMaskedOrInvalidValuesPoseProcessor,\n", + " FirstFramePadShorterPosesProcessor,\n", + " GetHandsOnlyHolisticPoseProcessor,\n", + " GetYoutubeASLKeypointsPoseProcessor,\n", + " HideLegsPosesProcessor,\n", + " InterpolateAllToSetFPSPoseProcessor,\n", + " MaskInvalidValuesPoseProcessor,\n", + " NormalizePosesProcessor,\n", + " ReduceHolisticPoseProcessor,\n", + " ReducePosesToCommonComponentsProcessor,\n", + " RemoveWorldLandmarksProcessor,\n", + " TrimMeaninglessFramesPoseProcessor,\n", + " ZeroPadShorterPosesProcessor,\n", + ")\n", + "\n", + "# select distance measure with default distance\n", + "measure = DTWDTAIImplementationDistanceMeasure(name=\"dtaiDTWAggregatedDistanceMeasureFast\", use_fast=True, default_distance=1.0)\n", + "\n", + "# create pose preprocessing pipeline\n", + "pose_preprocessors =[]\n", + "# 
pose_preprocessors.append(TrimMeaninglessFramesPoseProcessor()) # don't trim\n", + "pose_preprocessors.append(NormalizePosesProcessor()) # this metric DOES do normalization\n", + "pose_preprocessors.append(GetHandsOnlyHolisticPoseProcessor()) # select only the hands\n", + "pose_preprocessors.append(FillMaskedOrInvalidValuesPoseProcessor(masked_fill_value=1.0)) # fill masked values with 1.0\n", + "# pose_preprocessors.append(InterpolateAllToSetFPSPoseProcessor(fps=None)) # don't interpolate\n", + "\n", + "# reduce pairs of poses to common components\n", + "pose_preprocessors.append(ReducePosesToCommonComponentsProcessor())\n", + "\n", + "\n", + "nDTWp_from_scratch = DistanceMetric(\n", + " distance_measure=measure,\n", + " name=\"nDTWp\",\n", + " pose_preprocessors=pose_preprocessors,\n", + " )\n", + "nDTWp_from_scratch.get_signature()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0gbA2d6zSx-j", + "outputId": "78369e72-6a59-4f12-e4f4-7b9fd5153534" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "nDTWp|higher_is_better:no|pose_preprocessors:[normalize_poses|scale_factor:1,get_hands_only,fill_masked_or_invalid|fill_val:1.0,reduce_poses_to_intersection]|distance_measure:{dtaiDTWAggregatedDistanceMeasureFast|default_distance:1.0|aggregation_strategy:mean|use_fast:yes}" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Compare signatures\n", + "Other than the _name_ the two signatures are identical." + ], + "metadata": { + "id": "A9Zey4BkTMBU" + } + }, + { + "cell_type": "code", + "source": [ + "nDTWp_sig_without_name = nDTWp.get_signature().format().replace(nDTWp.name,\"\")\n", + "nDTWp_from_scratch_without_name = nDTWp_from_scratch.get_signature().format().replace(nDTWp_from_scratch.name, \"\")\n", + "print(nDTWp_sig_without_name)\n", + "print(nDTWp_from_scratch_without_name)\n", + "nDTWp_sig_without_name == nDTWp_from_scratch_without_name" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hGTrYVwkTBvr", + "outputId": "00b9336a-4147-49cc-e7c1-cf1304163a96" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "|higher_is_better:no|pose_preprocessors:[normalize_poses|scale_factor:1,get_hands_only,fill_masked_or_invalid|fill_val:1.0,reduce_poses_to_intersection]|distance_measure:{dtaiDTWAggregatedDistanceMeasureFast|default_distance:1.0|aggregation_strategy:mean|use_fast:yes}\n", + "|higher_is_better:no|pose_preprocessors:[normalize_poses|scale_factor:1,get_hands_only,fill_masked_or_invalid|fill_val:1.0,reduce_poses_to_intersection]|distance_measure:{dtaiDTWAggregatedDistanceMeasureFast|default_distance:1.0|aggregation_strategy:mean|use_fast:yes}\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Demonstration\n", + "\n", + "Let us load in some poses and demonstrate!\n", + "\n", + "We use the _pose-format_ library (https://github.com/sign-language-processing/pose)" + ], + "metadata": { + "id": "3Tqw_fp-UJJq" + } + }, + { + "cell_type": "code", + "source": [ + "house_1_path = Path(\"/content/pose-evaluation/pose_evaluation/utils/test/test_data/mediapipe/standard_landmarks/colin-1-HOUSE.pose\")" + ], + "metadata": { + "id": "aBkX7TZYUVrK" + }, + "execution_count": null, + 
"outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Pose with Mediapipe Landmarks\n", + "Here we load in a file with Mediapipe format landmarks. There are 576 keypoints." + ], + "metadata": { + "id": "Mr7jkdTuUwMZ" + } + }, + { + "cell_type": "code", + "source": [ + "house1_pose = Pose.read(house_1_path.read_bytes())\n", + "print(house1_pose)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Eaogn1ifUKib", + "outputId": "c9470925-2206-4b30-cef5-25b07a759985" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Pose\n", + "PoseHeader\n", + "Version: 0.20000000298023224\n", + "PoseHeaderDimensions(width=1280, height=720, depth=0)\n", + "Bounding Box: False\n", + "Components:\n", + "PoseHeaderComponent: POSE_LANDMARKS\n", + " Format: XYZC\n", + " Points: ['NOSE', 'LEFT_EYE_INNER', 'LEFT_EYE', 'LEFT_EYE_OUTER', 'RIGHT_EYE_INNER', 'RIGHT_EYE', 'RIGHT_EYE_OUTER', 'LEFT_EAR', 'RIGHT_EAR', 'MOUTH_LEFT', 'MOUTH_RIGHT', 'LEFT_SHOULDER', 'RIGHT_SHOULDER', 'LEFT_ELBOW', 'RIGHT_ELBOW', 'LEFT_WRIST', 'RIGHT_WRIST', 'LEFT_PINKY', 'RIGHT_PINKY', 'LEFT_INDEX', 'RIGHT_INDEX', 'LEFT_THUMB', 'RIGHT_THUMB', 'LEFT_HIP', 'RIGHT_HIP', 'LEFT_KNEE', 'RIGHT_KNEE', 'LEFT_ANKLE', 'RIGHT_ANKLE', 'LEFT_HEEL', 'RIGHT_HEEL', 'LEFT_FOOT_INDEX', 'RIGHT_FOOT_INDEX']\n", + " Limbs: 35\n", + " Colors: 1\n", + "\n", + "PoseHeaderComponent: FACE_LANDMARKS\n", + " Format: XYZC\n", + " Points: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', '284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', 
'310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467']\n", + " Limbs: 2556\n", + " Colors: 1\n", + "\n", + "PoseHeaderComponent: LEFT_HAND_LANDMARKS\n", + " Format: XYZC\n", + " Points: ['WRIST', 'THUMB_CMC', 'THUMB_MCP', 'THUMB_IP', 'THUMB_TIP', 'INDEX_FINGER_MCP', 'INDEX_FINGER_PIP', 'INDEX_FINGER_DIP', 'INDEX_FINGER_TIP', 'MIDDLE_FINGER_MCP', 'MIDDLE_FINGER_PIP', 'MIDDLE_FINGER_DIP', 'MIDDLE_FINGER_TIP', 'RING_FINGER_MCP', 'RING_FINGER_PIP', 'RING_FINGER_DIP', 'RING_FINGER_TIP', 'PINKY_MCP', 'PINKY_PIP', 'PINKY_DIP', 'PINKY_TIP']\n", + " Limbs: 21\n", + " Colors: 21\n", + "\n", + "PoseHeaderComponent: RIGHT_HAND_LANDMARKS\n", + " Format: XYZC\n", + " Points: ['WRIST', 'THUMB_CMC', 'THUMB_MCP', 'THUMB_IP', 'THUMB_TIP', 'INDEX_FINGER_MCP', 'INDEX_FINGER_PIP', 'INDEX_FINGER_DIP', 'INDEX_FINGER_TIP', 'MIDDLE_FINGER_MCP', 'MIDDLE_FINGER_PIP', 'MIDDLE_FINGER_DIP', 'MIDDLE_FINGER_TIP', 'RING_FINGER_MCP', 'RING_FINGER_PIP', 'RING_FINGER_DIP', 'RING_FINGER_TIP', 'PINKY_MCP', 'PINKY_PIP', 'PINKY_DIP', 'PINKY_TIP']\n", + " Limbs: 21\n", + " Colors: 21\n", + "\n", + "PoseHeaderComponent: POSE_WORLD_LANDMARKS\n", + " Format: XYZC\n", + " Points: ['NOSE', 'LEFT_EYE_INNER', 'LEFT_EYE', 'LEFT_EYE_OUTER', 'RIGHT_EYE_INNER', 'RIGHT_EYE', 'RIGHT_EYE_OUTER', 'LEFT_EAR', 'RIGHT_EAR', 'MOUTH_LEFT', 'MOUTH_RIGHT', 'LEFT_SHOULDER', 'RIGHT_SHOULDER', 'LEFT_ELBOW', 'RIGHT_ELBOW', 'LEFT_WRIST', 'RIGHT_WRIST', 'LEFT_PINKY', 'RIGHT_PINKY', 'LEFT_INDEX', 'RIGHT_INDEX', 'LEFT_THUMB', 'RIGHT_THUMB', 'LEFT_HIP', 'RIGHT_HIP', 'LEFT_KNEE', 'RIGHT_KNEE', 'LEFT_ANKLE', 'RIGHT_ANKLE', 'LEFT_HEEL', 'RIGHT_HEEL', 'LEFT_FOOT_INDEX', 'RIGHT_FOOT_INDEX']\n", + " Limbs: 35\n", + " Colors: 1\n", + "\n", + "\n", + "NumPyPoseBody\n", + "FPS: 30.101957321166992\n", + "Data: (93, 1, 576, 3), float32\n", + "Confidence shape: (93, 1, 576), float32\n", + "Duration (seconds): 3.0895000948860085\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Note:\n", + "(93, 1, 576, 3)\n", + "\n", + "This implies the file has data for 93 frames, 1 person, 576 keypoints, and 3 dimensions (xyz coordinates)." 
+ ], + "metadata": { + "id": "Nrw6Lz5hVQs4" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Longer Pose with Refined Mediapipe Landmarks.\n", + "\n", + "Mediapipe has an option to \"refine\" landmarks.\n" + ], + "metadata": { + "id": "AJyrlWM_U_6Z" + } + }, + { + "cell_type": "code", + "source": [ + "house_2_path = Path(\"/content/pose-evaluation/pose_evaluation/utils/test/test_data/mediapipe/refined_landmarks/colin-HOUSE-needs-trim.pose\")\n", + "house2_pose = Pose.read(house_2_path.read_bytes())\n", + "print(house2_pose)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "nv1SDdHSUhLx", + "outputId": "8b1421d2-6536-4152-8594-b34a37e5435d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Pose\n", + "PoseHeader\n", + "Version: 0.20000000298023224\n", + "PoseHeaderDimensions(width=1280, height=720, depth=0)\n", + "Bounding Box: False\n", + "Components:\n", + "PoseHeaderComponent: POSE_LANDMARKS\n", + " Format: XYZC\n", + " Points: ['NOSE', 'LEFT_EYE_INNER', 'LEFT_EYE', 'LEFT_EYE_OUTER', 'RIGHT_EYE_INNER', 'RIGHT_EYE', 'RIGHT_EYE_OUTER', 'LEFT_EAR', 'RIGHT_EAR', 'MOUTH_LEFT', 'MOUTH_RIGHT', 'LEFT_SHOULDER', 'RIGHT_SHOULDER', 'LEFT_ELBOW', 'RIGHT_ELBOW', 'LEFT_WRIST', 'RIGHT_WRIST', 'LEFT_PINKY', 'RIGHT_PINKY', 'LEFT_INDEX', 'RIGHT_INDEX', 'LEFT_THUMB', 'RIGHT_THUMB', 'LEFT_HIP', 'RIGHT_HIP', 'LEFT_KNEE', 'RIGHT_KNEE', 'LEFT_ANKLE', 'RIGHT_ANKLE', 'LEFT_HEEL', 'RIGHT_HEEL', 'LEFT_FOOT_INDEX', 'RIGHT_FOOT_INDEX']\n", + " Limbs: 35\n", + " Colors: 1\n", + "\n", + "PoseHeaderComponent: FACE_LANDMARKS\n", + " Format: XYZC\n", + " Points: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '210', '211', '212', '213', '214', '215', '216', '217', '218', '219', '220', '221', '222', '223', '224', '225', '226', '227', '228', '229', '230', '231', '232', '233', '234', '235', '236', '237', '238', '239', '240', '241', '242', '243', '244', '245', '246', '247', '248', '249', '250', '251', '252', '253', '254', '255', '256', '257', '258', '259', '260', '261', '262', '263', '264', '265', '266', '267', '268', '269', '270', '271', '272', '273', '274', '275', '276', '277', '278', '279', '280', '281', '282', '283', 
'284', '285', '286', '287', '288', '289', '290', '291', '292', '293', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '320', '321', '322', '323', '324', '325', '326', '327', '328', '329', '330', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '345', '346', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '358', '359', '360', '361', '362', '363', '364', '365', '366', '367', '368', '369', '370', '371', '372', '373', '374', '375', '376', '377', '378', '379', '380', '381', '382', '383', '384', '385', '386', '387', '388', '389', '390', '391', '392', '393', '394', '395', '396', '397', '398', '399', '400', '401', '402', '403', '404', '405', '406', '407', '408', '409', '410', '411', '412', '413', '414', '415', '416', '417', '418', '419', '420', '421', '422', '423', '424', '425', '426', '427', '428', '429', '430', '431', '432', '433', '434', '435', '436', '437', '438', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '449', '450', '451', '452', '453', '454', '455', '456', '457', '458', '459', '460', '461', '462', '463', '464', '465', '466', '467', '468', '469', '470', '471', '472', '473', '474', '475', '476', '477']\n", + " Limbs: 2564\n", + " Colors: 1\n", + "\n", + "PoseHeaderComponent: LEFT_HAND_LANDMARKS\n", + " Format: XYZC\n", + " Points: ['WRIST', 'THUMB_CMC', 'THUMB_MCP', 'THUMB_IP', 'THUMB_TIP', 'INDEX_FINGER_MCP', 'INDEX_FINGER_PIP', 'INDEX_FINGER_DIP', 'INDEX_FINGER_TIP', 'MIDDLE_FINGER_MCP', 'MIDDLE_FINGER_PIP', 'MIDDLE_FINGER_DIP', 'MIDDLE_FINGER_TIP', 'RING_FINGER_MCP', 'RING_FINGER_PIP', 'RING_FINGER_DIP', 'RING_FINGER_TIP', 'PINKY_MCP', 'PINKY_PIP', 'PINKY_DIP', 'PINKY_TIP']\n", + " Limbs: 21\n", + " Colors: 21\n", + "\n", + "PoseHeaderComponent: RIGHT_HAND_LANDMARKS\n", + " Format: XYZC\n", + " Points: ['WRIST', 'THUMB_CMC', 'THUMB_MCP', 'THUMB_IP', 'THUMB_TIP', 'INDEX_FINGER_MCP', 'INDEX_FINGER_PIP', 'INDEX_FINGER_DIP', 'INDEX_FINGER_TIP', 'MIDDLE_FINGER_MCP', 'MIDDLE_FINGER_PIP', 'MIDDLE_FINGER_DIP', 'MIDDLE_FINGER_TIP', 'RING_FINGER_MCP', 'RING_FINGER_PIP', 'RING_FINGER_DIP', 'RING_FINGER_TIP', 'PINKY_MCP', 'PINKY_PIP', 'PINKY_DIP', 'PINKY_TIP']\n", + " Limbs: 21\n", + " Colors: 21\n", + "\n", + "PoseHeaderComponent: POSE_WORLD_LANDMARKS\n", + " Format: XYZC\n", + " Points: ['NOSE', 'LEFT_EYE_INNER', 'LEFT_EYE', 'LEFT_EYE_OUTER', 'RIGHT_EYE_INNER', 'RIGHT_EYE', 'RIGHT_EYE_OUTER', 'LEFT_EAR', 'RIGHT_EAR', 'MOUTH_LEFT', 'MOUTH_RIGHT', 'LEFT_SHOULDER', 'RIGHT_SHOULDER', 'LEFT_ELBOW', 'RIGHT_ELBOW', 'LEFT_WRIST', 'RIGHT_WRIST', 'LEFT_PINKY', 'RIGHT_PINKY', 'LEFT_INDEX', 'RIGHT_INDEX', 'LEFT_THUMB', 'RIGHT_THUMB', 'LEFT_HIP', 'RIGHT_HIP', 'LEFT_KNEE', 'RIGHT_KNEE', 'LEFT_ANKLE', 'RIGHT_ANKLE', 'LEFT_HEEL', 'RIGHT_HEEL', 'LEFT_FOOT_INDEX', 'RIGHT_FOOT_INDEX']\n", + " Limbs: 35\n", + " Colors: 1\n", + "\n", + "\n", + "NumPyPoseBody\n", + "FPS: 22.392236709594727\n", + "Data: (172, 1, 586, 3), float32\n", + "Confidence shape: (172, 1, 586), float32\n", + "Duration (seconds): 7.681233555659077\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Note:\n", + "(172, 1, 586, 3)\n", + "\n", + "This implies the file has data for 172 frames, 1 person, 586 keypoints, and 3 dimensions (xyz coordinates)."
+ ], + "metadata": { + "id": "6Ta6oHJ2VhU4" + } + }, + { + "cell_type": "code", + "source": [ + "type(house2_pose.body.data)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "iyfxIlGyWVAj", + "outputId": "d903310a-bc06-4624-a6aa-5e98ab1f191b" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "numpy.ma.core.MaskedArray" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Do these have masked/missing values?" + ], + "metadata": { + "id": "p3f0SxZGWvBP" + } + }, + { + "cell_type": "code", + "source": [ + "import numpy.ma as ma\n" + ], + "metadata": { + "id": "BxusDfYuWl4q" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "ma.count_masked(house1_pose.body.data)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GLe8JB6PWz8y", + "outputId": "5bba27bd-6ff9-473a-e2db-b2e2917e34bd" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "6489" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "frames_with_missing_values = 0\n", + "for frame_index, frame in enumerate(house1_pose.body.data):\n", + " if ma.count_masked(frame) > 0:\n", + " # print(f\"Frame {frame_index} is missing {ma.count_masked(frame)} values\")\n", + " frames_with_missing_values += 1\n", + "print(f\"There are {frames_with_missing_values} frames with missing values\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DbVKyVWbYcHm", + "outputId": "073f6e38-2e75-4d27-d1c7-3788d6a4c779" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "There are 54 frames with missing values\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "8PZA2mbTc2Qr" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "ma.count_masked(house2_pose.body.data)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dO1izlhzWzn3", + "outputId": "48a967f5-b6b1-45d8-ed20-630333a67c9c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "17703" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "frames_with_missing_values = 0\n", + "for frame_index, frame in enumerate(house2_pose.body.data):\n", + " if ma.count_masked(frame) > 0:\n", + " # for person_index, person in enumerate(frame):\n", + " # for keypoint_index, keypoint in enumerate(person):\n", + " # if ma.is_masked(keypoint):\n", + " # print(f\"\\tKeypoint {keypoint_index} is missing {ma.count_masked(keypoint)}\")\n", + " # print(f\"Frame {frame_index} is missing {ma.count_masked(frame)} values\")\n", + " frames_with_missing_values += 1\n", + "print(f\"There are {frames_with_missing_values} frames with missing values\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DkvLfEa4Y8-n", + "outputId": "77e7132f-7a13-449d-9e0d-7ae9ff518b62" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "There are 141 frames with missing values\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### How to compare 
these?\n", + "* One is much longer than the other. (172 frames vs 93)\n", + "* One has more keypoints than the other. (586 keypoints vs 576).\n", + "* different keypoints are missing at different times.\n", + "\n", + "Previous pose metrics e.g. DTW-MJE may not define how to deal with these issues.\n", + "\n", + "Fortunately, DTW$p$ and nDTW$p$ already have strategies defined for these and other issues!\n" + ], + "metadata": { + "id": "N1oQAt6yVjvh" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### Get Scores with DTW$p$" + ], + "metadata": { + "id": "hPyNdDdQavWj" + } + }, + { + "cell_type": "code", + "source": [ + "DTWp(house1_pose, house2_pose)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VRvQJ5qhVOAD", + "outputId": "a5d27bcb-48c5-4ad1-f5bd-3c158f1d0a0b" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "1991.93458720359" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "#### Get Scores with DTW$p$ with signatures" + ], + "metadata": { + "id": "yJW37kIha2Y3" + } + }, + { + "cell_type": "code", + "source": [ + "DTWp.score_with_signature(house1_pose, house2_pose)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O1P8S-MdapnM", + "outputId": "cf8c694a-2b1d-45f1-e728-3e62c1ebc148" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "startendtrimmed_unnormalized_hands_defaultdist0.0_nointerp_dtw_fillmasked10.0_dtaiDTWAggregatedDistanceMetricFast|higher_is_better:no|pose_preprocessors:[trim_pose|start:yes|end:yes,get_hands_only,fill_masked_or_invalid|fill_val:10.0,reduce_poses_to_intersection]|distance_measure:{dtaiDTWAggregatedDistanceMeasureFast|default_distance:0.0|aggregation_strategy:mean|use_fast:yes} = 36.58" + ] + }, + "metadata": {}, + "execution_count": 36 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "#### The Preprocessing allows it\n", + "The reason this works is because of the preprocessing pipeline included in the metric. Let's examine the pipeline." + ], + "metadata": { + "id": "8S7aoecZa8Xz" + } + }, + { + "cell_type": "code", + "source": [ + "DTWp.pose_preprocessors" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "At3_odwIbYDl", + "outputId": "59a65898-b726-47c2-e752-4aff3b7f7041" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[trim_pose|start:yes|end:yes,\n", + " get_hands_only,\n", + " fill_masked_or_invalid|fill_val:10.0,\n", + " reduce_poses_to_intersection]" + ] + }, + "metadata": {}, + "execution_count": 42 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "DTWp applies the following preprocessors in this order:\n", + "* `trim_pose|start:yes|end:yes` This means it trims starting and ending sections of the video where the hands are below the shoulders (and therefore not signing).\n", + "* `get_hands_only` Keeps only the keypoints in the hands.\n", + "* `fill_masked_or_invalid|fill_val:10.0`. 
Fills masked values with 10.0\n", + "* \`reduce_poses_to_intersection\` Reduces both poses to the components and points they have in common.\n", + "\n", + "We can also run the preprocessing separately from scoring and examine the results." + ], + "metadata": { + "id": "bO9dazpAbbtN" + } + }, + { + "cell_type": "code", + "source": [ + "preprocessed_poses = DTWp.process_poses([house1_pose, house2_pose])\n", + "house1_pose_preprocessed = preprocessed_poses[0]\n", + "house2_pose_preprocessed = preprocessed_poses[1]\n" + ], + "metadata": { + "id": "Zorvzxyaa_zz" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "house1_pose_preprocessed.body.data.shape\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kuaQXKAhbOBL", + "outputId": "cc88b76a-742b-40d3-8153-8c118a4d5bf0" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(63, 1, 42, 3)" + ] + }, + "metadata": {}, + "execution_count": 40 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "The first pose has gotten shorter, and now has only 42 keypoints." + ], + "metadata": { + "id": "BtftCsrQcVZZ" + } + }, + { + "cell_type": "code", + "source": [ + "house2_pose_preprocessed.body.data.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2UautkMubS1c", + "outputId": "d9b98e28-6e15-44c7-edff-afa6a4e33cb5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(45, 1, 42, 3)" + ] + }, + "metadata": {}, + "execution_count": 41 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "The second pose has gotten even shorter, despite being much longer originally. In this video I (Colin) deliberately sat for a while not moving, hands out of view, before performing the sign. These parts of the video were removed!" + ], + "metadata": { + "id": "D_wvGBvBcbFR" + } + }, + { + "cell_type": "code", + "source": [ + "ma.count_masked(house1_pose_preprocessed.body.data)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "G_p6DYBYb6tK", + "outputId": "80b27550-cbb3-4ba7-8c09-16ffa1d349c5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 43 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ma.count_masked(house2_pose_preprocessed.body.data)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Sp1fUsC-b86i", + "outputId": "e765414e-3c85-4628-ed23-e707bd797ae7" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 44 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Neither sequence contains masked values now!"
+ ], + "metadata": { + "id": "1ryxGzXecrNr" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### Get Scores with nDTW$p$" + ], + "metadata": { + "id": "kbvulYFDa6xb" + } + }, + { + "cell_type": "code", + "source": [ + "nDTWp( house1_pose, house2_pose)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ks6yBFOIWBjI", + "outputId": "b1987d59-0ef1-4907-a914-e3eb886e770d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "4.158450855906123" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "code", + "source": [ + "nDTWp.score_with_signature(house1_pose, house2_pose)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IrEmw2l0cNse", + "outputId": "5db7c4e1-93e4-4da1-f394-240364520478" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "untrimmed_normalizedbyshoulders_hands_defaultdist1.0_nointerp_dtw_fillmasked1.0_dtaiDTWAggregatedDistanceMetricFast|higher_is_better:no|pose_preprocessors:[normalize_poses|scale_factor:1,get_hands_only,fill_masked_or_invalid|fill_val:1.0,reduce_poses_to_intersection]|distance_measure:{dtaiDTWAggregatedDistanceMeasureFast|default_distance:1.0|aggregation_strategy:mean|use_fast:yes} = 4.16" + ] + }, + "metadata": {}, + "execution_count": 45 + } + ] + }, + { + "cell_type": "code", + "source": [ + "nDTWp.pose_preprocessors" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kR6qzb0CcO2k", + "outputId": "d258faef-3673-4044-9b02-7e12413e6113" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[normalize_poses|scale_factor:1,\n", + " get_hands_only,\n", + " fill_masked_or_invalid|fill_val:1.0,\n", + " reduce_poses_to_intersection]" + ] + }, + "metadata": {}, + "execution_count": 46 + } + ] + } + ] +} \ No newline at end of file