From 0c7e2f873dbee26c2e50275768a9f8328195c8c5 Mon Sep 17 00:00:00 2001 From: Simran Spiller Date: Wed, 3 Dec 2025 13:24:39 +0100 Subject: [PATCH 1/6] Toolchain changes for OEM version --- .circleci/base_config.yml | 4 ++-- .circleci/config.yml | 6 +++++- .circleci/generate_config.py | 20 +++++++++---------- CIRCLECI.md | 4 ++++ site/config/_default/config.yaml | 1 + site/data/versions.yaml | 6 ++++++ .../layouts/partials/version-selector.html | 2 +- .../layouts/shortcodes/full-version.html | 2 +- .../layouts/shortcodes/program-options.md | 4 ++-- .../arangoproxy/internal/service/service.go | 16 ++++++++++----- toolchain/docker/amd64/docker-compose.yml | 5 ++++- toolchain/docker/arm64/docker-compose.yml | 5 ++++- toolchain/docker/config.yaml | 3 +++ toolchain/docker/docker-compose.local.yml | 3 +++ toolchain/scripts/toolchain.sh | 5 +++++ 15 files changed, 62 insertions(+), 24 deletions(-) diff --git a/.circleci/base_config.yml b/.circleci/base_config.yml index 87ce0bd341..3c961a107a 100644 --- a/.circleci/base_config.yml +++ b/.circleci/base_config.yml @@ -28,7 +28,7 @@ commands: git clone --depth 1 --branch $branch_name --recurse-submodules --shallow-submodules --jobs 8 https://github.com/arangodb/arangodb.git /root/project - if [ "<< parameters.version >>" = "3.10" ] || [ "<< parameters.version >>" = "3.11" ]; then + if [ "<< parameters.version >>" = "3.10" ] || [ "<< parameters.version >>" = "3.11" ] || [ "<< parameters.version >>" = "oem" ]; then ENTERPRISE_BRANCH="<< parameters.version >>" else ENTERPRISE_BRANCH="devel" @@ -131,7 +131,7 @@ commands: set +e if [ "<< parameters.version >>" = "3.10" ]; then cmake --preset enterprise-pr -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=lld" -DCMAKE_LIBRARY_PATH=$OPENSSL_ROOT_DIR/lib -DUSE_MAINTAINER_MODE=Off -DUSE_GOOGLE_TESTS=Off -DUSE_FAILURE_TESTS=Off - elif [ "<< parameters.version >>" = "3.11" ]; then + elif [ "<< parameters.version >>" = "3.11" ] || [ "<< parameters.version >>" = "oem" ]; then # The OpenSSL dir that CMake discovers needs to be adjacent to where ldap.h is located, here: /opt cmake --preset enterprise-pr -DCMAKE_C_COMPILER=/tools/clang -DCMAKE_CXX_COMPILER=/tools/clang++ -DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=lld" -DCMAKE_LIBRARY_PATH=$OPENSSL_ROOT_DIR/lib -DOPENSSL_ROOT_DIR=/opt -DUSE_MAINTAINER_MODE=Off -DUSE_GOOGLE_TESTS=Off -DUSE_FAILURE_TESTS=Off else diff --git a/.circleci/config.yml b/.circleci/config.yml index 671353907a..f49bc8724f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -39,6 +39,10 @@ parameters: type: string default: "undefined" + arangodb-oem: + type: string + default: "undefined" + arangodb-3_12: type: string default: "undefined" @@ -116,7 +120,7 @@ jobs: python3 generate_config.py \ --workflow << pipeline.parameters.workflow >> \ - --arangodb-branches << pipeline.parameters.arangodb-3_10 >> << pipeline.parameters.arangodb-3_11 >> << pipeline.parameters.arangodb-3_12 >> << pipeline.parameters.arangodb-4_0 >> \ + --arangodb-branches << pipeline.parameters.arangodb-3_10 >> << pipeline.parameters.arangodb-3_11 >> << pipeline.parameters.arangodb-oem >> << pipeline.parameters.arangodb-3_12 >> << pipeline.parameters.arangodb-4_0 >> \ --arangodb-branch << pipeline.parameters.arangodb-branch >> \ --release-type << pipeline.parameters.release-type >> \ --docs-version << pipeline.parameters.docs-version >> \ diff --git a/.circleci/generate_config.py b/.circleci/generate_config.py index 8a3cb25c3c..d5bcb931b2 100644 --- a/.circleci/generate_config.py +++ 
b/.circleci/generate_config.py @@ -135,7 +135,7 @@ def workflow_generate(config): } }) - if version in ["3.10", "3.11"]: + if version in ["3.10", "3.11", "oem"]: if openssl.startswith("3.0"): compileJob["compile-linux"]["build-image"] = "arangodb/build-alpine-x86_64:3.16-gcc11.2-openssl3.0.10" elif openssl.startswith("3.1"): @@ -143,9 +143,9 @@ def workflow_generate(config): elif openssl.startswith("1.1"): compileJob["compile-linux"]["build-image"] = "arangodb/build-alpine-x86_64:3.16-gcc11.2-openssl1.1.1s" else: - compileJob["compile-linux"]["build-image"] = "arangodb/ubuntubuildarangodb-311:1" # clang-16 - else: # build image for 3.12.5 and devel as of 2025-02-04 - compileJob["compile-linux"]["build-image"] = "arangodb/ubuntubuildarangodb-devel:11" # clang-19 + compileJob["compile-linux"]["build-image"] = "arangodb/ubuntubuildarangodb-311:6" # clang-16 + else: # build image for 3.12.6 and devel as of 2025-12-03 + compileJob["compile-linux"]["build-image"] = "arangodb/ubuntubuildarangodb-devel:18" # clang-19 print(f"compileJob = {compileJob}") @@ -191,7 +191,7 @@ def workflow_generate_scheduled(config): "compile-linux": { "context": ["sccache-aws-bucket"], "name": f"compile-{version}", - "arangodb-branch": f"arangodb/enterprise-preview:{version}-nightly" if version in ["3.10", "3.11"] else "arangodb/enterprise-preview:devel-nightly", # TODO: Any other 3.12.x image we could use? + "arangodb-branch": f"arangodb/enterprise-preview:{'3.11' if version == 'oem' else version}-nightly" if version in ["3.10", "3.11", "oem"] else "arangodb/enterprise-preview:devel-nightly", # TODO: Any other 3.12.x image we could use? "version": version } } @@ -241,7 +241,7 @@ def workflow_release_arangodb(config): } } - if args.docs_version in ["3.10", "3.11"]: + if args.docs_version in ["3.10", "3.11", "oem"]: if openssl.startswith("3.0"): compileJob["compile-linux"]["build-image"] = "arangodb/build-alpine-x86_64:3.16-gcc11.2-openssl3.0.10" elif openssl.startswith("3.1"): @@ -249,9 +249,9 @@ def workflow_release_arangodb(config): elif openssl.startswith("1.1"): compileJob["compile-linux"]["build-image"] = "arangodb/build-alpine-x86_64:3.16-gcc11.2-openssl1.1.1s" else: - compileJob["compile-linux"]["build-image"] = "arangodb/ubuntubuildarangodb-devel:9" # 3.11.13 - else: # build image for 3.12.4 and devel as of 2024-11-25 - compileJob["compile-linux"]["build-image"] = "arangodb/ubuntubuildarangodb-devel:9" + compileJob["compile-linux"]["build-image"] = "arangodb/ubuntubuildarangodb-311:6" # clang-16 + else: # build image for 3.12.6 and devel as of 2025-12-03 + compileJob["compile-linux"]["build-image"] = "arangodb/ubuntubuildarangodb-devel:18" # clang-19 config["jobs"]["compile-linux"]["steps"].append({ "compile-and-dockerize-arangodb": { @@ -307,7 +307,7 @@ def workflow_generate_launch_command(config): branch = args.arangodb_branches[i] if args.workflow != "generate": #generate scheduled etc. - branch = f"arangodb/enterprise-preview:{version}-nightly" if version in ["3.10", "3.11"] else "arangodb/enterprise-preview:devel-nightly" # TODO: Any other 3.12.x image we could use? + branch = f"arangodb/enterprise-preview:{'3.11' if version == 'oem' else version}-nightly" if version in ["3.10", "3.11", "oem"] else "arangodb/enterprise-preview:devel-nightly" # TODO: Any other 3.12.x image we could use?
if branch == "undefined": continue diff --git a/CIRCLECI.md b/CIRCLECI.md index 1e5c965bc0..378cd9f1eb 100644 --- a/CIRCLECI.md +++ b/CIRCLECI.md @@ -77,6 +77,7 @@ arguments are invoked: | string | `workflow` | `generate` | | string | `arangodb-3_10` | [Upstream reference](#upstream-references) for 3.10 | | string | `arangodb-3_11` | [Upstream reference](#upstream-references) for 3.11 | +| string | `arangodb-oem` | [Upstream reference](#upstream-references) for OEM LTS | | string | `arangodb-3_12` | [Upstream reference](#upstream-references) for 3.12 | | string | `generators` | `examples` | | string | `deploy-url` | `deploy-preview-{PR_NUMBER}` | @@ -95,6 +96,7 @@ arguments are invoked: | string | `workflow` | `generate` | | string | `arangodb-3_10` | [Upstream reference](#upstream-references) for 3.10 | | string | `arangodb-3_11` | [Upstream reference](#upstream-references) for 3.11 | +| string | `arangodb-oem` | [Upstream reference](#upstream-references) for OEM LTS | | string | `arangodb-3_12` | [Upstream reference](#upstream-references) for 3.12 | | string | `generators` | `examples` | | string | `deploy-url` | `deploy-preview-{PR_NUMBER}` | @@ -123,6 +125,7 @@ or for multiple versions. | string | `workflow` | `generate` | | string | `arangodb-3_10` | [Upstream reference](#upstream-references) for 3.10 | | string | `arangodb-3_11` | [Upstream reference](#upstream-references) for 3.11 | +| string | `arangodb-oem` | [Upstream reference](#upstream-references) for OEM LTS | | string | `arangodb-3_12` | [Upstream reference](#upstream-references) for 3.12 | | string | `generators` | `examples` | | boolean | `commit-generated` | `true` | @@ -204,6 +207,7 @@ Invoke Args: | string | `workflow` | `generate-scheduled` | | string | `arangodb-3_10` | `arangodb/enterprise-preview:3.10-nightly` | | string | `arangodb-3_11` | `arangodb/enterprise-preview:3.11-nightly` | +| string | `arangodb-oem` | `arangodb/enterprise-preview:oem-nightly` | | string | `arangodb-3_12` | `arangodb/enterprise-preview:devel-nightly` | | string | `generators` | `metrics error-codes exit-codes optimizer options` | | boolean | `commit-generated` | `true` | diff --git a/site/config/_default/config.yaml b/site/config/_default/config.yaml index 6804003567..63e22aeaab 100644 --- a/site/config/_default/config.yaml +++ b/site/config/_default/config.yaml @@ -26,6 +26,7 @@ module: #excludeFiles: # - arangodb/3.10/* # - arangodb/3.11/* + # - arangodb/oem/* # - arangodb/4.0/* - source: content/arangodb/3.12 diff --git a/site/data/versions.yaml b/site/data/versions.yaml index fa3c6500f2..04d7bf6ea7 100644 --- a/site/data/versions.yaml +++ b/site/data/versions.yaml @@ -12,6 +12,12 @@ deprecated: false inDevelopment: false + - name: "oem" + version: "OEM LTS" + alias: "oem" + deprecated: false + inDevelopment: false + - name: "3.11" version: "3.11.14" alias: "3.11" diff --git a/site/themes/arangodb-docs-theme/layouts/partials/version-selector.html b/site/themes/arangodb-docs-theme/layouts/partials/version-selector.html index 69cb3aeaa8..094a13a111 100644 --- a/site/themes/arangodb-docs-theme/layouts/partials/version-selector.html +++ b/site/themes/arangodb-docs-theme/layouts/partials/version-selector.html @@ -3,7 +3,7 @@ {{ $versions := index site.Data.versions "/arangodb/" -}} {{ range $i, $version := $versions -}} {{ end -}} diff --git a/site/themes/arangodb-docs-theme/layouts/shortcodes/full-version.html b/site/themes/arangodb-docs-theme/layouts/shortcodes/full-version.html index 20883b4ac0..384f6178c2 100644 --- 
a/site/themes/arangodb-docs-theme/layouts/shortcodes/full-version.html +++ b/site/themes/arangodb-docs-theme/layouts/shortcodes/full-version.html @@ -1,5 +1,5 @@ {{ $ver := (.Get 0) -}} -{{- $versions := (where (index .Site.Data.versions "/arangodb/") "name" $ver) -}} +{{ $versions := (where (index .Site.Data.versions "/arangodb/") "name" $ver) -}} {{ if $versions -}} {{ (index $versions 0).version | htmlEscape -}} {{ else -}} diff --git a/site/themes/arangodb-docs-theme/layouts/shortcodes/program-options.md b/site/themes/arangodb-docs-theme/layouts/shortcodes/program-options.md index b47b578aac..8f586c96bf 100644 --- a/site/themes/arangodb-docs-theme/layouts/shortcodes/program-options.md +++ b/site/themes/arangodb-docs-theme/layouts/shortcodes/program-options.md @@ -57,7 +57,7 @@ {{- if $option.experimental }} {{- $badges = $badges | append "Experimental"}} {{- end }} -{{- if or (eq $pageVersion "3.10") (eq $pageVersion "3.11") }}{{/* Only one Edition v3.12.5+ */}} +{{- if or (eq $pageVersion "3.10") (eq $pageVersion "3.11") (eq $pageVersion "oem") }}{{/* Only one Edition v3.12.5+ */}} {{- if $option.enterpriseOnly }} {{- $badges = $badges | append "Enterprise Edition" }} {{- end }} @@ -96,7 +96,7 @@ Default: `{{ string (index (slice | append .) 0) }}` {{ . }} {{ end }} -{{ if or (eq $pageVersion "3.10") (eq $pageVersion "3.11") }}{{/* No Windows/macOS in 3.12+, logic can be removed after 3.11 EOL */}} +{{ if or (eq $pageVersion "3.10") (eq $pageVersion "3.11") (eq $pageVersion "oem") }}{{/* No Windows/macOS in 3.12+ */}} {{ with $option.os }} {{ $size := . | len }} {{ if lt $size 3 }}{{/* needs to be equal to the total number of possible OSes */}} diff --git a/toolchain/arangoproxy/internal/service/service.go b/toolchain/arangoproxy/internal/service/service.go index 3f2546e1c6..009c123487 100644 --- a/toolchain/arangoproxy/internal/service/service.go +++ b/toolchain/arangoproxy/internal/service/service.go @@ -175,17 +175,23 @@ func init() { "name": "Business Source License 1.1", "url": "https://github.com/arangodb/arangodb/blob/devel/LICENSE", } - if version.Name == "3.10" || version.Name == "3.11" { + displayVersion := version.Version + if version.Name == "3.10" || version.Name == "3.11" || version.Name == "oem" { + branch := version.Name + if version.Name == "oem" { + branch = "3.11.14" + displayVersion = "OEM LTS" + } license["name"] = "Apache 2.0" - license["url"] = fmt.Sprintf("https://github.com/arangodb/arangodb/blob/%s/LICENSE", version.Name) + license["url"] = fmt.Sprintf("https://github.com/arangodb/arangodb/blob/%s/LICENSE", branch) } OpenapiGlobalMap[version.Name] = map[string]interface{}{ "openapi": "3.1.0", "info": map[string]interface{}{ - "title": "ArangoDB Core API", - "summary": "The RESTful HTTP API of the ArangoDB Core Database System", - "version": version.Version, + "title": "ArangoDB API", + "summary": "The RESTful HTTP API of the ArangoDB database system", + "version": displayVersion, "license": license, "contact": map[string]interface{}{ "name": "ArangoDB Inc.", diff --git a/toolchain/docker/amd64/docker-compose.yml b/toolchain/docker/amd64/docker-compose.yml index 3a94f13a31..c9c38ed49b 100644 --- a/toolchain/docker/amd64/docker-compose.yml +++ b/toolchain/docker/amd64/docker-compose.yml @@ -5,7 +5,8 @@ services: tty: true volumes: - ${ARANGODB_SRC_3_10:-/dev/null}:/tmp/3.10 - - ${ARANGODB_SRC_3_11:-/tmp/1}:/tmp/3.11 + - ${ARANGODB_SRC_3_11:-/tmp/0}:/tmp/3.11 + - ${ARANGODB_SRC_OEM:-/tmp/1}:/tmp/oem - ${ARANGODB_SRC_3_12:-/tmp/2}:/tmp/3.12 - 
${ARANGODB_SRC_4_0:-/tmp/3}:/tmp/4.0 - ../../../:/home/ @@ -18,9 +19,11 @@ services: OVERRIDE: ${OVERRIDE} ARANGODB_SRC_3_10: ${ARANGODB_SRC_3_10} ARANGODB_SRC_3_11: ${ARANGODB_SRC_3_11} + ARANGODB_SRC_OEM: ${ARANGODB_SRC_OEM} ARANGODB_SRC_3_12: ${ARANGODB_SRC_3_12} ARANGODB_SRC_4_0: ${ARANGODB_SRC_4_0} ARANGODB_BRANCH_3_10: ${ARANGODB_BRANCH_3_10} ARANGODB_BRANCH_3_11: ${ARANGODB_BRANCH_3_11} + ARANGODB_BRANCH_OEM: ${ARANGODB_BRANCH_OEM} ARANGODB_BRANCH_3_12: ${ARANGODB_BRANCH_3_12} ARANGODB_BRANCH_4_0: ${ARANGODB_BRANCH_4_0} diff --git a/toolchain/docker/arm64/docker-compose.yml b/toolchain/docker/arm64/docker-compose.yml index 451ba458c4..942479ea56 100644 --- a/toolchain/docker/arm64/docker-compose.yml +++ b/toolchain/docker/arm64/docker-compose.yml @@ -5,7 +5,8 @@ services: tty: true volumes: - ${ARANGODB_SRC_3_10:-/dev/null}:/tmp/3.10 - - ${ARANGODB_SRC_3_11:-/tmp/1}:/tmp/3.11 + - ${ARANGODB_SRC_3_11:-/tmp/0}:/tmp/3.11 + - ${ARANGODB_SRC_OEM:-/tmp/1}:/tmp/oem - ${ARANGODB_SRC_3_12:-/tmp/2}:/tmp/3.12 - ${ARANGODB_SRC_4_0:-/tmp/3}:/tmp/4.0 - ../../../:/home/ @@ -18,9 +19,11 @@ services: OVERRIDE: ${OVERRIDE} ARANGODB_SRC_3_10: ${ARANGODB_SRC_3_10} ARANGODB_SRC_3_11: ${ARANGODB_SRC_3_11} + ARANGODB_SRC_OEM: ${ARANGODB_SRC_OEM} ARANGODB_SRC_3_12: ${ARANGODB_SRC_3_12} ARANGODB_SRC_4_0: ${ARANGODB_SRC_4_0} ARANGODB_BRANCH_3_10: ${ARANGODB_BRANCH_3_10} ARANGODB_BRANCH_3_11: ${ARANGODB_BRANCH_3_11} + ARANGODB_BRANCH_OEM: ${ARANGODB_BRANCH_OEM} ARANGODB_BRANCH_3_12: ${ARANGODB_BRANCH_3_12} ARANGODB_BRANCH_4_0: ${ARANGODB_BRANCH_4_0} diff --git a/toolchain/docker/config.yaml b/toolchain/docker/config.yaml index 7bbe4b5d51..59b08752d7 100644 --- a/toolchain/docker/config.yaml +++ b/toolchain/docker/config.yaml @@ -6,6 +6,9 @@ servers: - image: ${ARANGODB_BRANCH_3_11_IMAGE} version: ${ARANGODB_BRANCH_3_11_VERSION} + - image: ${ARANGODB_BRANCH_OEM_IMAGE} + version: ${ARANGODB_BRANCH_OEM_VERSION} + - image: ${ARANGODB_BRANCH_3_12_IMAGE} version: ${ARANGODB_BRANCH_3_12_VERSION} diff --git a/toolchain/docker/docker-compose.local.yml b/toolchain/docker/docker-compose.local.yml index ae6dcaa313..d10106ea07 100644 --- a/toolchain/docker/docker-compose.local.yml +++ b/toolchain/docker/docker-compose.local.yml @@ -7,6 +7,7 @@ services: target: toolchain volumes: - ${ARANGODB_SRC:-/dev/null}:${ARANGODB_SRC:-/dev/null} + - ${ARANGODB_SRC_1:-/tmp/0}:${ARANGODB_SRC_1:-/tmp/0} - ${ARANGODB_SRC_2:-/tmp/1}:${ARANGODB_SRC_2:-/tmp/1} - ${ARANGODB_SRC_3:-/tmp/2}:${ARANGODB_SRC_3:-/tmp/2} - ${ARANGODB_SRC_4:-/tmp/3}:${ARANGODB_SRC_4:-/tmp/3} @@ -19,9 +20,11 @@ services: GENERATORS: ${GENERATORS} ARANGODB_SRC_3_10: ${ARANGODB_SRC_3_10} ARANGODB_SRC_3_11: ${ARANGODB_SRC_3_11} + ARANGODB_SRC_OEM: ${ARANGODB_SRC_OEM} ARANGODB_SRC_3_12: ${ARANGODB_SRC_3_12} ARANGODB_SRC_4_0: ${ARANGODB_SRC_4_0} ARANGODB_BRANCH_3_10: ${ARANGODB_BRANCH_3_10} ARANGODB_BRANCH_3_11: ${ARANGODB_BRANCH_3_11} + ARANGODB_BRANCH_OEM: ${ARANGODB_BRANCH_OEM} ARANGODB_BRANCH_3_12: ${ARANGODB_BRANCH_3_12} ARANGODB_BRANCH_4_0: ${ARANGODB_BRANCH_4_0} diff --git a/toolchain/scripts/toolchain.sh b/toolchain/scripts/toolchain.sh index 6fddae577e..e4fab1319d 100755 --- a/toolchain/scripts/toolchain.sh +++ b/toolchain/scripts/toolchain.sh @@ -48,6 +48,11 @@ if [ "$ARANGODB_BRANCH_3_11" != "" ] ; then export ARANGODB_BRANCH_3_11_VERSION="3.11" fi +if [ "$ARANGODB_BRANCH_OEM" != "" ] ; then + export ARANGODB_BRANCH_OEM_IMAGE="$ARANGODB_BRANCH_OEM" + export ARANGODB_BRANCH_OEM_VERSION="oem" +fi + if [ "$ARANGODB_BRANCH_3_12" != "" ] ; then export 
ARANGODB_BRANCH_3_12_IMAGE="$ARANGODB_BRANCH_3_12" export ARANGODB_BRANCH_3_12_VERSION="3.12" From 18aebe60141ab7c23afe20e2908734a0a69ff1d4 Mon Sep 17 00:00:00 2001 From: Simran Spiller Date: Thu, 4 Dec 2025 11:30:11 +0100 Subject: [PATCH 2/6] CircleCI: Authenticate requests & dump GitHub ratelimit headers on failure --- .circleci/config.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index f49bc8724f..9987d53406 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -93,7 +93,10 @@ jobs: fetched=false for i in $(seq 1 5); do echo "" - res=$(curl -fsS https://api.github.com/repos/arangodb/docs-hugo/contents/.circleci?ref=$CIRCLE_SHA1) || curlStatus=$? + headers=$(mktemp) + res=$(curl -sS -D "$headers" -H "Authorization: Bearer $CIRCLECI_GITHUB_READONLY" https://api.github.com/repos/arangodb/docs-hugo/contents/.circleci?ref=$CIRCLE_SHA1) || curlStatus=$? + headers_content=$(cat "$headers") + rm "$headers" if [[ -z "${curlStatus:-}" ]]; then urls=$(echo "$res" | jq ".[].download_url") || jqStatus=$? if [[ -z "${jqStatus:-}" ]]; then @@ -103,7 +106,8 @@ jobs: echo "jq failed with $jqStatus, input:" echo "$res" else - echo "curl failed with $curlStatus" + echo "curl failed with $curlStatus, rate limit?" + echo "$headers_content" | grep -i "ratelimit\|retry-after" fi unset curlStatus unset jqStatus @@ -113,8 +117,8 @@ jobs: echo "Failed to fetch download URLs" exit 1 fi - echo "$urls" | xargs wget - wget https://raw.githubusercontent.com/arangodb/docs-hugo/$CIRCLE_SHA1/site/data/versions.yaml + echo "$urls" | xargs -n 1 curl -sS -O -H "Authorization: Bearer $CIRCLECI_GITHUB_READONLY" + curl -sS -O -H "Authorization: Bearer $CIRCLECI_GITHUB_READONLY" https://raw.githubusercontent.com/arangodb/docs-hugo/$CIRCLE_SHA1/site/data/versions.yaml pip install pyyaml requests From c0f9fc5f20a80bb8ef203061fb4ba49323b26345 Mon Sep 17 00:00:00 2001 From: Simran Spiller Date: Thu, 4 Dec 2025 11:41:42 +0100 Subject: [PATCH 3/6] Add OEM content and update PR template --- PULL_REQUEST_TEMPLATE.md | 1 + .../query-profiling.md | 2 +- site/content/arangodb/3.12/_index.md | 2 +- .../3.12/release-notes/version-3.12/_index.md | 2 +- .../3.12/release-notes/version-oem/_index.md | 6 + .../version-oem/api-changes-in-oem.md | 876 + .../incompatible-changes-in-oem.md | 695 + .../version-oem/known-issues-in-oem.md | 61 + .../version-oem/whats-new-in-oem.md | 1454 ++ site/content/arangodb/4.0/_index.md | 2 +- .../4.0/release-notes/version-3.12/_index.md | 2 +- .../4.0/release-notes/version-4.0/_index.md | 2 +- .../4.0/release-notes/version-oem/_index.md | 6 + .../version-oem/api-changes-in-oem.md | 876 + .../incompatible-changes-in-oem.md | 695 + .../version-oem/known-issues-in-oem.md | 61 + .../version-oem/whats-new-in-oem.md | 1454 ++ site/content/arangodb/oem/_index.md | 42 + site/content/arangodb/oem/aql/_index.md | 36 + .../content/arangodb/oem/aql/common-errors.md | 420 + site/content/arangodb/oem/aql/data-queries.md | 554 + .../aql/examples-and-query-patterns/_index.md | 115 + .../actors-and-movies-dataset-queries.md | 859 + .../examples-and-query-patterns/counting.md | 28 + .../create-test-data.md | 95 + .../diffing-two-documents.md | 128 + .../dynamic-attribute-names.md | 202 + .../examples-and-query-patterns/grouping.md | 359 + .../aql/examples-and-query-patterns/joins.md | 892 + .../projections-and-filters.md | 136 + .../queries-without-collections.md | 50 + .../remove-vertex.md | 83 + 
.../examples-and-query-patterns/traversals.md | 118 + .../upsert-repsert-guide.md | 335 + .../aql/execution-and-performance/_index.md | 7 + .../caching-query-results.md | 228 + .../explaining-queries.md | 278 + .../parsing-queries.md | 32 + .../query-optimization.md | 626 + .../query-profiling.md | 229 + .../query-statistics.md | 98 + .../arangodb/oem/aql/functions/_index.md | 37 + .../oem/aql/functions/arangosearch.md | 1361 ++ .../arangodb/oem/aql/functions/array.md | 1041 ++ .../content/arangodb/oem/aql/functions/bit.md | 321 + .../arangodb/oem/aql/functions/date.md | 1335 ++ .../oem/aql/functions/document-object.md | 1023 ++ .../arangodb/oem/aql/functions/fulltext.md | 94 + .../content/arangodb/oem/aql/functions/geo.md | 964 ++ .../oem/aql/functions/miscellaneous.md | 803 + .../arangodb/oem/aql/functions/numeric.md | 770 + .../arangodb/oem/aql/functions/string.md | 2070 +++ .../oem/aql/functions/type-check-and-cast.md | 279 + .../arangodb/oem/aql/fundamentals/_index.md | 8 + .../accessing-data-from-collections.md | 78 + .../oem/aql/fundamentals/bind-parameters.md | 169 + .../oem/aql/fundamentals/data-types.md | 290 + .../oem/aql/fundamentals/limitations.md | 104 + .../oem/aql/fundamentals/query-errors.md | 41 + .../oem/aql/fundamentals/query-results.md | 114 + .../oem/aql/fundamentals/subqueries.md | 188 + .../arangodb/oem/aql/fundamentals/syntax.md | 347 + .../aql/fundamentals/type-and-value-order.md | 137 + .../arangodb/oem/aql/graph-queries/_index.md | 47 + .../aql/graph-queries/all-shortest-paths.md | 197 + .../arangodb/oem/aql/graph-queries/k-paths.md | 232 + .../oem/aql/graph-queries/k-shortest-paths.md | 308 + .../oem/aql/graph-queries/shortest-path.md | 228 + .../aql/graph-queries/traversals-explained.md | 85 + .../oem/aql/graph-queries/traversals.md | 890 + .../oem/aql/high-level-operations/_index.md | 9 + .../oem/aql/high-level-operations/collect.md | 375 + .../oem/aql/high-level-operations/filter.md | 125 + .../oem/aql/high-level-operations/for.md | 251 + .../oem/aql/high-level-operations/insert.md | 215 + .../oem/aql/high-level-operations/let.md | 69 + .../oem/aql/high-level-operations/limit.md | 96 + .../oem/aql/high-level-operations/remove.md | 185 + .../oem/aql/high-level-operations/replace.md | 306 + .../oem/aql/high-level-operations/return.md | 212 + .../oem/aql/high-level-operations/search.md | 337 + .../oem/aql/high-level-operations/sort.md | 109 + .../oem/aql/high-level-operations/update.md | 429 + .../oem/aql/high-level-operations/upsert.md | 276 + .../oem/aql/high-level-operations/window.md | 282 + .../oem/aql/high-level-operations/with.md | 71 + .../oem/aql/how-to-invoke-aql/_index.md | 30 + .../aql/how-to-invoke-aql/with-arangosh.md | 786 + .../with-the-web-interface.md | 50 + site/content/arangodb/oem/aql/operators.md | 816 + .../oem/aql/user-defined-functions.md | 405 + .../content/arangodb/oem/components/_index.md | 6 + .../oem/components/arangodb-server/_index.md | 21 + .../arangodb-server/environment-variables.md | 108 + .../oem/components/arangodb-server/ldap.md | 563 + .../oem/components/arangodb-server/options.md | 56 + .../arangodb-server/storage-engine.md | 198 + .../arangodb/oem/components/tools/_index.md | 35 + .../components/tools/arangobackup/_index.md | 41 + .../components/tools/arangobackup/examples.md | 340 + .../components/tools/arangobackup/options.md | 15 + .../components/tools/arangobench/_index.md | 204 + .../components/tools/arangobench/options.md | 40 + .../components/tools/arangodb-shell/_index.md | 20 + 
.../tools/arangodb-shell/details.md | 207 + .../tools/arangodb-shell/examples.md | 88 + .../tools/arangodb-shell/options.md | 12 + .../tools/arangodb-starter/_index.md | 8 + .../tools/arangodb-starter/architecture.md | 229 + .../tools/arangodb-starter/options.md | 465 + .../tools/arangodb-starter/security.md | 132 + .../oem/components/tools/arangodump/_index.md | 20 + .../components/tools/arangodump/examples.md | 317 + .../tools/arangodump/limitations.md | 16 + .../components/tools/arangodump/maskings.md | 1050 ++ .../components/tools/arangodump/options.md | 14 + .../components/tools/arangoexport/_index.md | 13 + .../components/tools/arangoexport/examples.md | 233 + .../components/tools/arangoexport/options.md | 12 + .../components/tools/arangoimport/_index.md | 13 + .../components/tools/arangoimport/details.md | 197 + .../tools/arangoimport/examples-csv.md | 335 + .../tools/arangoimport/examples-json.md | 272 + .../components/tools/arangoimport/options.md | 12 + .../components/tools/arangoinspect/_index.md | 8 + .../tools/arangoinspect/examples.md | 122 + .../components/tools/arangoinspect/options.md | 12 + .../components/tools/arangorestore/_index.md | 17 + .../tools/arangorestore/examples.md | 366 + .../components/tools/arangorestore/options.md | 12 + .../components/tools/arangovpack/_index.md | 12 + .../components/tools/arangovpack/options.md | 12 + .../oem/components/tools/foxx-cli/_index.md | 17 + .../oem/components/tools/foxx-cli/details.md | 178 + .../oem/components/web-interface/_index.md | 16 + .../oem/components/web-interface/cluster.md | 78 + .../components/web-interface/collections.md | 72 + .../oem/components/web-interface/dashboard.md | 37 + .../oem/components/web-interface/document.md | 21 + .../oem/components/web-interface/graphs.md | 194 + .../oem/components/web-interface/logs.md | 20 + .../oem/components/web-interface/queries.md | 117 + .../oem/components/web-interface/services.md | 50 + .../oem/components/web-interface/users.md | 40 + site/content/arangodb/oem/concepts/_index.md | 6 + .../arangodb/oem/concepts/data-models.md | 86 + .../arangodb/oem/concepts/data-retrieval.md | 24 + .../oem/concepts/data-structure/_index.md | 67 + .../concepts/data-structure/collections.md | 751 + .../oem/concepts/data-structure/databases.md | 604 + .../data-structure/documents/_index.md | 1135 ++ .../documents/computed-values.md | 435 + .../documents/schema-validation.md | 404 + .../oem/concepts/data-structure/views.md | 766 + .../arangodb/oem/data-science/_index.md | 141 + .../oem/data-science/arangograph-notebooks.md | 24 + .../oem/data-science/arangographml/_index.md | 181 + .../oem/data-science/arangographml/deploy.md | 77 + .../arangographml/getting-started.md | 967 ++ .../oem/data-science/llm-knowledge-graphs.md | 75 + .../oem/data-science/pregel/_index.md | 327 + .../oem/data-science/pregel/algorithms.md | 367 + site/content/arangodb/oem/deploy/_index.md | 144 + .../oem/deploy/active-failover/_index.md | 127 + .../deploy/active-failover/administration.md | 79 + .../deploy/active-failover/manual-start.md | 276 + .../using-the-arangodb-starter.md | 158 + .../arangodb/oem/deploy/arangosync/_index.md | 129 + .../oem/deploy/arangosync/administration.md | 147 + .../deploy/arangosync/deployment/_index.md | 110 + .../arangosync/deployment/arangodb-cluster.md | 108 + .../deployment/arangosync-master.md | 78 + .../deployment/arangosync-workers.md | 54 + .../deployment/prometheus-and-grafana.md | 103 + .../oem/deploy/arangosync/monitoring.md | 90 + 
.../arangosync/operations-and-maintenance.md | 70 + .../oem/deploy/arangosync/security.md | 182 + .../oem/deploy/arangosync/troubleshooting.md | 156 + .../oem/deploy/architecture/_index.md | 7 + .../oem/deploy/architecture/data-sharding.md | 192 + .../oem/deploy/architecture/replication.md | 119 + .../oem/deploy/architecture/scalability.md | 83 + .../arangodb/oem/deploy/cluster/_index.md | 395 + .../oem/deploy/cluster/administration.md | 332 + .../oem/deploy/cluster/deployment/_index.md | 96 + .../deploy/cluster/deployment/manual-start.md | 385 + .../deployment/using-the-arangodb-starter.md | 251 + .../oem/deploy/cluster/limitations.md | 25 + .../arangodb/oem/deploy/in-the-cloud.md | 47 + .../content/arangodb/oem/deploy/kubernetes.md | 23 + site/content/arangodb/oem/deploy/oneshard.md | 320 + .../oem/deploy/production-checklist.md | 100 + .../oem/deploy/single-instance-vs-cluster.md | 168 + .../oem/deploy/single-instance/_index.md | 32 + .../deploy/single-instance/manual-start.md | 73 + .../using-the-arangodb-starter.md | 108 + site/content/arangodb/oem/develop/_index.md | 6 + .../arangodb/oem/develop/drivers/_index.md | 53 + .../arangodb/oem/develop/drivers/go.md | 735 + .../oem/develop/drivers/java/_index.md | 480 + .../java/reference-version-6/_index.md | 71 + .../java/reference-version-6/driver-setup.md | 276 + .../java/reference-version-6/serialization.md | 236 + .../java/reference-version-7/_index.md | 7 + .../changes-in-version-7.md | 459 + .../java/reference-version-7/driver-setup.md | 326 + .../java/reference-version-7/serialization.md | 247 + .../oem/develop/drivers/javascript.md | 222 + .../arangodb/oem/develop/drivers/python.md | 329 + .../arangodb/oem/develop/error-codes.md | 73 + .../arangodb/oem/develop/exit-codes.md | 43 + .../oem/develop/foxx-microservices/_index.md | 74 + .../develop/foxx-microservices/deployment.md | 20 + .../foxx-microservices/getting-started.md | 457 + .../foxx-microservices/guides/_index.md | 30 + .../guides/access-from-the-browser.md | 151 + .../guides/authentication-and-sessions.md | 242 + .../guides/development-mode.md | 86 + .../guides/foxx-in-a-cluster.md | 69 + .../guides/linking-services-together.md | 223 + .../guides/making-requests.md | 44 + .../guides/scripts-and-scheduling.md | 157 + .../guides/testing-foxx-services.md | 259 + .../guides/using-node-modules.md | 52 + .../guides/using-webpack-with-foxx.md | 100 + .../guides/working-with-collections.md | 70 + .../guides/working-with-files.md | 111 + .../guides/working-with-routers.md | 126 + .../guides/writing-queries.md | 232 + .../foxx-microservices/reference/_index.md | 19 + .../reference/configuration.md | 78 + .../reference/related-modules/_index.md | 62 + .../related-modules/authentication.md | 109 + .../reference/related-modules/graphql.md | 160 + .../reference/related-modules/oauth-1-0a.md | 430 + .../reference/related-modules/oauth-2-0.md | 290 + .../reference/related-modules/queues.md | 462 + .../reference/routers/_index.md | 217 + .../reference/routers/endpoints.md | 615 + .../reference/routers/middleware.md | 86 + .../reference/routers/request.md | 408 + .../reference/routers/response.md | 465 + .../reference/service-context.md | 297 + .../reference/service-manifest.md | 307 + .../reference/sessions-middleware/_index.md | 86 + .../session-storages/_index.md | 100 + .../session-storages/collection-storage.md | 88 + .../session-storages/jwt-storage.md | 74 + .../session-transports/_index.md | 84 + .../session-transports/cookie-transport.md | 79 + 
.../session-transports/header-transport.md | 39 + .../arangodb/oem/develop/http-api/_index.md | 108 + .../oem/develop/http-api/administration.md | 1654 ++ .../oem/develop/http-api/analyzers.md | 337 + .../oem/develop/http-api/authentication.md | 451 + .../oem/develop/http-api/batch-requests.md | 385 + .../arangodb/oem/develop/http-api/cluster.md | 1563 ++ .../oem/develop/http-api/collections.md | 5949 +++++++ .../oem/develop/http-api/databases.md | 461 + .../oem/develop/http-api/documents.md | 3068 ++++ .../arangodb/oem/develop/http-api/foxx.md | 1075 ++ .../http-api/general-request-handling.md | 562 + .../oem/develop/http-api/graphs/_index.md | 8 + .../oem/develop/http-api/graphs/edges.md | 181 + .../develop/http-api/graphs/named-graphs.md | 7664 ++++++++ .../oem/develop/http-api/hot-backups.md | 695 + .../arangodb/oem/develop/http-api/import.md | 606 + .../oem/develop/http-api/indexes/_index.md | 353 + .../oem/develop/http-api/indexes/fulltext.md | 120 + .../develop/http-api/indexes/geo-spatial.md | 173 + .../oem/develop/http-api/indexes/inverted.md | 644 + .../http-api/indexes/multi-dimensional.md | 121 + .../develop/http-api/indexes/persistent.md | 242 + .../oem/develop/http-api/indexes/ttl.md | 118 + .../arangodb/oem/develop/http-api/jobs.md | 831 + .../oem/develop/http-api/monitoring/_index.md | 8 + .../oem/develop/http-api/monitoring/logs.md | 741 + .../develop/http-api/monitoring/metrics.md | 214 + .../develop/http-api/monitoring/statistics.md | 689 + .../arangodb/oem/develop/http-api/pregel.md | 1375 ++ .../oem/develop/http-api/queries/_index.md | 8 + .../develop/http-api/queries/aql-queries.md | 3482 ++++ .../queries/aql-query-results-cache.md | 396 + .../queries/user-defined-aql-functions.md | 481 + .../develop/http-api/replication/_index.md | 22 + .../replication/other-replication-commands.md | 55 + .../replication/replication-applier.md | 1014 ++ .../http-api/replication/replication-dump.md | 992 ++ .../replication/replication-logger.md | 478 + .../http-api/replication/write-ahead-log.md | 776 + .../arangodb/oem/develop/http-api/security.md | 161 + .../arangodb/oem/develop/http-api/tasks.md | 623 + .../develop/http-api/transactions/_index.md | 17 + .../transactions/javascript-transactions.md | 310 + .../transactions/stream-transactions.md | 557 + .../arangodb/oem/develop/http-api/users.md | 1106 ++ .../oem/develop/http-api/views/_index.md | 30 + .../http-api/views/arangosearch-views.md | 2741 +++ .../http-api/views/search-alias-views.md | 1332 ++ .../oem/develop/integrations/_index.md | 49 + .../arangodb-datasource-for-apache-spark.md | 416 + .../_index.md | 283 + .../configuration.md | 244 + .../integrations/spring-boot-arangodb.md | 1507 ++ .../spring-data-arangodb/_index.md | 341 + .../spring-data-arangodb/migration.md | 109 + .../reference-version-3/_index.md | 6 + .../reference-version-3/mapping/_index.md | 187 + .../reference-version-3/mapping/auditing.md | 148 + .../reference-version-3/mapping/converter.md | 55 + .../reference-version-3/mapping/document.md | 97 + .../reference-version-3/mapping/edge.md | 95 + .../reference-version-3/mapping/events.md | 42 + .../reference-version-3/mapping/indexes.md | 121 + .../reference-version-3/mapping/reference.md | 65 + .../reference-version-3/mapping/relations.md | 34 + .../repositories/_index.md | 47 + .../repositories/queries/_index.md | 125 + .../repositories/queries/derived-queries.md | 218 + .../repositories/queries/named-queries.md | 34 + .../repositories/queries/query-methods.md | 132 + .../reference-version-3/template.md | 
15 + .../reference-version-4/_index.md | 6 + .../reference-version-4/mapping/_index.md | 280 + .../reference-version-4/mapping/auditing.md | 148 + .../mapping/computed-values.md | 58 + .../reference-version-4/mapping/converter.md | 53 + .../reference-version-4/mapping/document.md | 97 + .../reference-version-4/mapping/edge.md | 95 + .../reference-version-4/mapping/events.md | 42 + .../reference-version-4/mapping/indexes.md | 121 + .../reference-version-4/mapping/reference.md | 65 + .../reference-version-4/mapping/relations.md | 34 + .../repositories/_index.md | 47 + .../repositories/queries/_index.md | 125 + .../repositories/queries/derived-queries.md | 218 + .../repositories/queries/named-queries.md | 34 + .../repositories/queries/query-methods.md | 132 + .../reference-version-4/template.md | 15 + .../javascript-api/@arangodb/_index.md | 248 + .../@arangodb/collection-object.md | 1773 ++ .../javascript-api/@arangodb/cursor-object.md | 106 + .../javascript-api/@arangodb/db-object.md | 1284 ++ .../javascript-api/@arangodb/view-object.md | 187 + .../oem/develop/javascript-api/_index.md | 290 + .../oem/develop/javascript-api/actions.md | 161 + .../oem/develop/javascript-api/analyzers.md | 183 + .../oem/develop/javascript-api/aql-queries.md | 98 + .../oem/develop/javascript-api/console.md | 172 + .../oem/develop/javascript-api/crypto.md | 352 + .../arangodb/oem/develop/javascript-api/fs.md | 296 + .../oem/develop/javascript-api/request.md | 184 + .../oem/develop/javascript-api/tasks.md | 177 + .../oem/develop/operational-factors.md | 361 + .../oem/develop/satellitecollections.md | 139 + .../arangodb/oem/develop/smartjoins.md | 308 + .../oem/develop/transactions/_index.md | 69 + .../oem/develop/transactions/durability.md | 66 + .../transactions/javascript-transactions.md | 466 + .../oem/develop/transactions/limitations.md | 156 + .../transactions/locking-and-isolation.md | 237 + .../transactions/stream-transactions.md | 212 + site/content/arangodb/oem/features/_index.md | 124 + .../oem/features/community-edition.md | 280 + .../oem/features/enterprise-edition.md | 120 + .../oem/features/highlights-by-version.md | 446 + .../arangodb/oem/get-started/_index.md | 100 + .../how-to-interact-with-arangodb.md | 140 + .../get-started/on-premises-installation.md | 99 + .../get-started/set-up-a-cloud-instance.md | 150 + .../oem/get-started/start-using-aql/_index.md | 157 + .../oem/get-started/start-using-aql/crud.md | 371 + .../get-started/start-using-aql/dataset.md | 99 + .../oem/get-started/start-using-aql/filter.md | 137 + .../oem/get-started/start-using-aql/geo.md | 144 + .../oem/get-started/start-using-aql/graphs.md | 303 + .../oem/get-started/start-using-aql/joins.md | 323 + .../get-started/start-using-aql/sort-limit.md | 183 + site/content/arangodb/oem/graphs/_index.md | 429 + .../oem/graphs/enterprisegraphs/_index.md | 56 + .../enterprisegraphs/getting-started.md | 314 + .../oem/graphs/enterprisegraphs/management.md | 336 + .../arangodb/oem/graphs/example-graphs.md | 262 + .../oem/graphs/general-graphs/_index.md | 107 + .../oem/graphs/general-graphs/functions.md | 938 + .../oem/graphs/general-graphs/management.md | 833 + .../oem/graphs/satellitegraphs/_index.md | 83 + .../oem/graphs/satellitegraphs/details.md | 254 + .../oem/graphs/satellitegraphs/management.md | 320 + .../arangodb/oem/graphs/smartgraphs/_index.md | 121 + .../oem/graphs/smartgraphs/getting-started.md | 207 + .../oem/graphs/smartgraphs/management.md | 354 + .../testing-graphs-on-single-server.md | 44 + 
.../arangodb/oem/graphs/working-with-edges.md | 36 + .../arangodb/oem/indexes-and-search/_index.md | 6 + .../oem/indexes-and-search/analyzers.md | 1693 ++ .../indexes-and-search/arangosearch/_index.md | 1004 ++ .../arangosearch-views-reference.md | 492 + .../case-sensitivity-and-diacritics.md | 125 + .../arangosearch/exact-value-matching.md | 236 + .../arangosearch/example-datasets.md | 38 + .../arangosearch/faceted-search.md | 137 + .../arangosearch/full-text-token-search.md | 182 + .../arangosearch/fuzzy-search.md | 388 + .../arangosearch/geospatial-search.md | 632 + .../arangosearch/nested-search.md | 321 + .../arangosearch/performance.md | 673 + .../phrase-and-proximity-search.md | 165 + .../arangosearch/prefix-matching.md | 453 + .../arangosearch/range-queries.md | 303 + .../arangosearch/ranking.md | 602 + .../search-alias-views-reference.md | 108 + .../arangosearch/search-highlighting.md | 215 + .../arangosearch/wildcard-search.md | 203 + .../oem/indexes-and-search/indexing/_index.md | 8 + .../oem/indexes-and-search/indexing/basics.md | 657 + .../indexing/index-utilization.md | 149 + .../indexing/which-index-to-use-when.md | 204 + .../indexing/working-with-indexes/_index.md | 376 + .../working-with-indexes/fulltext-indexes.md | 106 + .../geo-spatial-indexes.md | 434 + .../working-with-indexes/inverted-indexes.md | 630 + .../multi-dimensional-indexes.md | 172 + .../persistent-indexes.md | 309 + .../working-with-indexes/ttl-indexes.md | 202 + .../vertex-centric-indexes.md | 93 + .../content/arangodb/oem/operations/_index.md | 6 + .../oem/operations/administration/_index.md | 49 + .../administration/arangodb-starter/_index.md | 8 + .../arangodb-starter/recovery-procedure.md | 44 + .../arangodb-starter/removal-procedure.md | 69 + .../administration/configuration.md | 343 + .../administration/import-and-export.md | 11 + .../administration/license-management.md | 94 + .../operations/administration/log-levels.md | 154 + .../administration/reduce-memory-footprint.md | 725 + .../operations/administration/telemetrics.md | 129 + .../administration/user-management/_index.md | 433 + .../user-management/in-arangosh.md | 364 + .../oem/operations/backup-and-restore.md | 345 + .../oem/operations/installation/_index.md | 82 + .../installation/compiling/_index.md | 26 + .../compiling/compile-on-debian.md | 273 + .../compiling/compile-on-windows.md | 190 + .../compiling/recompiling-jemalloc.md | 54 + .../compiling/running-custom-build.md | 54 + .../oem/operations/installation/docker.md | 263 + .../operations/installation/linux/_index.md | 90 + .../linux/linux-os-tuning-script-examples.md | 132 + .../linux/operating-system-configuration.md | 192 + .../oem/operations/installation/macos.md | 43 + .../operations/installation/uninstallation.md | 35 + .../oem/operations/installation/windows.md | 254 + .../oem/operations/security/_index.md | 7 + .../oem/operations/security/audit-logging.md | 291 + .../security/change-root-password.md | 37 + .../operations/security/encryption-at-rest.md | 145 + .../security/securing-starter-deployments.md | 39 + .../operations/security/security-options.md | 372 + .../oem/operations/troubleshooting/_index.md | 7 + .../oem/operations/troubleshooting/arangod.md | 136 + .../troubleshooting/cluster/_index.md | 29 + .../troubleshooting/cluster/agency-dump.md | 40 + .../troubleshooting/emergency-console.md | 47 + .../troubleshooting/query-debug-packages.md | 75 + .../oem/operations/upgrading/_index.md | 88 + .../community-to-enterprise-upgrade.md | 61 + 
.../oem/operations/upgrading/downgrading.md | 85 + .../upgrading/manual-deployments/_index.md | 8 + .../manual-deployments/active-failover.md | 213 + .../upgrading/manual-deployments/cluster.md | 238 + .../os-specific-information/_index.md | 6 + .../os-specific-information/linux.md | 72 + .../os-specific-information/macos.md | 33 + .../os-specific-information/windows.md | 122 + .../upgrading/starter-deployments.md | 295 + .../arangodb/oem/release-notes/_index.md | 101 + .../deprecated-and-removed-features.md | 305 + .../oem/release-notes/version-3.0/_index.md | 6 + .../incompatible-changes-in-3-0.md | 1093 ++ .../version-3.0/whats-new-in-3-0.md | 536 + .../oem/release-notes/version-3.1/_index.md | 6 + .../incompatible-changes-in-3-1.md | 137 + .../version-3.1/whats-new-in-3-1.md | 222 + .../oem/release-notes/version-3.10/_index.md | 6 + .../version-3.10/api-changes-in-3-10.md | 1006 ++ .../incompatible-changes-in-3-10.md | 395 + .../version-3.10/known-issues-in-3-10.md | 63 + .../version-3.10/whats-new-in-3-10.md | 1822 ++ .../oem/release-notes/version-3.11/_index.md | 6 + .../version-3.11/api-changes-in-3-11.md | 881 + .../incompatible-changes-in-3-11.md | 695 + .../version-3.11/known-issues-in-3-11.md | 61 + .../version-3.11/whats-new-in-3-11.md | 1458 ++ .../oem/release-notes/version-3.2/_index.md | 6 + .../incompatible-changes-in-3-2.md | 139 + .../version-3.2/known-issues-in-3-2.md | 105 + .../version-3.2/whats-new-in-3-2.md | 388 + .../oem/release-notes/version-3.3/_index.md | 6 + .../incompatible-changes-in-3-3.md | 62 + .../version-3.3/known-issues-in-3-3.md | 18 + .../version-3.3/whats-new-in-3-3.md | 302 + .../oem/release-notes/version-3.4/_index.md | 6 + .../incompatible-changes-in-3-4.md | 842 + .../version-3.4/known-issues-in-3-4.md | 58 + .../version-3.4/whats-new-in-3-4.md | 1146 ++ .../oem/release-notes/version-3.5/_index.md | 6 + .../incompatible-changes-in-3-5.md | 163 + .../version-3.5/known-issues-in-3-5.md | 98 + .../version-3.5/whats-new-in-3-5.md | 734 + .../oem/release-notes/version-3.6/_index.md | 6 + .../incompatible-changes-in-3-6.md | 60 + .../version-3.6/known-issues-in-3-6.md | 95 + .../version-3.6/whats-new-in-3-6.md | 872 + .../oem/release-notes/version-3.7/_index.md | 6 + .../version-3.7/api-changes-in-3-7.md | 240 + .../incompatible-changes-in-3-7.md | 241 + .../version-3.7/known-issues-in-3-7.md | 81 + .../version-3.7/whats-new-in-3-7.md | 1004 ++ .../oem/release-notes/version-3.8/_index.md | 6 + .../version-3.8/api-changes-in-3-8.md | 374 + .../incompatible-changes-in-3-8.md | 568 + .../version-3.8/known-issues-in-3-8.md | 68 + .../version-3.8/whats-new-in-3-8.md | 1147 ++ .../oem/release-notes/version-3.9/_index.md | 6 + .../version-3.9/api-changes-in-3-9.md | 504 + .../incompatible-changes-in-3-9.md | 304 + .../version-3.9/known-issues-in-3-9.md | 69 + .../version-3.9/whats-new-in-3-9.md | 1266 ++ .../oem/release-notes/version-oem/_index.md | 6 + .../version-oem/api-changes-in-oem.md | 881 + .../incompatible-changes-in-oem.md | 695 + .../version-oem/known-issues-in-oem.md | 61 + .../version-oem/whats-new-in-oem.md | 1458 ++ site/content/arangodb/oem/use-cases.md | 162 + site/data/oem/allMetrics.yaml | 7073 ++++++++ site/data/oem/arangobackup.json | 1349 ++ site/data/oem/arangobench.json | 1738 ++ site/data/oem/arangod.json | 14457 ++++++++++++++++ site/data/oem/arangodump.json | 1779 ++ site/data/oem/arangoexport.json | 1510 ++ site/data/oem/arangoimport.json | 1763 ++ site/data/oem/arangoinspect.json | 2033 +++ site/data/oem/arangorestore.json | 1750 
++ site/data/oem/arangosh.json | 2057 +++ site/data/oem/arangovpack.json | 1002 ++ site/data/oem/cache.json | 4158 +++++ site/data/oem/errors.yaml | 1465 ++ site/data/oem/exitcodes.yaml | 87 + site/data/oem/optimizer-rules.json | 794 + 550 files changed, 210263 insertions(+), 6 deletions(-) create mode 100644 site/content/arangodb/3.12/release-notes/version-oem/_index.md create mode 100644 site/content/arangodb/3.12/release-notes/version-oem/api-changes-in-oem.md create mode 100644 site/content/arangodb/3.12/release-notes/version-oem/incompatible-changes-in-oem.md create mode 100644 site/content/arangodb/3.12/release-notes/version-oem/known-issues-in-oem.md create mode 100644 site/content/arangodb/3.12/release-notes/version-oem/whats-new-in-oem.md create mode 100644 site/content/arangodb/4.0/release-notes/version-oem/_index.md create mode 100644 site/content/arangodb/4.0/release-notes/version-oem/api-changes-in-oem.md create mode 100644 site/content/arangodb/4.0/release-notes/version-oem/incompatible-changes-in-oem.md create mode 100644 site/content/arangodb/4.0/release-notes/version-oem/known-issues-in-oem.md create mode 100644 site/content/arangodb/4.0/release-notes/version-oem/whats-new-in-oem.md create mode 100644 site/content/arangodb/oem/_index.md create mode 100644 site/content/arangodb/oem/aql/_index.md create mode 100644 site/content/arangodb/oem/aql/common-errors.md create mode 100644 site/content/arangodb/oem/aql/data-queries.md create mode 100644 site/content/arangodb/oem/aql/examples-and-query-patterns/_index.md create mode 100644 site/content/arangodb/oem/aql/examples-and-query-patterns/actors-and-movies-dataset-queries.md create mode 100644 site/content/arangodb/oem/aql/examples-and-query-patterns/counting.md create mode 100644 site/content/arangodb/oem/aql/examples-and-query-patterns/create-test-data.md create mode 100644 site/content/arangodb/oem/aql/examples-and-query-patterns/diffing-two-documents.md create mode 100644 site/content/arangodb/oem/aql/examples-and-query-patterns/dynamic-attribute-names.md create mode 100644 site/content/arangodb/oem/aql/examples-and-query-patterns/grouping.md create mode 100644 site/content/arangodb/oem/aql/examples-and-query-patterns/joins.md create mode 100644 site/content/arangodb/oem/aql/examples-and-query-patterns/projections-and-filters.md create mode 100644 site/content/arangodb/oem/aql/examples-and-query-patterns/queries-without-collections.md create mode 100644 site/content/arangodb/oem/aql/examples-and-query-patterns/remove-vertex.md create mode 100644 site/content/arangodb/oem/aql/examples-and-query-patterns/traversals.md create mode 100644 site/content/arangodb/oem/aql/examples-and-query-patterns/upsert-repsert-guide.md create mode 100644 site/content/arangodb/oem/aql/execution-and-performance/_index.md create mode 100644 site/content/arangodb/oem/aql/execution-and-performance/caching-query-results.md create mode 100644 site/content/arangodb/oem/aql/execution-and-performance/explaining-queries.md create mode 100644 site/content/arangodb/oem/aql/execution-and-performance/parsing-queries.md create mode 100644 site/content/arangodb/oem/aql/execution-and-performance/query-optimization.md create mode 100644 site/content/arangodb/oem/aql/execution-and-performance/query-profiling.md create mode 100644 site/content/arangodb/oem/aql/execution-and-performance/query-statistics.md create mode 100644 site/content/arangodb/oem/aql/functions/_index.md create mode 100644 site/content/arangodb/oem/aql/functions/arangosearch.md create mode 100644 
site/content/arangodb/oem/aql/functions/array.md create mode 100644 site/content/arangodb/oem/aql/functions/bit.md create mode 100644 site/content/arangodb/oem/aql/functions/date.md create mode 100644 site/content/arangodb/oem/aql/functions/document-object.md create mode 100644 site/content/arangodb/oem/aql/functions/fulltext.md create mode 100644 site/content/arangodb/oem/aql/functions/geo.md create mode 100644 site/content/arangodb/oem/aql/functions/miscellaneous.md create mode 100644 site/content/arangodb/oem/aql/functions/numeric.md create mode 100644 site/content/arangodb/oem/aql/functions/string.md create mode 100644 site/content/arangodb/oem/aql/functions/type-check-and-cast.md create mode 100644 site/content/arangodb/oem/aql/fundamentals/_index.md create mode 100644 site/content/arangodb/oem/aql/fundamentals/accessing-data-from-collections.md create mode 100644 site/content/arangodb/oem/aql/fundamentals/bind-parameters.md create mode 100644 site/content/arangodb/oem/aql/fundamentals/data-types.md create mode 100644 site/content/arangodb/oem/aql/fundamentals/limitations.md create mode 100644 site/content/arangodb/oem/aql/fundamentals/query-errors.md create mode 100644 site/content/arangodb/oem/aql/fundamentals/query-results.md create mode 100644 site/content/arangodb/oem/aql/fundamentals/subqueries.md create mode 100644 site/content/arangodb/oem/aql/fundamentals/syntax.md create mode 100644 site/content/arangodb/oem/aql/fundamentals/type-and-value-order.md create mode 100644 site/content/arangodb/oem/aql/graph-queries/_index.md create mode 100644 site/content/arangodb/oem/aql/graph-queries/all-shortest-paths.md create mode 100644 site/content/arangodb/oem/aql/graph-queries/k-paths.md create mode 100644 site/content/arangodb/oem/aql/graph-queries/k-shortest-paths.md create mode 100644 site/content/arangodb/oem/aql/graph-queries/shortest-path.md create mode 100644 site/content/arangodb/oem/aql/graph-queries/traversals-explained.md create mode 100644 site/content/arangodb/oem/aql/graph-queries/traversals.md create mode 100644 site/content/arangodb/oem/aql/high-level-operations/_index.md create mode 100644 site/content/arangodb/oem/aql/high-level-operations/collect.md create mode 100644 site/content/arangodb/oem/aql/high-level-operations/filter.md create mode 100644 site/content/arangodb/oem/aql/high-level-operations/for.md create mode 100644 site/content/arangodb/oem/aql/high-level-operations/insert.md create mode 100644 site/content/arangodb/oem/aql/high-level-operations/let.md create mode 100644 site/content/arangodb/oem/aql/high-level-operations/limit.md create mode 100644 site/content/arangodb/oem/aql/high-level-operations/remove.md create mode 100644 site/content/arangodb/oem/aql/high-level-operations/replace.md create mode 100644 site/content/arangodb/oem/aql/high-level-operations/return.md create mode 100644 site/content/arangodb/oem/aql/high-level-operations/search.md create mode 100644 site/content/arangodb/oem/aql/high-level-operations/sort.md create mode 100644 site/content/arangodb/oem/aql/high-level-operations/update.md create mode 100644 site/content/arangodb/oem/aql/high-level-operations/upsert.md create mode 100644 site/content/arangodb/oem/aql/high-level-operations/window.md create mode 100644 site/content/arangodb/oem/aql/high-level-operations/with.md create mode 100644 site/content/arangodb/oem/aql/how-to-invoke-aql/_index.md create mode 100644 site/content/arangodb/oem/aql/how-to-invoke-aql/with-arangosh.md create mode 100644 
site/content/arangodb/oem/aql/how-to-invoke-aql/with-the-web-interface.md create mode 100644 site/content/arangodb/oem/aql/operators.md create mode 100644 site/content/arangodb/oem/aql/user-defined-functions.md create mode 100644 site/content/arangodb/oem/components/_index.md create mode 100644 site/content/arangodb/oem/components/arangodb-server/_index.md create mode 100644 site/content/arangodb/oem/components/arangodb-server/environment-variables.md create mode 100644 site/content/arangodb/oem/components/arangodb-server/ldap.md create mode 100644 site/content/arangodb/oem/components/arangodb-server/options.md create mode 100644 site/content/arangodb/oem/components/arangodb-server/storage-engine.md create mode 100644 site/content/arangodb/oem/components/tools/_index.md create mode 100644 site/content/arangodb/oem/components/tools/arangobackup/_index.md create mode 100644 site/content/arangodb/oem/components/tools/arangobackup/examples.md create mode 100644 site/content/arangodb/oem/components/tools/arangobackup/options.md create mode 100644 site/content/arangodb/oem/components/tools/arangobench/_index.md create mode 100644 site/content/arangodb/oem/components/tools/arangobench/options.md create mode 100644 site/content/arangodb/oem/components/tools/arangodb-shell/_index.md create mode 100644 site/content/arangodb/oem/components/tools/arangodb-shell/details.md create mode 100644 site/content/arangodb/oem/components/tools/arangodb-shell/examples.md create mode 100644 site/content/arangodb/oem/components/tools/arangodb-shell/options.md create mode 100644 site/content/arangodb/oem/components/tools/arangodb-starter/_index.md create mode 100644 site/content/arangodb/oem/components/tools/arangodb-starter/architecture.md create mode 100644 site/content/arangodb/oem/components/tools/arangodb-starter/options.md create mode 100644 site/content/arangodb/oem/components/tools/arangodb-starter/security.md create mode 100644 site/content/arangodb/oem/components/tools/arangodump/_index.md create mode 100644 site/content/arangodb/oem/components/tools/arangodump/examples.md create mode 100644 site/content/arangodb/oem/components/tools/arangodump/limitations.md create mode 100644 site/content/arangodb/oem/components/tools/arangodump/maskings.md create mode 100644 site/content/arangodb/oem/components/tools/arangodump/options.md create mode 100644 site/content/arangodb/oem/components/tools/arangoexport/_index.md create mode 100644 site/content/arangodb/oem/components/tools/arangoexport/examples.md create mode 100644 site/content/arangodb/oem/components/tools/arangoexport/options.md create mode 100644 site/content/arangodb/oem/components/tools/arangoimport/_index.md create mode 100644 site/content/arangodb/oem/components/tools/arangoimport/details.md create mode 100644 site/content/arangodb/oem/components/tools/arangoimport/examples-csv.md create mode 100644 site/content/arangodb/oem/components/tools/arangoimport/examples-json.md create mode 100644 site/content/arangodb/oem/components/tools/arangoimport/options.md create mode 100644 site/content/arangodb/oem/components/tools/arangoinspect/_index.md create mode 100644 site/content/arangodb/oem/components/tools/arangoinspect/examples.md create mode 100644 site/content/arangodb/oem/components/tools/arangoinspect/options.md create mode 100644 site/content/arangodb/oem/components/tools/arangorestore/_index.md create mode 100644 site/content/arangodb/oem/components/tools/arangorestore/examples.md create mode 100644 
site/content/arangodb/oem/components/tools/arangorestore/options.md create mode 100644 site/content/arangodb/oem/components/tools/arangovpack/_index.md create mode 100644 site/content/arangodb/oem/components/tools/arangovpack/options.md create mode 100644 site/content/arangodb/oem/components/tools/foxx-cli/_index.md create mode 100644 site/content/arangodb/oem/components/tools/foxx-cli/details.md create mode 100644 site/content/arangodb/oem/components/web-interface/_index.md create mode 100644 site/content/arangodb/oem/components/web-interface/cluster.md create mode 100644 site/content/arangodb/oem/components/web-interface/collections.md create mode 100644 site/content/arangodb/oem/components/web-interface/dashboard.md create mode 100644 site/content/arangodb/oem/components/web-interface/document.md create mode 100644 site/content/arangodb/oem/components/web-interface/graphs.md create mode 100644 site/content/arangodb/oem/components/web-interface/logs.md create mode 100644 site/content/arangodb/oem/components/web-interface/queries.md create mode 100644 site/content/arangodb/oem/components/web-interface/services.md create mode 100644 site/content/arangodb/oem/components/web-interface/users.md create mode 100644 site/content/arangodb/oem/concepts/_index.md create mode 100644 site/content/arangodb/oem/concepts/data-models.md create mode 100644 site/content/arangodb/oem/concepts/data-retrieval.md create mode 100644 site/content/arangodb/oem/concepts/data-structure/_index.md create mode 100644 site/content/arangodb/oem/concepts/data-structure/collections.md create mode 100644 site/content/arangodb/oem/concepts/data-structure/databases.md create mode 100644 site/content/arangodb/oem/concepts/data-structure/documents/_index.md create mode 100644 site/content/arangodb/oem/concepts/data-structure/documents/computed-values.md create mode 100644 site/content/arangodb/oem/concepts/data-structure/documents/schema-validation.md create mode 100644 site/content/arangodb/oem/concepts/data-structure/views.md create mode 100644 site/content/arangodb/oem/data-science/_index.md create mode 100644 site/content/arangodb/oem/data-science/arangograph-notebooks.md create mode 100644 site/content/arangodb/oem/data-science/arangographml/_index.md create mode 100644 site/content/arangodb/oem/data-science/arangographml/deploy.md create mode 100644 site/content/arangodb/oem/data-science/arangographml/getting-started.md create mode 100644 site/content/arangodb/oem/data-science/llm-knowledge-graphs.md create mode 100644 site/content/arangodb/oem/data-science/pregel/_index.md create mode 100644 site/content/arangodb/oem/data-science/pregel/algorithms.md create mode 100644 site/content/arangodb/oem/deploy/_index.md create mode 100644 site/content/arangodb/oem/deploy/active-failover/_index.md create mode 100644 site/content/arangodb/oem/deploy/active-failover/administration.md create mode 100644 site/content/arangodb/oem/deploy/active-failover/manual-start.md create mode 100644 site/content/arangodb/oem/deploy/active-failover/using-the-arangodb-starter.md create mode 100644 site/content/arangodb/oem/deploy/arangosync/_index.md create mode 100644 site/content/arangodb/oem/deploy/arangosync/administration.md create mode 100644 site/content/arangodb/oem/deploy/arangosync/deployment/_index.md create mode 100644 site/content/arangodb/oem/deploy/arangosync/deployment/arangodb-cluster.md create mode 100644 site/content/arangodb/oem/deploy/arangosync/deployment/arangosync-master.md create mode 100644 
site/content/arangodb/oem/deploy/arangosync/deployment/arangosync-workers.md create mode 100644 site/content/arangodb/oem/deploy/arangosync/deployment/prometheus-and-grafana.md create mode 100644 site/content/arangodb/oem/deploy/arangosync/monitoring.md create mode 100644 site/content/arangodb/oem/deploy/arangosync/operations-and-maintenance.md create mode 100644 site/content/arangodb/oem/deploy/arangosync/security.md create mode 100644 site/content/arangodb/oem/deploy/arangosync/troubleshooting.md create mode 100644 site/content/arangodb/oem/deploy/architecture/_index.md create mode 100644 site/content/arangodb/oem/deploy/architecture/data-sharding.md create mode 100644 site/content/arangodb/oem/deploy/architecture/replication.md create mode 100644 site/content/arangodb/oem/deploy/architecture/scalability.md create mode 100644 site/content/arangodb/oem/deploy/cluster/_index.md create mode 100644 site/content/arangodb/oem/deploy/cluster/administration.md create mode 100644 site/content/arangodb/oem/deploy/cluster/deployment/_index.md create mode 100644 site/content/arangodb/oem/deploy/cluster/deployment/manual-start.md create mode 100644 site/content/arangodb/oem/deploy/cluster/deployment/using-the-arangodb-starter.md create mode 100644 site/content/arangodb/oem/deploy/cluster/limitations.md create mode 100644 site/content/arangodb/oem/deploy/in-the-cloud.md create mode 100644 site/content/arangodb/oem/deploy/kubernetes.md create mode 100644 site/content/arangodb/oem/deploy/oneshard.md create mode 100644 site/content/arangodb/oem/deploy/production-checklist.md create mode 100644 site/content/arangodb/oem/deploy/single-instance-vs-cluster.md create mode 100644 site/content/arangodb/oem/deploy/single-instance/_index.md create mode 100644 site/content/arangodb/oem/deploy/single-instance/manual-start.md create mode 100644 site/content/arangodb/oem/deploy/single-instance/using-the-arangodb-starter.md create mode 100644 site/content/arangodb/oem/develop/_index.md create mode 100644 site/content/arangodb/oem/develop/drivers/_index.md create mode 100644 site/content/arangodb/oem/develop/drivers/go.md create mode 100644 site/content/arangodb/oem/develop/drivers/java/_index.md create mode 100644 site/content/arangodb/oem/develop/drivers/java/reference-version-6/_index.md create mode 100644 site/content/arangodb/oem/develop/drivers/java/reference-version-6/driver-setup.md create mode 100644 site/content/arangodb/oem/develop/drivers/java/reference-version-6/serialization.md create mode 100644 site/content/arangodb/oem/develop/drivers/java/reference-version-7/_index.md create mode 100644 site/content/arangodb/oem/develop/drivers/java/reference-version-7/changes-in-version-7.md create mode 100644 site/content/arangodb/oem/develop/drivers/java/reference-version-7/driver-setup.md create mode 100644 site/content/arangodb/oem/develop/drivers/java/reference-version-7/serialization.md create mode 100644 site/content/arangodb/oem/develop/drivers/javascript.md create mode 100644 site/content/arangodb/oem/develop/drivers/python.md create mode 100644 site/content/arangodb/oem/develop/error-codes.md create mode 100644 site/content/arangodb/oem/develop/exit-codes.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/_index.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/deployment.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/getting-started.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/guides/_index.md create 
mode 100644 site/content/arangodb/oem/develop/foxx-microservices/guides/access-from-the-browser.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/guides/authentication-and-sessions.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/guides/development-mode.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/guides/foxx-in-a-cluster.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/guides/linking-services-together.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/guides/making-requests.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/guides/scripts-and-scheduling.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/guides/testing-foxx-services.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/guides/using-node-modules.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/guides/using-webpack-with-foxx.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/guides/working-with-collections.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/guides/working-with-files.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/guides/working-with-routers.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/guides/writing-queries.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/_index.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/configuration.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/related-modules/_index.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/related-modules/authentication.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/related-modules/graphql.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/related-modules/oauth-1-0a.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/related-modules/oauth-2-0.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/related-modules/queues.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/routers/_index.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/routers/endpoints.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/routers/middleware.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/routers/request.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/routers/response.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/service-context.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/service-manifest.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/sessions-middleware/_index.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/sessions-middleware/session-storages/_index.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/sessions-middleware/session-storages/collection-storage.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/sessions-middleware/session-storages/jwt-storage.md create mode 100644 
site/content/arangodb/oem/develop/foxx-microservices/reference/sessions-middleware/session-transports/_index.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/sessions-middleware/session-transports/cookie-transport.md create mode 100644 site/content/arangodb/oem/develop/foxx-microservices/reference/sessions-middleware/session-transports/header-transport.md create mode 100644 site/content/arangodb/oem/develop/http-api/_index.md create mode 100644 site/content/arangodb/oem/develop/http-api/administration.md create mode 100644 site/content/arangodb/oem/develop/http-api/analyzers.md create mode 100644 site/content/arangodb/oem/develop/http-api/authentication.md create mode 100644 site/content/arangodb/oem/develop/http-api/batch-requests.md create mode 100644 site/content/arangodb/oem/develop/http-api/cluster.md create mode 100644 site/content/arangodb/oem/develop/http-api/collections.md create mode 100644 site/content/arangodb/oem/develop/http-api/databases.md create mode 100644 site/content/arangodb/oem/develop/http-api/documents.md create mode 100644 site/content/arangodb/oem/develop/http-api/foxx.md create mode 100644 site/content/arangodb/oem/develop/http-api/general-request-handling.md create mode 100644 site/content/arangodb/oem/develop/http-api/graphs/_index.md create mode 100644 site/content/arangodb/oem/develop/http-api/graphs/edges.md create mode 100644 site/content/arangodb/oem/develop/http-api/graphs/named-graphs.md create mode 100644 site/content/arangodb/oem/develop/http-api/hot-backups.md create mode 100644 site/content/arangodb/oem/develop/http-api/import.md create mode 100644 site/content/arangodb/oem/develop/http-api/indexes/_index.md create mode 100644 site/content/arangodb/oem/develop/http-api/indexes/fulltext.md create mode 100644 site/content/arangodb/oem/develop/http-api/indexes/geo-spatial.md create mode 100644 site/content/arangodb/oem/develop/http-api/indexes/inverted.md create mode 100644 site/content/arangodb/oem/develop/http-api/indexes/multi-dimensional.md create mode 100644 site/content/arangodb/oem/develop/http-api/indexes/persistent.md create mode 100644 site/content/arangodb/oem/develop/http-api/indexes/ttl.md create mode 100644 site/content/arangodb/oem/develop/http-api/jobs.md create mode 100644 site/content/arangodb/oem/develop/http-api/monitoring/_index.md create mode 100644 site/content/arangodb/oem/develop/http-api/monitoring/logs.md create mode 100644 site/content/arangodb/oem/develop/http-api/monitoring/metrics.md create mode 100644 site/content/arangodb/oem/develop/http-api/monitoring/statistics.md create mode 100644 site/content/arangodb/oem/develop/http-api/pregel.md create mode 100644 site/content/arangodb/oem/develop/http-api/queries/_index.md create mode 100644 site/content/arangodb/oem/develop/http-api/queries/aql-queries.md create mode 100644 site/content/arangodb/oem/develop/http-api/queries/aql-query-results-cache.md create mode 100644 site/content/arangodb/oem/develop/http-api/queries/user-defined-aql-functions.md create mode 100644 site/content/arangodb/oem/develop/http-api/replication/_index.md create mode 100644 site/content/arangodb/oem/develop/http-api/replication/other-replication-commands.md create mode 100644 site/content/arangodb/oem/develop/http-api/replication/replication-applier.md create mode 100644 site/content/arangodb/oem/develop/http-api/replication/replication-dump.md create mode 100644 site/content/arangodb/oem/develop/http-api/replication/replication-logger.md create mode 100644 
site/content/arangodb/oem/develop/http-api/replication/write-ahead-log.md create mode 100644 site/content/arangodb/oem/develop/http-api/security.md create mode 100644 site/content/arangodb/oem/develop/http-api/tasks.md create mode 100644 site/content/arangodb/oem/develop/http-api/transactions/_index.md create mode 100644 site/content/arangodb/oem/develop/http-api/transactions/javascript-transactions.md create mode 100644 site/content/arangodb/oem/develop/http-api/transactions/stream-transactions.md create mode 100644 site/content/arangodb/oem/develop/http-api/users.md create mode 100644 site/content/arangodb/oem/develop/http-api/views/_index.md create mode 100644 site/content/arangodb/oem/develop/http-api/views/arangosearch-views.md create mode 100644 site/content/arangodb/oem/develop/http-api/views/search-alias-views.md create mode 100644 site/content/arangodb/oem/develop/integrations/_index.md create mode 100644 site/content/arangodb/oem/develop/integrations/arangodb-datasource-for-apache-spark.md create mode 100644 site/content/arangodb/oem/develop/integrations/kafka-connect-arangodb-sink-connector/_index.md create mode 100644 site/content/arangodb/oem/develop/integrations/kafka-connect-arangodb-sink-connector/configuration.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-boot-arangodb.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/_index.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/migration.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-3/_index.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-3/mapping/_index.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-3/mapping/auditing.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-3/mapping/converter.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-3/mapping/document.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-3/mapping/edge.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-3/mapping/events.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-3/mapping/indexes.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-3/mapping/reference.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-3/mapping/relations.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-3/repositories/_index.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-3/repositories/queries/_index.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-3/repositories/queries/derived-queries.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-3/repositories/queries/named-queries.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-3/repositories/queries/query-methods.md create mode 100644 
site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-3/template.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-4/_index.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-4/mapping/_index.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-4/mapping/auditing.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-4/mapping/computed-values.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-4/mapping/converter.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-4/mapping/document.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-4/mapping/edge.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-4/mapping/events.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-4/mapping/indexes.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-4/mapping/reference.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-4/mapping/relations.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-4/repositories/_index.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-4/repositories/queries/_index.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-4/repositories/queries/derived-queries.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-4/repositories/queries/named-queries.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-4/repositories/queries/query-methods.md create mode 100644 site/content/arangodb/oem/develop/integrations/spring-data-arangodb/reference-version-4/template.md create mode 100644 site/content/arangodb/oem/develop/javascript-api/@arangodb/_index.md create mode 100644 site/content/arangodb/oem/develop/javascript-api/@arangodb/collection-object.md create mode 100644 site/content/arangodb/oem/develop/javascript-api/@arangodb/cursor-object.md create mode 100644 site/content/arangodb/oem/develop/javascript-api/@arangodb/db-object.md create mode 100644 site/content/arangodb/oem/develop/javascript-api/@arangodb/view-object.md create mode 100644 site/content/arangodb/oem/develop/javascript-api/_index.md create mode 100644 site/content/arangodb/oem/develop/javascript-api/actions.md create mode 100644 site/content/arangodb/oem/develop/javascript-api/analyzers.md create mode 100644 site/content/arangodb/oem/develop/javascript-api/aql-queries.md create mode 100644 site/content/arangodb/oem/develop/javascript-api/console.md create mode 100644 site/content/arangodb/oem/develop/javascript-api/crypto.md create mode 100644 site/content/arangodb/oem/develop/javascript-api/fs.md create mode 100644 site/content/arangodb/oem/develop/javascript-api/request.md create mode 100644 site/content/arangodb/oem/develop/javascript-api/tasks.md create mode 100644 
site/content/arangodb/oem/develop/operational-factors.md create mode 100644 site/content/arangodb/oem/develop/satellitecollections.md create mode 100644 site/content/arangodb/oem/develop/smartjoins.md create mode 100644 site/content/arangodb/oem/develop/transactions/_index.md create mode 100644 site/content/arangodb/oem/develop/transactions/durability.md create mode 100644 site/content/arangodb/oem/develop/transactions/javascript-transactions.md create mode 100644 site/content/arangodb/oem/develop/transactions/limitations.md create mode 100644 site/content/arangodb/oem/develop/transactions/locking-and-isolation.md create mode 100644 site/content/arangodb/oem/develop/transactions/stream-transactions.md create mode 100644 site/content/arangodb/oem/features/_index.md create mode 100644 site/content/arangodb/oem/features/community-edition.md create mode 100644 site/content/arangodb/oem/features/enterprise-edition.md create mode 100644 site/content/arangodb/oem/features/highlights-by-version.md create mode 100644 site/content/arangodb/oem/get-started/_index.md create mode 100644 site/content/arangodb/oem/get-started/how-to-interact-with-arangodb.md create mode 100644 site/content/arangodb/oem/get-started/on-premises-installation.md create mode 100644 site/content/arangodb/oem/get-started/set-up-a-cloud-instance.md create mode 100644 site/content/arangodb/oem/get-started/start-using-aql/_index.md create mode 100644 site/content/arangodb/oem/get-started/start-using-aql/crud.md create mode 100644 site/content/arangodb/oem/get-started/start-using-aql/dataset.md create mode 100644 site/content/arangodb/oem/get-started/start-using-aql/filter.md create mode 100644 site/content/arangodb/oem/get-started/start-using-aql/geo.md create mode 100644 site/content/arangodb/oem/get-started/start-using-aql/graphs.md create mode 100644 site/content/arangodb/oem/get-started/start-using-aql/joins.md create mode 100644 site/content/arangodb/oem/get-started/start-using-aql/sort-limit.md create mode 100644 site/content/arangodb/oem/graphs/_index.md create mode 100644 site/content/arangodb/oem/graphs/enterprisegraphs/_index.md create mode 100644 site/content/arangodb/oem/graphs/enterprisegraphs/getting-started.md create mode 100644 site/content/arangodb/oem/graphs/enterprisegraphs/management.md create mode 100644 site/content/arangodb/oem/graphs/example-graphs.md create mode 100644 site/content/arangodb/oem/graphs/general-graphs/_index.md create mode 100644 site/content/arangodb/oem/graphs/general-graphs/functions.md create mode 100644 site/content/arangodb/oem/graphs/general-graphs/management.md create mode 100644 site/content/arangodb/oem/graphs/satellitegraphs/_index.md create mode 100644 site/content/arangodb/oem/graphs/satellitegraphs/details.md create mode 100644 site/content/arangodb/oem/graphs/satellitegraphs/management.md create mode 100644 site/content/arangodb/oem/graphs/smartgraphs/_index.md create mode 100644 site/content/arangodb/oem/graphs/smartgraphs/getting-started.md create mode 100644 site/content/arangodb/oem/graphs/smartgraphs/management.md create mode 100644 site/content/arangodb/oem/graphs/smartgraphs/testing-graphs-on-single-server.md create mode 100644 site/content/arangodb/oem/graphs/working-with-edges.md create mode 100644 site/content/arangodb/oem/indexes-and-search/_index.md create mode 100644 site/content/arangodb/oem/indexes-and-search/analyzers.md create mode 100644 site/content/arangodb/oem/indexes-and-search/arangosearch/_index.md create mode 100644 
site/content/arangodb/oem/indexes-and-search/arangosearch/arangosearch-views-reference.md create mode 100644 site/content/arangodb/oem/indexes-and-search/arangosearch/case-sensitivity-and-diacritics.md create mode 100644 site/content/arangodb/oem/indexes-and-search/arangosearch/exact-value-matching.md create mode 100644 site/content/arangodb/oem/indexes-and-search/arangosearch/example-datasets.md create mode 100644 site/content/arangodb/oem/indexes-and-search/arangosearch/faceted-search.md create mode 100644 site/content/arangodb/oem/indexes-and-search/arangosearch/full-text-token-search.md create mode 100644 site/content/arangodb/oem/indexes-and-search/arangosearch/fuzzy-search.md create mode 100644 site/content/arangodb/oem/indexes-and-search/arangosearch/geospatial-search.md create mode 100644 site/content/arangodb/oem/indexes-and-search/arangosearch/nested-search.md create mode 100644 site/content/arangodb/oem/indexes-and-search/arangosearch/performance.md create mode 100644 site/content/arangodb/oem/indexes-and-search/arangosearch/phrase-and-proximity-search.md create mode 100644 site/content/arangodb/oem/indexes-and-search/arangosearch/prefix-matching.md create mode 100644 site/content/arangodb/oem/indexes-and-search/arangosearch/range-queries.md create mode 100644 site/content/arangodb/oem/indexes-and-search/arangosearch/ranking.md create mode 100644 site/content/arangodb/oem/indexes-and-search/arangosearch/search-alias-views-reference.md create mode 100644 site/content/arangodb/oem/indexes-and-search/arangosearch/search-highlighting.md create mode 100644 site/content/arangodb/oem/indexes-and-search/arangosearch/wildcard-search.md create mode 100644 site/content/arangodb/oem/indexes-and-search/indexing/_index.md create mode 100644 site/content/arangodb/oem/indexes-and-search/indexing/basics.md create mode 100644 site/content/arangodb/oem/indexes-and-search/indexing/index-utilization.md create mode 100644 site/content/arangodb/oem/indexes-and-search/indexing/which-index-to-use-when.md create mode 100644 site/content/arangodb/oem/indexes-and-search/indexing/working-with-indexes/_index.md create mode 100644 site/content/arangodb/oem/indexes-and-search/indexing/working-with-indexes/fulltext-indexes.md create mode 100644 site/content/arangodb/oem/indexes-and-search/indexing/working-with-indexes/geo-spatial-indexes.md create mode 100644 site/content/arangodb/oem/indexes-and-search/indexing/working-with-indexes/inverted-indexes.md create mode 100644 site/content/arangodb/oem/indexes-and-search/indexing/working-with-indexes/multi-dimensional-indexes.md create mode 100644 site/content/arangodb/oem/indexes-and-search/indexing/working-with-indexes/persistent-indexes.md create mode 100644 site/content/arangodb/oem/indexes-and-search/indexing/working-with-indexes/ttl-indexes.md create mode 100644 site/content/arangodb/oem/indexes-and-search/indexing/working-with-indexes/vertex-centric-indexes.md create mode 100644 site/content/arangodb/oem/operations/_index.md create mode 100644 site/content/arangodb/oem/operations/administration/_index.md create mode 100644 site/content/arangodb/oem/operations/administration/arangodb-starter/_index.md create mode 100644 site/content/arangodb/oem/operations/administration/arangodb-starter/recovery-procedure.md create mode 100644 site/content/arangodb/oem/operations/administration/arangodb-starter/removal-procedure.md create mode 100644 site/content/arangodb/oem/operations/administration/configuration.md create mode 100644 
site/content/arangodb/oem/operations/administration/import-and-export.md create mode 100644 site/content/arangodb/oem/operations/administration/license-management.md create mode 100644 site/content/arangodb/oem/operations/administration/log-levels.md create mode 100644 site/content/arangodb/oem/operations/administration/reduce-memory-footprint.md create mode 100644 site/content/arangodb/oem/operations/administration/telemetrics.md create mode 100644 site/content/arangodb/oem/operations/administration/user-management/_index.md create mode 100644 site/content/arangodb/oem/operations/administration/user-management/in-arangosh.md create mode 100644 site/content/arangodb/oem/operations/backup-and-restore.md create mode 100644 site/content/arangodb/oem/operations/installation/_index.md create mode 100644 site/content/arangodb/oem/operations/installation/compiling/_index.md create mode 100644 site/content/arangodb/oem/operations/installation/compiling/compile-on-debian.md create mode 100644 site/content/arangodb/oem/operations/installation/compiling/compile-on-windows.md create mode 100644 site/content/arangodb/oem/operations/installation/compiling/recompiling-jemalloc.md create mode 100644 site/content/arangodb/oem/operations/installation/compiling/running-custom-build.md create mode 100644 site/content/arangodb/oem/operations/installation/docker.md create mode 100644 site/content/arangodb/oem/operations/installation/linux/_index.md create mode 100644 site/content/arangodb/oem/operations/installation/linux/linux-os-tuning-script-examples.md create mode 100644 site/content/arangodb/oem/operations/installation/linux/operating-system-configuration.md create mode 100644 site/content/arangodb/oem/operations/installation/macos.md create mode 100644 site/content/arangodb/oem/operations/installation/uninstallation.md create mode 100644 site/content/arangodb/oem/operations/installation/windows.md create mode 100644 site/content/arangodb/oem/operations/security/_index.md create mode 100644 site/content/arangodb/oem/operations/security/audit-logging.md create mode 100644 site/content/arangodb/oem/operations/security/change-root-password.md create mode 100644 site/content/arangodb/oem/operations/security/encryption-at-rest.md create mode 100644 site/content/arangodb/oem/operations/security/securing-starter-deployments.md create mode 100644 site/content/arangodb/oem/operations/security/security-options.md create mode 100644 site/content/arangodb/oem/operations/troubleshooting/_index.md create mode 100644 site/content/arangodb/oem/operations/troubleshooting/arangod.md create mode 100644 site/content/arangodb/oem/operations/troubleshooting/cluster/_index.md create mode 100644 site/content/arangodb/oem/operations/troubleshooting/cluster/agency-dump.md create mode 100644 site/content/arangodb/oem/operations/troubleshooting/emergency-console.md create mode 100644 site/content/arangodb/oem/operations/troubleshooting/query-debug-packages.md create mode 100644 site/content/arangodb/oem/operations/upgrading/_index.md create mode 100644 site/content/arangodb/oem/operations/upgrading/community-to-enterprise-upgrade.md create mode 100644 site/content/arangodb/oem/operations/upgrading/downgrading.md create mode 100644 site/content/arangodb/oem/operations/upgrading/manual-deployments/_index.md create mode 100644 site/content/arangodb/oem/operations/upgrading/manual-deployments/active-failover.md create mode 100644 site/content/arangodb/oem/operations/upgrading/manual-deployments/cluster.md create mode 100644 
site/content/arangodb/oem/operations/upgrading/os-specific-information/_index.md create mode 100644 site/content/arangodb/oem/operations/upgrading/os-specific-information/linux.md create mode 100644 site/content/arangodb/oem/operations/upgrading/os-specific-information/macos.md create mode 100644 site/content/arangodb/oem/operations/upgrading/os-specific-information/windows.md create mode 100644 site/content/arangodb/oem/operations/upgrading/starter-deployments.md create mode 100644 site/content/arangodb/oem/release-notes/_index.md create mode 100644 site/content/arangodb/oem/release-notes/deprecated-and-removed-features.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.0/_index.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.0/incompatible-changes-in-3-0.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.0/whats-new-in-3-0.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.1/_index.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.1/incompatible-changes-in-3-1.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.1/whats-new-in-3-1.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.10/_index.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.10/api-changes-in-3-10.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.10/incompatible-changes-in-3-10.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.10/known-issues-in-3-10.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.10/whats-new-in-3-10.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.11/_index.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.11/api-changes-in-3-11.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.11/incompatible-changes-in-3-11.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.11/known-issues-in-3-11.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.11/whats-new-in-3-11.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.2/_index.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.2/incompatible-changes-in-3-2.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.2/known-issues-in-3-2.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.2/whats-new-in-3-2.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.3/_index.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.3/incompatible-changes-in-3-3.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.3/known-issues-in-3-3.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.3/whats-new-in-3-3.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.4/_index.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.4/incompatible-changes-in-3-4.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.4/known-issues-in-3-4.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.4/whats-new-in-3-4.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.5/_index.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.5/incompatible-changes-in-3-5.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.5/known-issues-in-3-5.md create mode 100644 
site/content/arangodb/oem/release-notes/version-3.5/whats-new-in-3-5.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.6/_index.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.6/incompatible-changes-in-3-6.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.6/known-issues-in-3-6.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.6/whats-new-in-3-6.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.7/_index.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.7/api-changes-in-3-7.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.7/incompatible-changes-in-3-7.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.7/known-issues-in-3-7.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.7/whats-new-in-3-7.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.8/_index.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.8/api-changes-in-3-8.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.8/incompatible-changes-in-3-8.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.8/known-issues-in-3-8.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.8/whats-new-in-3-8.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.9/_index.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.9/api-changes-in-3-9.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.9/incompatible-changes-in-3-9.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.9/known-issues-in-3-9.md create mode 100644 site/content/arangodb/oem/release-notes/version-3.9/whats-new-in-3-9.md create mode 100644 site/content/arangodb/oem/release-notes/version-oem/_index.md create mode 100644 site/content/arangodb/oem/release-notes/version-oem/api-changes-in-oem.md create mode 100644 site/content/arangodb/oem/release-notes/version-oem/incompatible-changes-in-oem.md create mode 100644 site/content/arangodb/oem/release-notes/version-oem/known-issues-in-oem.md create mode 100644 site/content/arangodb/oem/release-notes/version-oem/whats-new-in-oem.md create mode 100644 site/content/arangodb/oem/use-cases.md create mode 100644 site/data/oem/allMetrics.yaml create mode 100644 site/data/oem/arangobackup.json create mode 100644 site/data/oem/arangobench.json create mode 100644 site/data/oem/arangod.json create mode 100644 site/data/oem/arangodump.json create mode 100644 site/data/oem/arangoexport.json create mode 100644 site/data/oem/arangoimport.json create mode 100644 site/data/oem/arangoinspect.json create mode 100644 site/data/oem/arangorestore.json create mode 100644 site/data/oem/arangosh.json create mode 100644 site/data/oem/arangovpack.json create mode 100644 site/data/oem/cache.json create mode 100644 site/data/oem/errors.yaml create mode 100644 site/data/oem/exitcodes.yaml create mode 100644 site/data/oem/optimizer-rules.json diff --git a/PULL_REQUEST_TEMPLATE.md b/PULL_REQUEST_TEMPLATE.md index 0b9a7638fa..10cac92e93 100644 --- a/PULL_REQUEST_TEMPLATE.md +++ b/PULL_REQUEST_TEMPLATE.md @@ -8,5 +8,6 @@ - 3.10: - 3.11: +- OEM: - 3.12: - 4.0: diff --git a/site/content/arangodb/3.11/aql/execution-and-performance/query-profiling.md b/site/content/arangodb/3.11/aql/execution-and-performance/query-profiling.md index 2f28da6760..68222c43f7 100644 --- 
a/site/content/arangodb/3.11/aql/execution-and-performance/query-profiling.md +++ b/site/content/arangodb/3.11/aql/execution-and-performance/query-profiling.md @@ -6,7 +6,7 @@ description: >- For understanding the performance of specific queries, you can profile them to identify slow parts of query execution plans --- -ArangoDB allows to execute your query with special instrumentation code enabled. +ArangoDB allows you to execute your query with special instrumentation code enabled. It provides you a query plan with detailed execution statistics. To use this in an interactive fashion on the shell you can use diff --git a/site/content/arangodb/3.12/_index.md b/site/content/arangodb/3.12/_index.md index d395c48a5a..24bd6e0af7 100644 --- a/site/content/arangodb/3.12/_index.md +++ b/site/content/arangodb/3.12/_index.md @@ -1,7 +1,7 @@ --- title: Recommended Resources menuTitle: '3.12' -weight: 97 +weight: 96 layout: default --- {{< cloudbanner >}} diff --git a/site/content/arangodb/3.12/release-notes/version-3.12/_index.md b/site/content/arangodb/3.12/release-notes/version-3.12/_index.md index aa4ef4ad8f..0bff821346 100644 --- a/site/content/arangodb/3.12/release-notes/version-3.12/_index.md +++ b/site/content/arangodb/3.12/release-notes/version-3.12/_index.md @@ -1,6 +1,6 @@ --- title: Version 3.12 menuTitle: Version 3.12 -weight: 87 +weight: 86 description: '' --- diff --git a/site/content/arangodb/3.12/release-notes/version-oem/_index.md b/site/content/arangodb/3.12/release-notes/version-oem/_index.md new file mode 100644 index 0000000000..ab547220a9 --- /dev/null +++ b/site/content/arangodb/3.12/release-notes/version-oem/_index.md @@ -0,0 +1,6 @@ +--- +title: Version OEM +menuTitle: Version OEM +weight: 87 +description: '' +--- diff --git a/site/content/arangodb/3.12/release-notes/version-oem/api-changes-in-oem.md b/site/content/arangodb/3.12/release-notes/version-oem/api-changes-in-oem.md new file mode 100644 index 0000000000..93fbc165b9 --- /dev/null +++ b/site/content/arangodb/3.12/release-notes/version-oem/api-changes-in-oem.md @@ -0,0 +1,876 @@ +--- +title: API Changes in ArangoDB OEM LTS +menuTitle: API changes in OEM LTS +weight: 20 +description: >- + A summary of the changes to the HTTP API and other interfaces that are relevant + for developers, like maintainers of drivers and integrations for ArangoDB +--- +## HTTP RESTful API + +### Behavior changes + +#### Extended naming constraints for collections, Views, and indexes + +In ArangoDB 3.9, the `--database.extended-names-databases` startup option was +added to optionally allow database names to contain most UTF-8 characters. +The startup option has been renamed to `--database.extended-names` in 3.11 and +now controls whether you want to use the extended naming constraints for +database, collection, View, and index names. + +The feature is disabled by default to ensure compatibility with existing client +drivers and applications that only support ASCII names according to the +traditional naming constraints used in previous ArangoDB versions. + +If the feature is enabled, then any endpoints that contain database, collection, +View, or index names in the URL may contain special characters that were +previously not allowed (percent-encoded). They are also to be expected in +payloads that contain database, collection, View, or index names, as well as +document identifiers (because they are comprised of the collection name and the +document key). 
If client applications assemble URLs with extended names +programmatically, they need to ensure that extended names are properly +URL-encoded. + +When using extended names, any Unicode characters in names need to be +[NFC-normalized](http://unicode.org/reports/tr15/#Norm_Forms). +If you try to create a database, collection, View, or index with a non-NFC-normalized +name, the server rejects it. + +The ArangoDB web interface as well as the _arangobench_, _arangodump_, +_arangoexport_, _arangoimport_, _arangorestore_, and _arangosh_ client tools +ship with support for the extended naming constraints, but they require you +to provide NFC-normalized names. + +Please be aware that dumps containing extended names cannot be restored +into older versions that only support the traditional naming constraints. In a +cluster setup, it is required to use the same naming constraints for all +Coordinators and DB-Servers of the cluster. Otherwise, the startup is +refused. In DC2DC setups, it is also required to use the same naming +constraints for both datacenters to avoid incompatibilities. + +Also see: +- [Collection names](../../concepts/data-structure/collections.md#collection-names) +- [View names](../../concepts/data-structure/views.md#view-names) +- Index names have the same character restrictions as collection names + +#### Stricter validation of Unicode surrogate values in JSON data + +ArangoDB 3.11 employs a stricter validation of Unicode surrogate pairs in +incoming JSON data, for all REST APIs. + +In previous versions, the following loopholes existed when validating UTF-8 +surrogate pairs in incoming JSON data: + +- a high surrogate, followed by something other than a low surrogate + (or the end of the string) +- a low surrogate, not preceded by a high surrogate + +These validation loopholes have been closed in 3.11, which means that any JSON +inputs containing such invalid surrogate pair data are rejected by the server. + +This is normally the desired behavior, as it helps prevent invalid data from entering +the database. However, in situations where a database is known to contain invalid +data and must continue supporting it (at least temporarily), the extended +validation can be disabled by setting the server startup option +`--server.validate-utf8-strings` to `false`. This is not recommended as a long-term solution, +but only as a temporary measure during an upgrade or data cleanup. + +#### Status code if write concern not fulfilled + +The new `--cluster.failed-write-concern-status-code` startup option can be used +to change the default `403` status code to `503` when the write concern cannot +be fulfilled for a write operation to a collection in a cluster deployment. +This signals client applications that it is a temporary error. Only the +HTTP status code changes in this case; no automatic retry of the operation is +attempted by the cluster. + +#### Graph API (Gharial) + +The `POST /_api/gharial/` endpoint for creating named graphs validates the +`satellites` property of the graph `options` for SmartGraphs differently now. + +If the `satellites` property is set, it must be an array, either empty or with +one or more collection name strings. If the value is not in that format, the +error "Missing array for field `satellites`" is now returned, for example, if +it is a string or a `null` value. Previously, it returned "invalid parameter type".
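+
+For illustration, a minimal request that now triggers the new error message could
+look as follows (the graph and collection names are made up for this example, and
+SmartGraphs require the Enterprise Edition); the `satellites` value is a string
+instead of an array, so the server responds with "Missing array for field
+`satellites`":
+
+```bash
+curl -X POST http://localhost:8529/_api/gharial -d '{
+  "name": "myGraph",
+  "edgeDefinitions": [
+    { "collection": "relations", "from": ["places"], "to": ["places"] }
+  ],
+  "isSmart": true,
+  "options": {
+    "smartGraphAttribute": "region",
+    "numberOfShards": 3,
+    "satellites": "places"
+  }
+}'
+# An array value such as ["places"] passes the new format validation.
+```
+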
If the graph is not a SmartGraph, the `satellites` property is ignored, unless its +value is an array whose elements are not strings, in which case the error +"Invalid parameter type" is returned. + +#### Validation of `smartGraphAttribute` in SmartGraphs + +Introduced in: v3.10.13, v3.11.7 + +The attribute defined by the `smartGraphAttribute` graph property is not allowed to be +changed in the documents of SmartGraph vertex collections. This is now strictly enforced. +You must set the attribute when creating a document. Any attempt to modify or remove +the attribute afterward by update or replace operations now throws an error. Previously, +the `smartGraphAttribute` value was checked only when inserting documents into a +SmartGraph vertex collection, but not for update or replace operations. + +The missing checks on update and replace operations made it possible to retroactively +modify the value of the `smartGraphAttribute` for existing documents, which +could have led to problems when the data of such a SmartGraph vertex collection was +replicated to a new follower shard. On the new follower shard, the documents +went through the full validation and led to documents with modified +`smartGraphAttribute` values being rejected on the follower. This could have +led to follower shards not getting in sync. + +Now, the value of the `smartGraphAttribute` is fully validated with every +insert, update, or replace operation, and every attempt to modify the value of +the `smartGraphAttribute` retroactively fails with the `4003` error, +`ERROR_KEY_MUST_BE_PREFIXED_WITH_SMART_GRAPH_ATTRIBUTE`. +Additionally, if upon insertion the `smartGraphAttribute` is missing for a +SmartGraph vertex, the error code is `4001`, `ERROR_NO_SMART_GRAPH_ATTRIBUTE`. + +To retroactively repair the data in any of the affected collections, it is +possible to update every (affected) document with the correct value of the +`smartGraphAttribute` via an AQL query as follows: + +``` +FOR doc IN @@collection + LET expected = SUBSTRING(doc._key, 0, FIND_FIRST(doc._key, ':')) + LET actual = doc.@attr + FILTER expected != actual + UPDATE doc WITH {@attr: expected} IN @@collection + COLLECT WITH COUNT INTO updated + RETURN updated +``` + +This updates all documents with the correct (expected) value of the +`smartGraphAttribute` if it deviates from the expected value. The query +returns the number of updated documents as well. + +The bind parameters necessary to run this query are: +- `@@collection`: name of a SmartGraph vertex collection to be updated +- `@attr`: attribute name of the `smartGraphAttribute` of the collection + +#### Database API + +The `POST /_api/database` endpoint for creating a new database has changed. +If the specified database name is invalid/illegal, it now returns the error code +`1208` (`ERROR_ARANGO_ILLEGAL_NAME`). It previously returned `1229` +(`ERROR_ARANGO_DATABASE_NAME_INVALID`) in this case. + +This is a downwards-incompatible change, but unifies the behavior for database +creation with the behavior of collection and View creation, which also return +the error code `1208` in case the specified name is not allowed.
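+
+For example, a request with a database name that is not allowed (the name below is
+only an example) now reports error code `1208` instead of `1229`:
+
+```bash
+# "my/database" contains a forbidden character, so the creation fails.
+curl -X POST http://localhost:8529/_api/database -d '{"name": "my/database"}'
+# Illustrative response shape; the exact errorMessage may differ:
+# {"code":400,"error":true,"errorNum":1208,"errorMessage":"illegal name"}
+```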
+ +#### Document API + +The following endpoints support a new `refillIndexCaches` query +parameter to repopulate the index caches after requests that insert, update, +replace, or remove single or multiple documents (including edges) if this +affects an edge index or cache-enabled persistent indexes: + +- `POST /_api/document/{collection}` +- `PATCH /_api/document/{collection}/{key}` +- `PUT /_api/document/{collection}/{key}` +- `DELETE /_api/document/{collection}/{key}` + +It is a boolean option and the default is `false`. + +This also applies to the `INSERT`, `UPDATE`, `REPLACE`, and `REMOVE` operations +in AQL queries, which support a `refillIndexCache` option, too. + +In 3.9 and 3.10, `refillIndexCaches` was experimental and limited to edge caches. + +--- + +Introduced in: v3.11.1 + +When inserting multiple documents/edges at once in a cluster, the Document API +used to let the entire request fail if any of the documents/edges failed to be +saved due to a key error. More specifically, if the value of a `_key` attribute +contains illegal characters or if the key doesn't meet additional requirements, +for instance, coming from the collection being used in a Disjoint SmartGraph, +the `POST /_api/document/{collection}` endpoint would not reply with the usual +array of either the document metadata or the error object for each attempted +document insertion. Instead, it used to return an error object for the first +offending document only, and aborted the operation with an HTTP `400 Bad Request` +status code so that none of the documents were saved. Example: + +```bash +> curl -d '[{"_key":"valid"},{"_key":"invalid space"}]' http://localhost:8529/_api/document/coll +{"code":400,"error":true,"errorMessage":"illegal document key","errorNum":1221} + +> curl http://localhost:8529/_api/document/coll/valid +{"code":404,"error":true,"errorMessage":"document not found","errorNum":1202} +``` + +Now, such key errors in cluster deployments no longer fail the entire request, +matching the behavior of single server deployments. Any errors are reported in +the result array for the respective documents, along with the successful ones: + +```bash +> curl -d '[{"_key":"valid"},{"_key":"invalid space"}]' http://localhost:8529/_api/document/coll +[{"_id":"coll/valid","_key":"valid","_rev":"_gG9JHsW---"},{"error":true,"errorNum":1221,"errorMessage":"illegal document key"}] + +> curl http://localhost:8529/_api/document/coll/valid +{"_key":"valid","_id":"coll/valid","_rev":"_gG9JHsW---"} +``` + +--- + +Introduced in: v3.11.1 + +Using the Document API for reading multiple documents used to return an error +if the request body was an empty array. Example: + +```bash +> curl -XPUT -d '[]' 'http://localhost:8529/_api/document/coll?onlyget=true' +{"code":500,"error":true,"errorMessage":"internal error","errorNum":4} +``` + +Now, a request like this succeeds and returns an empty array as response. + +#### Collection API + +The edge collections of EnterpriseGraphs and SmartGraphs (including +Disjoint SmartGraphs and SmartGraphs using SatelliteCollections but excluding +the edge collections of the SatelliteCollections) previously reported a +value of `0` as the `numberOfShards`. They now return the actual number of +shards. This value can be higher than the configured `numberOfShards` value of +the graph due to internally used hidden collections. 
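+
+For example, assuming `relations` is the edge collection of a SmartGraph (the
+collection name is made up for this sketch), its properties now report the
+actual shard count:
+
+```bash
+curl http://localhost:8529/_api/collection/relations/properties
+# Illustrative excerpt of the response: "numberOfShards" is now e.g. 3
+# instead of the previously reported 0, and it can exceed the configured
+# value because of internally used hidden collections.
+```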
+ +#### Cursor API + +When you link a collection to an `arangosearch` View and run an AQL query +against this View while it is still being indexed, you now receive the query result +including a warning. This warning alerts you about potentially incomplete results obtained +from a partially indexed collection. The error code associated with this +warning is `1240` (`ERROR_ARANGO_INCOMPLETE_READ`). + +--- + +Introduced in: v3.9.11, v3.10.7 + +In AQL graph traversals (`POST /_api/cursor` endpoint), you can restrict the +vertex and edge collections in the traversal options like so: + +```aql +FOR v, e, p IN 1..3 OUTBOUND 'products/123' components + OPTIONS { + vertexCollections: [ "bolts", "screws" ], + edgeCollections: [ "productsToBolts", "productsToScrews" ] + } + RETURN v +``` + +If you specify collections that don't exist, queries now fail with +a "collection or view not found" error (code `1203` and HTTP status +`404 Not Found`). In previous versions, unknown vertex collections were ignored, +and the behavior for unknown edge collections was undefined. + +Additionally, the collection types are now validated. If a document collection +or View is specified in `edgeCollections`, an error is raised +(code `1218` and HTTP status `400 Bad Request`). + +Furthermore, it is now an error if you specify a vertex collection that is not +part of the specified named graph (code `1926` and HTTP status `404 Not Found`). +It is also an error if you specify an edge collection that is not part of the +named graph's definition or of the list of edge collections (code `1939` and +HTTP status `400 Bad Request`). + +#### Log API + +Setting the log level for the `graphs` log topic to `TRACE` now logs detailed +information about AQL graph traversals and (shortest) path searches. +Some new log messages are also logged for the `DEBUG` level. + +#### Disabled Foxx APIs + +Introduced in: v3.10.5 + +A `--foxx.enable` startup option has been added to _arangod_. It defaults to `true`. +If the option is set to `false`, access to Foxx services is forbidden and requests to them are +answered with an HTTP `403 Forbidden` error. Access to the management APIs for +Foxx services is also disabled, as if `--foxx.api false` were set manually. + +#### Configurable whitespace in metrics + +Introduced in: v3.10.6 + +The output format of the `/_admin/metrics` and `/_admin/metrics/v2` endpoints +slightly changes for metrics with labels. By default, the metric label and value +are separated by a space for improved compatibility with some tools. This is +controlled by the new `--server.ensure-whitespace-metrics-format` startup option, +which is enabled by default from v3.10.6 onward. Example: + +Enabled: + +``` +arangodb_agency_cache_callback_number{role="SINGLE"} 0 +``` + +Disabled: + +``` +arangodb_agency_cache_callback_number{role="SINGLE"}0 +``` + +#### Limit to the number of databases in a deployment + +Introduced in: v3.10.10, v3.11.2 + +The new `--database.max-databases` startup option can cap the number of databases, +and creating a database using the `POST /_api/database` endpoint can thus now fail +if your deployment is already at or above the configured maximum.
Example: + +```json +{ + "code": 400, + "error": true, + "errorMessage": "unable to create additional database because it would exceed the configured maximum number of databases (2)", + "errorNum": 32 +} +``` + +### Endpoint return value changes + +Introduced in: v3.8.8, v3.9.4, v3.10.1 + +Changed the encoding of revision IDs returned by the REST APIs listed below: + +- `GET /_api/collection//revision`: The revision ID was + previously returned as a numeric value, and now it is returned as + a string value with either numeric encoding or HLC-encoding inside. +- `GET /_api/collection//checksum`: The revision ID in + the `revision` attribute was previously encoded as a numeric value + in single server, and as a string in cluster. This is now unified so + that the `revision` attribute always contains a string value with + either numeric encoding or HLC-encoding inside. + +### Endpoints added + +#### Maintenance mode for DB-Servers + +Introduced in: v3.10.1 + +For rolling upgrades or rolling restarts, DB-Servers can now be put into +maintenance mode, so that no attempts are made to re-distribute the data in a +cluster for such planned events. DB-Servers in maintenance mode are not +considered viable failover targets because they are likely to be restarted soon. + +To query the maintenance status of a DB-Server, use this new endpoint: + +`GET /_admin/cluster/maintenance/` + +An example reply of a DB-Server that is in maintenance mode: + +```json +{ + "error": false, + "code": 200, + "result": { + "Mode": "maintenance", + "Until": "2022-10-26T06:14:23Z" + } +} +``` + +If the DB-Server is not in maintenance mode, then the `result` attribute is +omitted: + +```json +{ + "error": false, + "code": 200 +} +``` + +To put a DB-Server into maintenance mode, use this new endpoint: + +`PUT /_admin/cluster/maintenance/` + +The payload of the request needs to be as follows, with the `timeout` in seconds: + +```json +{ + "mode": "maintenance", + "timeout": 360 +} +``` + +To turn the maintenance mode off, set `mode` to `"normal"` instead, and omit the +`timeout` attribute or set it to `0`. + +You can send another request when the DB-Server is already in maintenance mode +to extend the timeout. + +The maintenance mode ends automatically after the defined timeout. + +Also see the [HTTP interface for cluster maintenance](../../develop/http-api/cluster.md#get-the-maintenance-status-of-a-db-server). + +#### Shard usage metrics + +Introduced in: v3.10.13, v3.11.7 + +With `GET /_admin/usage-metrics` you can retrieve detailed shard usage metrics on +DB-Servers. + +These metrics can be enabled by setting the `--server.export-shard-usage-metrics` +startup option to `enabled-per-shard` to make DB-Servers collect per-shard +usage metrics, or to `enabled-per-shard-per-user` to make DB-Servers collect +usage metrics per shard and per user whenever a shard is accessed. + +For more information, see the [HTTP API description](../../develop/http-api/monitoring/metrics.md#get-usage-metrics) +and [Monitoring per collection/database/user](../version-oem/whats-new-in-oem.md#monitoring-per-collectiondatabaseuser). + +### Endpoints augmented + +#### Cursor API + +- The `POST /_api/cursor` and `POST /_api/cursor/{cursor-identifier}` endpoints + can now return an additional statistics value in the `stats` sub-attribute, + `intermediateCommits`. It is the total number of intermediate commits the + query has performed.
This number can only be greater than zero for + data modification queries that perform modifications beyond the + `--rocksdb.intermediate-commit-count` or `--rocksdb.intermediate-commit-size` + thresholds. In clusters, the intermediate commits are tracked per DB-Server + that participates in the query and are summed up in the end. + +- The `/_api/cursor` endpoint accepts a new `allowRetry` attribute in the + `options` object. Set this option to `true` to make it possible to retry + fetching the latest batch from a cursor. The default is `false`. + + If retrieving a result batch fails because of a connection issue, you can ask + for that batch again using the new `POST /_api/cursor//` + endpoint. The first batch has an ID of `1` and the value is incremented by 1 + with every batch. Every result response except the last one also includes a + `nextBatchId` attribute, indicating the ID of the batch after the current. + You can remember and use this batch ID should retrieving the next batch fail. + + You can only request the latest batch again (or the next batch). + Earlier batches are not kept on the server-side. + Requesting a batch again does not advance the cursor. + + You can also call this endpoint with the next batch identifier, i.e. the value + returned in the `nextBatchId` attribute of a previous request. This advances the + cursor and returns the results of the next batch. This is only supported if there + are more results in the cursor (i.e. `hasMore` is `true` in the latest batch). + + From v3.11.1 onward, you may use the `POST /_api/cursor//` + endpoint even if the `allowRetry` attribute is `false` to fetch the next batch, + but you cannot request a batch again unless you set it to `true`. + The `nextBatchId` attribute is always present in result objects (except in the + last batch) from v3.11.1 onward. + + To allow refetching of the very last batch of the query, the server cannot + automatically delete the cursor. After the first attempt of fetching the last + batch, the server would normally delete the cursor to free up resources. As you + might need to reattempt the fetch, it needs to keep the final batch when the + `allowRetry` option is enabled. Once you successfully received the last batch, + you should call the `DELETE /_api/cursor/` endpoint so that the + server doesn't unnecessarily keep the batch until the cursor times out + (`ttl` query option). + +- When profiling a query (`profile` option `true`, `1`, or `2`), the `profile` + object returned under `extra` now includes a new `"instantiating executors"` + attribute with the time needed to create the query executors, and in cluster + mode, this also includes the time needed for physically distributing the query + snippets to the participating DB-Servers. Previously, the time spent for + instantiating executors and the physical distribution was contained in the + `optimizing plan` stage. + +- The endpoint supports a new `maxDNFConditionMembers` query option, which is a + threshold for the maximum number of `OR` sub-nodes in the internal + representation of an AQL `FILTER` condition and defaults to `786432`. + +#### Analyzer types + +The `/_api/analyzer` endpoint supports a new Analyzer type in the +Enterprise Edition: + +- [`geo_s2`](../../indexes-and-search/analyzers.md#geo_s2) (introduced in v3.10.5): + Like the existing `geojson` Analyzer, but with an additional `format` property + that can be set to `"latLngDouble"` (default), `"latLngInt"`, or `"s2Point"`. 
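+
+A minimal sketch of creating such an Analyzer (the Analyzer name is an assumption
+for this example; the `geo_s2` type requires the Enterprise Edition):
+
+```bash
+curl -X POST http://localhost:8529/_api/analyzer -d '{
+  "name": "geo_compact",
+  "type": "geo_s2",
+  "properties": { "format": "latLngInt" },
+  "features": []
+}'
+```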
+ +#### Query API + +The [`GET /_api/query/current`](../../develop/http-api/queries/aql-queries.md#list-the-running-aql-queries) +and [`GET /_api/query/slow`](../../develop/http-api/queries/aql-queries.md#list-the-slow-aql-queries) +endpoints include a new numeric `peakMemoryUsage` attribute. + +--- + +The `GET /_api/query/current` endpoint can return a new value +`"instantiating executors"` as `state` in the query list. + +#### Index API + +##### Progress indication on the index generation + +Introduced in: v3.10.13, v3.11.7 + +The `GET /_api/index` endpoint may now include a `progress` attribute for the +elements in the `indexes` array. For every index that is currently being created, +it indicates the progress of the index generation (in percent). + +To return indexes that are not yet fully built but are in the building phase, +add the `withHidden=true` query parameter to the call of the endpoint. +Note that this includes internal indexes in the response as well, such as +`arangosearch` indexes. + +``` +curl "http://localhost:8529/_api/index?collection=myCollection&withHidden=true" +``` + +##### Restriction of indexable fields + +It is now forbidden to create indexes that cover fields whose attribute names +start or end with `:` , for example, `fields: ["value:"]`. This notation is +reserved for internal use. + +Existing indexes are not affected but you cannot create new indexes with a +preceding or trailing colon using the `POST /_api/index` endpoint. + +##### Inverted indexes + +Introduced in: v3.10.2 + +[Inverted indexes](../../develop/http-api/indexes/inverted.md) support new +caching options in the Enterprise Edition. + +- A new `cache` option for inverted indexes as the default (boolean, default: + `false`) or for specific `fields` (boolean, default: the value of the + top-level `cache` option) to always cache field normalization values and + Geo Analyzer auxiliary data in memory. + +- A new `cache` option per object in the definition of the `storedValues` + elements to always cache stored values in memory (boolean, default: `false`). + +- A new `cache` option in the `primarySort` property to always cache the + primary sort columns in memory (boolean, default: `false`). + +- A new `primaryKeyCache` property for inverted indexes to always cache the + primary key column in memory (boolean, default: `false`). + +The `POST /_api/index` endpoint accepts these new options for `inverted` indexes +and the `GET /_api/index` and `GET /_api/index/` endpoints may return +these options. The attributes are omitted in responses unless you enable the +respective option. + +#### View API + +Views of the type `arangosearch` support new caching options in the +Enterprise Edition. + +Introduced in: v3.9.5, v3.10.2 + +- A `cache` option for individual View links or fields (boolean, default: `false`). +- A `cache` option in the definition of a `storedValues` View property + (boolean, immutable, default: `false`). + +Introduced in: v3.9.6, v3.10.2 + +- A `primarySortCache` View property (boolean, immutable, default: `false`). +- A `primaryKeyCache` View property (boolean, immutable, default: `false`). + +The `POST /_api/view` endpoint accepts these new options for `arangosearch` +Views, the `GET /_api/view//properties` endpoint may return these +options, and you can change the `cache` View link/field property with the +`PUT /_api/view//properties` and `PATCH /_api/view//properties` +endpoints. 
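+
+As a minimal sketch (Enterprise Edition), a View that uses these caching
+options could be created as follows, assuming a collection named `coll`
+exists:
+
+```bash
+curl -X POST http://localhost:8529/_api/view \
+  -d '{
+    "name": "cachedView",
+    "type": "arangosearch",
+    "primarySort": [{"field": "name", "direction": "asc"}],
+    "primarySortCache": true,
+    "primaryKeyCache": true,
+    "storedValues": [{"fields": ["value"], "cache": true}],
+    "links": {"coll": {"fields": {"name": {"cache": true}}}}
+  }'
+```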
+ +Introduced in: v3.10.3 + +You may use a shorthand notations on `arangosearch` View creation or the +`storedValues` option, like `["attr1", "attr2"]`, instead of using an array of +objects. + +See the [`arangosearch` Views Reference](../../indexes-and-search/arangosearch/arangosearch-views-reference.md#link-properties) +for details. + +#### Pregel API + +Four new endpoints have been added to the Pregel HTTP interface for the new +persisted execution statistics for Pregel jobs: + +- `GET /_api/control_pregel/history/{id}` to retrieve the persisted execution + statistics of a specific Pregel job +- `GET /_api/control_pregel/history` to retrieve the persisted execution + statistics of all currently active and past Pregel jobs +- `DELETE /_api/control_pregel/history/{id}` to delete the persisted execution + statistics of a specific Pregel job +- `DELETE /_api/control_pregel/history` to delete the persisted execution + statistics of all Pregel jobs + +#### Cluster rebalance API + +The `POST /_admin/cluster/rebalance` and `PUT /_admin/cluster/rebalance` +endpoints support a new `excludeSystemCollections` option that lets you ignore +system collections in the shard rebalance plan. + +The `/_admin/cluster/rebalance` route (`GET`, `POST`, and `PUT` methods) returns +a new `totalShardsFromSystemCollections` property in the `shards` object of the +`result` with the number of leader shards from system collections. The adjacent +`totalShards` property may not include system collections depending on the +`excludeSystemCollections` option. + +#### Explain API + +Introduced in: v3.10.4 + +The `POST /_api/explain` endpoint for explaining AQL queries includes the +following two new statistics in the `stats` attribute of the response now: + +- `peakMemoryUsage` (number): The maximum memory usage of the query during + explain (in bytes) +- `executionTime` (number): The (wall-clock) time in seconds needed to explain + the query. + +#### Metrics API + +The following metric has been added in version 3.11: + +| Label | Description | +|:------|:------------| +| `arangodb_search_num_primary_docs` | Number of primary documents for current snapshot. | + +--- + +Introduced in: v3.10.7, v3.11.1 + +This new metric reports the number of RocksDB `.sst` files: + +| Label | Description | +|:------|:------------| +| `rocksdb_total_sst_files` | Total number of RocksDB sst files, aggregated over all levels. | + +--- + +Introduced in: v3.10.7 + +The metrics endpoints include the following new file descriptors metrics: + +- `arangodb_file_descriptors_current` +- `arangodb_file_descriptors_limit` + +--- + +Introduced in: v3.8.9, v3.9.6, v3.10.2 + +The metrics endpoints include the following new traffic accounting metrics: + +- `arangodb_client_user_connection_statistics_bytes_received` +- `arangodb_client_user_connection_statistics_bytes_sent` +- `arangodb_http1_connections_total` + +--- + +Introduced in: v3.9.6, v3.10.2 + +The metrics endpoints include the following new edge cache (re-)filling metrics: + +- `rocksdb_cache_auto_refill_loaded_total` +- `rocksdb_cache_auto_refill_dropped_total` +- `rocksdb_cache_full_index_refills_total` + +--- + +Introduced in: v3.9.10, v3.10.5 + +The following metrics for write-ahead log (WAL) file tracking have been added: + +| Label | Description | +|:------|:------------| +| `rocksdb_live_wal_files` | Number of live RocksDB WAL files. | +| `rocksdb_wal_released_tick_flush` | Lower bound sequence number from which WAL files need to be kept because of external flushing needs. 
| +| `rocksdb_wal_released_tick_replication` | Lower bound sequence number from which WAL files need to be kept because of replication. | +| `arangodb_flush_subscriptions` | Number of currently active flush subscriptions. | + +--- + +Introduced in: v3.10.5 + +The following metric for the number of replication clients for a server has +been added: + +| Label | Description | +|:------|:------------| +| `arangodb_replication_clients` | Number of currently connected/active replication clients. | + +--- + +Introduced in: v3.9.11, v3.10.6 + +The following metrics for diagnosing delays in cluster-internal network requests +have been added: + +| Label | Description | +|:------|:------------| +| `arangodb_network_dequeue_duration` | Internal request duration for the dequeue in seconds. | +| `arangodb_network_response_duration` | Internal request duration from fully sent till response received in seconds. | +| `arangodb_network_send_duration` | Internal request send duration in seconds. | +| `arangodb_network_unfinished_sends_total` | Number of internal requests for which sending has not finished. | + +--- + +Introduced in: v3.10.7 + +The following metric stores the peak value of the `rocksdb_cache_allocated` metric: + +| Label | Description | +|:------|:------------| +| `rocksdb_cache_peak_allocated` | Global peak memory allocation of ArangoDB in-memory caches. | + +--- + +Introduced in: v3.11.2 + +The following metrics have been added about the LZ4 compression for values in +the in-memory edge cache: + +- `rocksdb_cache_edge_inserts_effective_entries_size_total` +- `rocksdb_cache_edge_inserts_uncompressed_entries_size_total` +- `rocksdb_cache_edge_compression_ratio` + +--- + +Introduced in: v3.10.11, v3.11.4 + +The following metrics have been added to improve the observability of in-memory +cache subsystem: + +- `rocksdb_cache_free_memory_tasks_total` +- `rocksdb_cache_free_memory_tasks_duration_total` +- `rocksdb_cache_migrate_tasks_total` +- `rocksdb_cache_migrate_tasks_duration_total` + +--- + +Introduced in: v3.11.4 + +The following metrics have been added to improve the observability of in-memory +edge cache: + +- `rocksdb_cache_edge_compressed_inserts_total` +- `rocksdb_cache_edge_empty_inserts_total` +- `rocksdb_cache_edge_inserts_total` + +--- + +Introduced in: v3.11.5 + +The following metrics have been added to monitor and detect temporary or +permanent connectivity issues as well as how many scheduler threads are in the +detached state: + +- `arangodb_network_connectivity_failures_coordinators` +- `arangodb_network_connectivity_failures_dbservers_total` +- `arangodb_scheduler_num_detached_threads` + +#### Log level API + +Introduced in: v3.10.2 + +The `GET /_admin/log/level` and `PUT /_admin/log/level` endpoints support a new +query parameter `serverId`, to forward log level get and set requests to a +specific server. This makes it easier to adjust the log levels in clusters +because DB-Servers require JWT authentication whereas Coordinators also support +authentication using usernames and passwords. + +#### Explain API + +Introduced in: v3.10.4 + +The `POST /_api/explain` endpoint for explaining AQL queries includes the +following two new statistics in the `stats` attribute of the response now: + +- `peakMemoryUsage` (number): The maximum memory usage of the query during + explain (in bytes) +- `executionTime` (number): The (wall-clock) time in seconds needed to explain + the query. 
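+
+A minimal request sketch, using a hypothetical collection `coll` — the
+`stats` attribute of the response then includes the two values described
+above:
+
+```bash
+curl -X POST http://localhost:8529/_api/explain \
+  -d '{"query": "FOR doc IN coll FILTER doc.value > 10 RETURN doc"}'
+```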
+ +#### Optimizer rule descriptions + +Introduced in: v3.10.9, v3.11.2 + +The `GET /_api/query/rules` endpoint now includes a `description` attribute for +every optimizer rule that briefly explains what it does. + +### Endpoints deprecated + +The `GET /_admin/database/target-version` endpoint is deprecated in favor of the +more general version API with the endpoint `GET /_api/version`. +The endpoint will be removed in ArangoDB v3.12. + +## JavaScript API + +### Database creation + +The `db._createDatabase()` method for creating a new database has changed. +If the specified database name is invalid/illegal, it now returns the error code +`1208` (`ERROR_ARANGO_ILLEGAL_NAME`). It previously returned `1229` +(`ERROR_ARANGO_DATABASE_NAME_INVALID`) in this case. + +This is a downwards-incompatible change, but unifies the behavior for database +creation with the behavior of collection and View creation, which also return +the error code `1208` in case the specified name is not allowed. + +### Index methods + +Calling `collection.dropIndex(...)` or `db._dropIndex(...)` now raises an error +if the specified index does not exist or cannot be dropped (for example, because +it is a primary index or edge index). The methods previously returned `false`. +In case of success, they still return `true`. + +You can wrap calls to these methods with a `try { ... }` block to catch errors, +for example, in _arangosh_ or in Foxx services. + +### AQL queries + +When you use e.g. the `db._query()` method to execute an AQL query against an +`arangosearch` View while it is still in the process of being built, +the query now includes a warning message that the results may not be +complete due to the ongoing indexing process of the View. + +The error code associated with this warning is `1240` +(`ERROR_ARANGO_INCOMPLETE_READ`). + +--- + +Introduced in: v3.9.11, v3.10.7 + +If you specify collections that don't exist in the options of AQL graph traversals +(`vertexCollections`, `edgeCollections`), queries now fail. In previous versions, +unknown vertex collections were ignored, and the behavior for unknown +edge collections was undefined. + +Additionally, queries fail if you specify a document collection or View +in `edgeCollections`. + +### Pregel module + +Two new methods have been added to the `@arangodb/pregel` module: + +- `history(...)` to get the persisted execution statistics of a specific or all + algorithm executions +- `removeHistory(...)` to delete the persisted execution statistics of a + specific or all algorithm executions + +```js +var pregel = require("@arangodb/pregel"); +const execution = pregel.start("sssp", "demograph", { source: "vertices/V" }); +const historyStatus = pregel.history(execution); +pregel.removeHistory(); +``` + +### `collection.iterate()` deprecated + +The `collection.iterate()` method is deprecated from v3.11.0 onwards and will be +removed in a future version. + +### `@arangodb/request` certificate validation + +Introduced in: v3.11.11 + +The `@arangodb/request` module now supports two additional options for making +HTTPS requests: + +- `verifyCertificates` (optional): if set to `true`, the server certificate of + the remote server is verified using the default certificate store of the system. + Default: `false`. +- `verifyDepth` (optional): limit the maximum length of the certificate chain + that counts as valid. Default: `10`. 
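+
+A minimal sketch of using these options from server-side JavaScript
+(for example, in a Foxx service), with a hypothetical URL:
+
+```js
+const request = require("@arangodb/request");
+
+// Verify the remote server certificate against the system certificate store
+// and limit the accepted certificate chain length to 3.
+const response = request({
+  method: "GET",
+  url: "https://example.com/healthz",
+  verifyCertificates: true,
+  verifyDepth: 3
+});
+```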
diff --git a/site/content/arangodb/3.12/release-notes/version-oem/incompatible-changes-in-oem.md b/site/content/arangodb/3.12/release-notes/version-oem/incompatible-changes-in-oem.md new file mode 100644 index 0000000000..109b41bc11 --- /dev/null +++ b/site/content/arangodb/3.12/release-notes/version-oem/incompatible-changes-in-oem.md @@ -0,0 +1,695 @@ +--- +title: Incompatible changes in ArangoDB OEM LTS +menuTitle: Incompatible changes in OEM LTS +weight: 15 +description: >- + Check the following list of potential breaking changes **before** upgrading to + this ArangoDB version and adjust any client applications if necessary +--- +## Resolving known issues with versions up to 3.11.11 + +Due to an issue with the versions up to 3.11.11, please read the +information below and follow the linked procedures to avoid a potential problem. +Not following these procedures can cause your deployment to become +read-only in rare cases. + +{{< warning >}} +If you are a paying customer with a self-hosted deployment, contact the +Arango support for direct assistance. +Arango Managed Platform (AMP) customers do not need to take any action. +{{< /warning >}} + +**Issues that has been discovered that requires action:** + +- [Issues with the comparison of large indexed numbers](#corrected-sorting-order-for-numbers-in-velocypack-indexes) + +**Who should check for a potential issue:** + +- Deployments created with a version prior to 3.11.11 + +**Deployments not impacted:** + +- Deployments created with 3.11.11 or later 3.11.x version + +**Overview of impact** + +There is a risk of the RocksDB storage engine entering a state where no write operations are +possible anymore, should it discover index entries that are in an unexpected order. + +This can occur at any time, even if a previous check reported no affected indexes, +as there is no protection against storing and indexing data that may cause issues. +To prevent RocksDB from becoming read-only at some point in the future, it is +essential to follow the linked procedures. + +{{< tip >}} +It is recommended to schedule a maintenance time window for taking the ArangoDB +deployment offline to perform the upgrade procedure in the safest possible manner. +{{< /tip >}} + +**Paths to resolution:** + +| Current version | Resolved version | Steps to take | +|-----------------|------------------|---------------| +| 3.11.10 (or older) | 3.11.11 (or newer 3.11.x) | Create a backup, upgrade normally (following the standard [Upgrade path](../../operations/upgrading/_index.md#upgrade-paths) all the way to the latest 3.11.x version), then check for [affected numbers in indexes](#corrected-sorting-order-for-numbers-in-velocypack-indexes) and fix them. | +| 3.11.11 (or newer 3.11.x) | 3.12.4 (or newer) | **Do not upgrade to version 3.12.0, 3.12.1, 3.12.2, or 3.12.3**. Create a backup, check for [affected numbers in indexes](#corrected-sorting-order-for-numbers-in-velocypack-indexes) and fix them (if you haven't done so already or created the deployment with 3.11.11 or a later 3.11.x version), then upgrade to the latest 3.11.x version first, and finally upgrade to version 3.12.4 or later. | + +## Incompatibilities due to switch to glibc + +From version 3.11.10 onward, ArangoDB uses the glibc C standard library +implementation instead of libmusl. Even though glibc is statically linked into +the ArangoDB server and client tool executables, it may load additional modules +at runtime that are installed on your system. 
Under rare circumstances, it is +possible that ArangoDB crashes when performing host name or address lookups. +This is only the case if all of the following conditions are true: + +- You either use ArangoDB version 3.11.10 (non-hotfix), or you use a 3.11 version + from 3.11.10-1 onward with the `--honor-nsswitch` startup option enabled. +- You use an ArangoDB package on bare metal (not a Docker container) +- Your operating system uses glibc (like Ubuntu, Debian, RedHat, Centos, or + most other Linux distributions, but not Alpine for instance) +- The glibc version of your system is different than the one used by ArangoDB, + in particular if the system glibc is older than version 2.35 +- The `libnss-*` dynamic libraries are installed +- The `/etc/nsswitch.conf` configuration file contains settings other than for + `files` and `dns` in the `hosts:` line, or the `passwd:` and `group:` lines + contain something other than `files` + +If you are affected, consider using Docker containers, `chroot`, or change +`nsswitch.conf`. + +## VelocyStream protocol deprecation + +ArangoDB's own bi-directional asynchronous binary protocol VelocyStream (VST) is +deprecated in v3.11 and removed in v3.12.0. + +While VelocyStream support is still available in v3.11, it is highly recommended +to already switch to the HTTP(S) protocol because of better performance and +reliability. ArangoDB supports both VelocyPack and JSON over HTTP(S). + +## Active Failover deployment mode deprecation + +Running a single server with asynchronous replication to one or more passive +single servers for automatic failover is deprecated and will no longer be +supported in the next minor version of ArangoDB, from v3.12 onward. + +## Extended naming constraints for collections, Views, and indexes + +In ArangoDB 3.9, the `--database.extended-names-databases` startup option was +added to optionally allow database names to contain most UTF-8 characters. +The startup option has been renamed to `--database.extended-names` in 3.11 and +now controls whether you want to use the extended naming constraints for +database, collection, View, and index names. + +The old `--database.extended-names-databases` startup option should no longer +be used, but if you do, it behaves the same as the new +`--database.extended-names` option. + +The feature is disabled by default to ensure compatibility with existing client +drivers and applications that only support ASCII names according to the +traditional naming constraints used in previous ArangoDB versions. + +If the feature is enabled, then any endpoints that contain database, collection, +View, or index names in the URL may contain special characters that were +previously not allowed (percent-encoded). They are also to be expected in +payloads that contain database, collection, View, or index names, as well as +document identifiers (because they are comprised of the collection name and the +document key). If client applications assemble URLs with extended names +programmatically, they need to ensure that extended names are properly +URL-encoded. + +When using extended names, any Unicode characters in names need to be +[NFC-normalized](http://unicode.org/reports/tr15/#Norm_Forms). +If you try to create a database, collection, View, or index with a non-NFC-normalized +name, the server rejects it. 
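+
+A minimal sketch of normalizing a name to NFC in JavaScript before creating
+a collection with it in _arangosh_ (assuming the server was started with
+`--database.extended-names` enabled and that the example name is acceptable
+in your setup):
+
+```js
+// String.prototype.normalize() converts the name to the NFC form
+const name = "straße_collection".normalize("NFC");
+db._create(name);
+```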
+ +The ArangoDB web interface as well as the _arangobench_, _arangodump_, +_arangoexport_, _arangoimport_, _arangorestore_, and _arangosh_ client tools +ship with support for the extended naming constraints, but they require you +to provide NFC-normalized names. + +Please be aware that dumps containing extended names cannot be restored +into older versions that only support the traditional naming constraints. In a +cluster setup, it is required to use the same naming constraints for all +Coordinators and DB-Servers of the cluster. Otherwise, the startup is +refused. In DC2DC setups, it is also required to use the same naming +constraints for both datacenters to avoid incompatibilities. + +Also see: +- [Collection names](../../concepts/data-structure/collections.md#collection-names) +- [View names](../../concepts/data-structure/views.md#view-names) +- Index names have the same character restrictions as collection names + +## No AQL user-defined functions (UDF) in `PRUNE` + +AQL user-defined functions (UDFs) cannot be used inside traversal PRUNE conditions +nor inside FILTER conditions that can be moved into the traversal execution on DB-Servers. +This limitation also applies to single servers to keep the differences to cluster +deployments minimal. + +## Stricter validation of Unicode surrogate values in JSON data + +ArangoDB 3.11 employs a stricter validation of Unicode surrogate pairs in +incoming JSON data, for all REST APIs. + +In previous versions, the following loopholes existed when validating UTF-8 +surrogate pairs in incoming JSON data: + +- a high surrogate, followed by something other than a low surrogate + (or the end of the string) +- a low surrogate, not preceded by a high surrogate + +These validation loopholes have been closed in 3.11, which means that any JSON +inputs containing such invalid surrogate pair data are rejected by the server. + +This is normally the desired behavior, as it helps invalid data from entering +the database. However, in situations when a database is known to contain invalid +data and must continue supporting it (at least temporarily), the extended +validation can be disabled by setting the server startup option +`--server.validate-utf8-strings` to `false`. This is not recommended long-term, +but only during upgrading or data cleanup. + +## Restriction of indexable fields + +It is now forbidden to create indexes that cover fields whose attribute names +start or end with `:` , for example, `fields: ["value:"]`. This notation is +reserved for internal use. + +Existing indexes are not affected but you cannot create new indexes with a +preceding or trailing colon. + +## Write-write conflict improvements + +Writes to the same document in quick succession can result in write-write +conflicts, requiring you to retry the operations. In v3.11, single document +operations via the [HTTP Interface for Documents](../../develop/http-api/documents.md) try to +avoid conflicts by locking the key of the document before performing the +modification. This serializes the write operations on the same document. +The behavior of AQL queries, Stream Transactions, and multi-document operations +remains unchanged. + +It is still possible for write-write conflicts to occur, and in these cases the +reported error is now slightly different. + +The lock acquisition on the key of the document that is supposed to be +inserted/modified has a hard-coded timeout of 1 second. 
If the lock cannot be +acquired, the error message is as follows: + +``` +Timeout waiting to lock key - in index primary of type primary over '_key'; conflicting key: +``` + +The `` corresponds to the document key of the write attempt. In addition, +the error object contains `_key`, `_id`, and `_rev` attributes. The `_key` and +`_id` correspond to the document of the write attempt, and `_rev` corresponds +to the current revision of the document as stored in the database (if available, +otherwise empty). + +If the lock cannot be acquired on a unique index entry, the error message is as +follows: + +``` +Timeout waiting to lock key - in index of type persistent over ''; document key: ; indexed values: [] +``` + +The `` is the name of the index in which the write attempt tried to +lock the entry, `` is the list of fields included in that index, `` +corresponds to the document key of the write attempt, and `` +corresponds to the indexed values of the document. In addition, the error object +contains `_key`, `_id`, and `_rev` attributes. The `_key` and `_id` correspond +to the document of the write attempt, and `_rev` corresponds to the current +revision of the document as stored in the database (if available, otherwise empty). + +## Deprecated and removed Pregel features + +- The experimental _Custom Pregel_ feature, also known as + _programmable Pregel algorithms_ (PPA), has been removed. + +- The built-in _DMID_ Pregel algorithm has been deprecated and will be removed + in a future release. + +- The `async` option for Pregel jobs has been removed. Some algorithms supported + an asynchronous mode to run without synchronized global iterations. This is no + longer supported. + +- The `useMemoryMaps` option for Pregel jobs to use memory-mapped files as a + backing storage for large datasets has been removed. Memory paging/swapping + provided by the operating system is equally effective. + +## New query stage + +- When profiling a query (`profile` option `true`, `1`, or `2`), the `profile` + object returned under `extra` now includes a new `"instantiating executors"` + attribute with the time needed to create the query executors, and in cluster + mode, this also includes the time needed for physically distributing the query + snippets to the participating DB-Servers. Previously, the time spent for + instantiating executors and the physical distribution was contained in the + `optimizing plan` stage. + +- The `state` of a query can now additionally be `"instantiating executors"` in + the list of currently running queries. + +## Limit for the normalization of `FILTER` conditions + +Converting complex AQL `FILTER` conditions with a lot of logical branches +(`AND`, `OR`, `NOT`) into the internal DNF (disjunctive normal form) format can +take a large amount of processing time and memory. The new `maxDNFConditionMembers` +query option is a threshold for the maximum number of `OR` sub-nodes in the +internal representation and defaults to `786432`. + +You can also set the threshold globally instead of per query with the +[`--query.max-dnf-condition-members` startup option](../../components/arangodb-server/options.md#--querymax-dnf-condition-members). + +If the threshold is hit, the query continues with a simplified representation of +the condition, which is **not usable in index lookups**. However, this should +still be better than overusing memory or taking a very long time to compute the +DNF version. 
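+
+A sketch of lowering the threshold for a single query via the cursor API,
+using a hypothetical collection `coll`:
+
+```bash
+curl -X POST http://localhost:8529/_api/cursor \
+  -d '{
+    "query": "FOR doc IN coll FILTER doc.a == 1 OR doc.b == 2 RETURN doc",
+    "options": { "maxDNFConditionMembers": 1024 }
+  }'
+```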
+ +## Validation of `smartGraphAttribute` in SmartGraphs + +Introduced in: v3.10.13, v3.11.7 + +The attribute defined by the `smartGraphAttribute` graph property is not allowed to be +changed in the documents of SmartGraph vertex collections. This is now strictly enforced. +See [API Changes in ArangoDB 3.11](api-changes-in-oem.md#validation-of-smartgraphattribute-in-smartgraphs) +for details and instructions on how to repair affected attributes. + +## Validation of traversal collection restrictions + +Introduced in: v3.9.11, v3.10.7 + +In AQL graph traversals, you can restrict the vertex and edge collections in the +traversal options like so: + +```aql +FOR v, e, p IN 1..3 OUTBOUND 'products/123' components + OPTIONS { + vertexCollections: [ "bolts", "screws" ], + edgeCollections: [ "productsToBolts", "productsToScrews" ] + } + RETURN v +``` + +If you specify collections that don't exist, queries now fail with +a "collection or view not found" error (code `1203` and HTTP status +`404 Not Found`). In previous versions, unknown vertex collections were ignored, +and the behavior for unknown edge collections was undefined. + +Additionally, the collection types are now validated. If a document collection +or View is specified in `edgeCollections`, an error is raised +(code `1218` and HTTP status `400 Bad Request`). + +Furthermore, it is now an error if you specify a vertex collection that is not +part of the specified named graph (code `1926` and HTTP status `404 Not Found`). +It is also an error if you specify an edge collection that is not part of the +named graph's definition or of the list of edge collections (code `1939` and +HTTP status `400 Bad Request`). + +## Batch insertions of documents with key errors no longer fail the entire operation + +Introduced in: v3.11.1 + +When inserting multiple documents/edges at once in a cluster, the Document API +used to let the entire request fail if any of the documents/edges failed to be +saved due to a key error. More specifically, if the value of a `_key` attribute +contains illegal characters or if the key doesn't meet additional requirements, +for instance, coming from the collection being used in a Disjoint SmartGraph, +the `POST /_api/document/{collection}` endpoint would not reply with the usual +array of either the document metadata or the error object for each attempted +document insertion. Instead, it used to return an error object for the first +offending document only, and aborted the operation so that none of the documents +were saved. Example: + +```bash +> curl -d '[{"_key":"valid"},{"_key":"invalid space"}]' http://localhost:8529/_api/document/coll +{"code":400,"error":true,"errorMessage":"illegal document key","errorNum":1221} + +> curl http://localhost:8529/_api/document/coll/valid +{"code":404,"error":true,"errorMessage":"document not found","errorNum":1202} +``` + +Now, such key errors in cluster deployments no longer fail the entire request, +matching the behavior of single server deployments. 
Any errors are reported in +the result array for the respective documents, along with the successful ones: + +```bash +> curl -d '[{"_key":"valid"},{"_key":"invalid space"}]' http://localhost:8529/_api/document/coll +[{"_id":"coll/valid","_key":"valid","_rev":"_gG9JHsW---"},{"error":true,"errorNum":1221,"errorMessage":"illegal document key"}] + +> curl http://localhost:8529/_api/document/coll/valid +{"_key":"valid","_id":"coll/valid","_rev":"_gG9JHsW---"} +``` + +## Exit code adjustments + +Introduced in: v3.10.13, v3.11.7 + +For some fatal errors like a required database upgrade or a failed version check, +_arangod_ set the generic exit code of `1`. It now returns a different, more +specific exit code in these cases. + +## Batch-reading an empty list of documents succeeds + +Introduced in: v3.11.1 + +Using the Document API for reading multiple documents used to return an error +if the request body was an empty array. Example: + +```bash +> curl -XPUT -d '[]' 'http://localhost:8529/_api/document/coll?onlyget=true' +{"code":500,"error":true,"errorMessage":"internal error","errorNum":4} +``` + +Now, a request like this succeeds and returns an empty array as response. + +## Corrected sorting order for numbers in VelocyPack indexes + +Introduced in: v3.11.11, v3.12.2 + +- [Issues with the comparison of large indexed numbers](#issues-with-the-comparison-of-large-indexed-numbers) +- [Check if you are affected](#check-if-you-are-affected) +- [If the deployment is NOT affected](#if-the-deployment-is-not-affected) +- [If the deployment is affected](#if-the-deployment-is-affected) + +### Issues with the comparison of large indexed numbers + +If you store very large numeric values in ArangoDB – greater than/equal to +253 (9,007,199,254,740,992) or less than/equal to +-253 (-9,007,199,254,740,992) – and index them with an affected +index type, the values may not be in the correct order. This is due to how the +comparison is executed in versions before v3.11.11 and v3.12.2. If the numbers +are represented using different VelocyPack types internally, they are converted +to doubles and then compared. This conversion is lossy for integers with a very +large absolute value, resulting in an incorrect ordering of the values. + +The possibly affected index types are the following that allow storing +VelocyPack data in them: +- `persistent` (including vertex-centric indexes) +- `mdi-prefixed` (but not `mdi` indexes; only available from v3.12.0 onward) +- `hash` (legacy alias for persistent indexes) +- `skiplist` (legacy alias for persistent indexes) + +{{< warning >}} +The incorrect sort order in an index can lead to the RocksDB storage engine +discovering out-of-order keys and then refusing further write operations with +errors and warnings. +{{< /warning >}} + +To prevent ArangoDB deployments from entering a read-only mode due to this issue, +please follow the below procedures to check if your deployment is affected and +how to correct it if necessary. + +### Check if you are affected + +The following procedure is recommended for every deployment unless it has been +created with v3.11.11, v3.12.2, or any later version. + +1. Create a backup as a precaution. If you run the Enterprise Edition, you can + create a Hot Backup. Otherwise, create a full dump with _arangodump_ + (including all databases and system collections). + +2. If your deployment is on a 3.11.x version older than 3.11.11, upgrade to + the latest 3.11 version that is available. 
+ + If your deployment is on version 3.12.0 or 3.12.1, upgrade to the latest + 3.12 version that is available but be sure to also read about the string + sorting issue in [Resolving known issues with versions prior to 3.12.4](../../../3.12/release-notes/version-3.12/incompatible-changes-in-3-12.md#resolving-known-issues-with-versions-prior-to-3124) + and the linked upgrade procedures. + +3. Call the `GET /_admin/cluster/vpackSortMigration/check` endpoint to let + ArangoDB check all indexes. As it can take a while for large deployments, + it is recommended to run this operation as an asynchronous job + (`x-arango-async: store` header) so that you can check the result later. + + The endpoint is available for all deployment modes, not only in clusters. + In case of a cluster, send the request to one of the Coordinators. + Example with ArangoDB running locally on the default port: + + ```shell + curl --dump-header - -H "x-arango-async: store" http://localhost:8529/_admin/cluster/vpackSortMigration/check + ``` + +4. Inspect the response to find the job ID in the `X-Arango-Async-Id` HTTP header. + The job ID is `12345` in the following example: + + ``` + HTTP/1.1 202 Accepted + X-Arango-Queue-Time-Seconds: 0.000000 + Strict-Transport-Security: max-age=31536000 ; includeSubDomains + Expires: 0 + Pragma: no-cache + Cache-Control: no-cache, no-store, must-revalidate, pre-check=0, post-check=0, max-age=0, s-maxage=0 + Content-Security-Policy: frame-ancestors 'self'; form-action 'self'; + X-Content-Type-Options: nosniff + X-Arango-Async-Id: 12345 + Server: ArangoDB + Connection: Keep-Alive + Content-Type: text/plain; charset=utf-8 + Content-Length: 0 + ``` + +5. Call the `PUT /_api/job/12345` endpoint, substituting `12345` with your + actual job ID. It returns nothing if the job is still ongoing. You can repeat + this call every once in a while to check again. + + ```shell + curl -XPUT http://localhost:8529/_api/job/12345 + ``` + +6. If there are no issues with your deployment, the check result reports an + empty list of affected indexes and an according message. + + ```json + { + "error": false, + "code": 200, + "result": { + "affected": [], + "error": false, + "errorCode": 0, + "errorMessage": "all good with sorting order" + } + } + ``` + + If this is the case, continue with + [If the deployment is NOT affected](#if-the-deployment-is-not-affected). + + If affected indexes are found, the check result looks similar to this: + + ```json + { + "error": false, + "code": 200, + "result": { + "affected": [ + { + "database": "_system", + "collection": "coll", + "indexId": 195, + "indexName": "idx_1806192152446763008" + } + ], + "error": true, + "errorCode": 1242, + "errorMessage": "some indexes have legacy sorted keys" + } + } + ``` + + If this is the case, continue with + [If the deployment is affected](#if-the-deployment-is-affected). + +### If the deployment is NOT affected + +1. Make sure that no problematic values are written to or removed from an index + between checking for affected indexes and completing the procedure. + To be safe, you may want to stop all writes to the database system. + +2. You can perform a regular in-place upgrade and mark the deployment as correct + using a special HTTP API endpoint in the next step. + + That is, create a backup and upgrade your deployment to the + latest bugfix version with the same major and minor version (e.g. from 3.11.x + to at least 3.11.11 or from 3.12.x to at least 3.12.2). + +3. 
Call the `PUT /_admin/cluster/vpackSortMigration/migrate` endpoint to mark + the deployment as having the correct sorting order. This requires + [superuser permissions](../../develop/http-api/authentication.md#jwt-superuser-tokens) + unless authentication is disabled. + + ```shell + curl -H "Authorization: bearer " -XPUT http://localhost:8529/_admin/cluster/vpackSortMigration/migrate + ``` + + ```json + { + "error": false, + "code": 200, + "result": { + "error": false, + "errorCode": 0, + "errorMessage": "VPack sorting migration done." + } + } + ``` + +4. For the corrected sorting order to take effect, restart the ArangoDB server, + respectively restart the DB-Servers of the cluster. + +5. Complete the procedure by resuming writes to the database systems. + +### If the deployment is affected + +{{< info >}} +If you are a customer, please contact the Arango support to assist you with +the following steps. +{{< /info >}} + +1. This step depends on the deployment mode: + + - **Single server**: Create a new server. Then create a full dump with + [arangodump](../../components/tools/arangodump/_index.md) of the old server, + using the `--all-databases` and `--include-system-collections` startup options + and a user account with administrate access to the `_system` database and + at least read access to all other databases to ensure all data including + the `_users` system collection are dumped. + + Restore the dump to the new single server using at least v3.11.11 or v3.12.4 + (v3.12.2 only addresses this but not [another issue](../../../3.12/release-notes/version-3.12/incompatible-changes-in-3-12.md#corrected-sorting-order-for-strings-in-velocypack-indexes)). + You need to use a new database directory. + + - **Active Failover**: You need to replace all servers of the deployment. + You can do so in a rolling manner. + + Create a new server and add it as a new follower to the deployment. + When it is in-sync with the leader, remove one of the old followers. + Replace any other old followers in the same manner. Then create + one more new server, add it as a follower, and wait until it is in-sync. + Then remove the old leader, failing over to one of the new followers. + You should stop all writes temporarily before and after the failover so + that nothing is lost, as the Active Failover replication is asynchronous. + + You can also follow the single server instructions if it's acceptable to + have downtime. + + - **Cluster**: Replace the DB-Server nodes until they all run at least + v3.11.11 or v3.12.4 (rolling upgrade). Syncing new nodes writes the data in + the correct order. This deployment mode and approach avoids downtimes. + + For each DB-Server, add a new DB-Server node to the cluster. Wait until all + new DB-Servers are in sync, then clean out the old DB-Server nodes. + +2. New instances using the fixed versions initialize the database directory + with the sorting order marked as correct and also restore data from dumps + correctly. There is no need to call the `.../vpackSortMigration/migrate` + HTTP API endpoint like in the unaffected case. + +3. If you revert to an older state with affected indexes by restoring a + Hot Backup, you need to repeat the procedure. + +## Changed JSON serialization and VelocyPack format for replication + +Introduced in: v3.11.12, v3.12.3 + +While there is only one number type in JSON, the VelocyPack format that ArangoDB +uses supports different numeric data types. 
When converting between VelocyPack +and JSON, it was previously possible for precision loss to occur in edge cases. +This also affected creating and restoring dumps with arangodump and arangorestore. + +A double (64-bit floating-point) value `1152921504606846976.0` (260) +used to be serialized to `1152921504606847000` in JSON, which deserializes back +to `1152921504606846976` when using a double. However, the serialized value got +parsed as an unsigned integer, resulting in an incorrect value of +`1152921504606847000`. + +Numbers with an absolute value greater or equal to 253 and less than +264 (which always represents an integer) are now serialized faithfully +to JSON using an integer conversion routine and then `.0` is appended (e.g. +`1152921504606846976.0`) to ensure that they get parsed back to the exact same +double value. All other values are serialized as before, e.g. small integral +values don't get `.0` appended, and they get parsed back to integers with the +same numerical value. + +Moreover, replication-related APIs such as the `/_api/wal/tail` endpoint now +support the VelocyPack format. The cluster replication has been changed to use +VelocyPack instead of JSON to avoid unnecessary conversions and avoiding any +risk of deviations due to the serialization. + +## JavaScript API + +### Database creation + +The `db._createDatabase()` method for creating a new database has changed. +If the specified database name is invalid/illegal, it now returns the error code +`1208` (`ERROR_ARANGO_ILLEGAL_NAME`). It previously returned `1229` +(`ERROR_ARANGO_DATABASE_NAME_INVALID`) in this case. + +This is a downwards-incompatible change, but unifies the behavior for database +creation with the behavior of collection and View creation, which also return +the error code `1208` in case the specified name is not allowed. + +### Index methods + +Calling `collection.dropIndex(...)` or `db._dropIndex(...)` now raises an error +if the specified index does not exist or cannot be dropped (for example, because +it is a primary index or edge index). The methods previously returned `false`. +In case of success, they still return `true`. + +You can wrap calls to these methods with a `try { ... }` block to catch errors, +for example, in _arangosh_ or in Foxx services. + +## Startup options + +### `--server.disable-authentication` and `--server.disable-authentication-unix-sockets` obsoleted + +The `--server.disable-authentication` and `--server.disable-authentication-unix-sockets` +startup options are now obsolete. Specifying them is still tolerated but has +no effect anymore. These options were deprecated in v3.0 and mapped to +`--server.authentication` and `--server.authentication-unix-sockets`, which +made them do the opposite of what their names suggest. + +### `--database.force-sync-properties` deprecated + +The `--database.force-sync-properties` option was useful with the MMFiles +storage engine, which has been removed in v3.7. The option does not have any +useful effect if you use the RocksDB storage engine. From v3.11.0 onwards, it +has no effect at all, is deprecated, and will be removed in a future version. + +### `--agency.pool-size` deprecated + +The `--agency.pool-size` option was effectively not properly supported in any +version of ArangoDB. Setting the option to anything but the value of +`--agency.size` should be avoided. + +From v3.11.0 onwards, this option is deprecated, and setting it to a value +different than the value of `--agency.size` leads to a startup error. 
+ +### `--query.parallelize-gather-writes` obsoleted + +Parallel gather is now enabled by default and supported for most queries. +The `--query.parallelize-gather-writes` startup option has no effect anymore, +but specifying it still tolerated. + +See [Features and Improvements in ArangoDB 3.11](whats-new-in-oem.md#parallel-gather) +for details. + +### `--pregel.memory-mapped-files*` obsoleted + +Pregel no longer supports use memory-mapped files as a backing storage. +The following startup options have therefore been removed: + +- `--pregel.memory-mapped-files` +- `--pregel.memory-mapped-files-custom-path` +- `--pregel.memory-mapped-files-location-type` + +You can still specify them on startup without raising errors but they have no +effect anymore. + +## Client tools + +### arangoexport + +The default output file type produced by arangoexport, controlled by the `--type` +startup option, has been changed from `json` to `jsonl`. +This allows for more efficient processing of the files produced by arangoexport +with other tools, such as arangoimport, by default. diff --git a/site/content/arangodb/3.12/release-notes/version-oem/known-issues-in-oem.md b/site/content/arangodb/3.12/release-notes/version-oem/known-issues-in-oem.md new file mode 100644 index 0000000000..05ecdda870 --- /dev/null +++ b/site/content/arangodb/3.12/release-notes/version-oem/known-issues-in-oem.md @@ -0,0 +1,61 @@ +--- +title: Known Issues in ArangoDB OEM LTS +menuTitle: Known Issues in OEM LTS +weight: 10 +description: >- + Important issues affecting the OEM LTS versions of the ArangoDB suite of products +--- +Note that this page does not list all open issues. + +## ArangoSearch + +| Issue | +|------------| +| **Date Added:** 2018-12-19
**Component:** ArangoSearch
**Deployment Mode:** Single-server
**Description:** Value of `_id` attribute indexed by `arangosearch` View may become inconsistent after renaming a collection
**Affected Versions:** 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [arangodb/backlog#514](https://github.com/arangodb/backlog/issues/514) (internal) | +| **Date Added:** 2018-12-03
**Component:** ArangoSearch
**Deployment Mode:** Cluster
**Description:** Score values evaluated by corresponding score functions (BM25/TFIDF) may differ in single-server and cluster with a collection having more than 1 shard
**Affected Versions:** 3.4.x, 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [arangodb/backlog#508](https://github.com/arangodb/backlog/issues/508) (internal) | +| **Date Added:** 2018-12-03
**Component:** ArangoSearch
**Deployment Mode:** All
**Description:** Using a loop variable in expressions within a corresponding SEARCH condition is not supported
**Affected Versions:** 3.4.x, 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [arangodb/backlog#318](https://github.com/arangodb/backlog/issues/318) (internal) | +| **Date Added:** 2019-06-25
**Component:** ArangoSearch
**Deployment Mode:** All
**Description:** The `primarySort` attribute in `arangosearch` View definitions cannot be set via the web interface. The option is immutable, but the web interface does not allow you to set any View properties upfront (it creates a View with default parameters before the user has a chance to configure it).<br>
**Affected Versions:** 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** N/A | +| **Date Added:** 2020-03-19
**Component:** ArangoSearch
**Deployment Mode:** All
**Description:** Operators and functions in `SEARCH` clauses of AQL queries which compare values such as `>`, `>=`, `<`, `<=`, `IN_RANGE()` and `STARTS_WITH()` neither take the server language (`--default-language`) nor the Analyzer locale into account. The alphabetical order of characters as defined by a language is thus not honored and can lead to unexpected results in range queries.
**Affected Versions:** 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [arangodb/backlog#679](https://github.com/arangodb/backlog/issues/679) (internal) | + +## AQL + +| Issue | +|------------| +| **Date Added:** 2018-09-05
**Component:** AQL
**Deployment Mode:** Cluster
**Description:** In a very uncommon edge case there is an issue with an optimization rule in the cluster. If you are running a cluster and use a custom shard key on a collection (default is `_key`) **and** you provide a wrong shard key in a modifying query (`UPDATE`, `REPLACE`, `DELETE`) **and** the wrong shard key is on a different shard than the correct one, a `DOCUMENT NOT FOUND` error is returned instead of a modification (example query: `UPDATE { _key: "123", shardKey: "wrongKey"} WITH { foo: "bar" } IN mycollection`). Note that the modification always happens if the rule is switched off, so the suggested workaround is to [deactivate the optimizing rule](../../aql/execution-and-performance/query-optimization.md#turning-specific-optimizer-rules-off) `restrict-to-single-shard`.
**Affected Versions:** 3.4.x, 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [arangodb/arangodb#6399](https://github.com/arangodb/arangodb/issues/6399) | + +## Upgrading + +| Issue | +|------------| +| **Date Added:** 2019-05-16
**Component:** arangod
**Deployment Mode:** All
**Description:** Bugfix release upgrades such as 3.4.4 to 3.4.5 may not create a backup of the database directory even if they should. Please create a copy manually before upgrading.
**Affected Versions:** 3.4.x, 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x (Windows and Linux)
**Fixed in Versions:** -
**Reference:** [arangodb/planning#3745](https://github.com/arangodb/planning/issues/3745) (internal) | +| **Date Added:** 2019-12-10
**Component:** Installer
**Deployment Mode:** All
**Description:** The NSIS installer for Windows may fail to upgrade an existing installation, e.g. from 3.4.a to 3.4.b (patch release), with the error message: "failed to detect whether we need to Upgrade"
**Affected Versions:** 3.4.x, 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [arangodb/release-qa#183](https://github.com/arangodb/release-qa/issues/183) (internal) | +| **Date Added:** 2020-01-07
**Component:** Installer
**Deployment Mode:** All
**Description:** The NSIS installer for Windows can fail to add the path to the ArangoDB binaries to the `PATH` environment variable, silently or with an error.
**Affected Versions:** 3.4.x, 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [arangodb/release-qa#183](https://github.com/arangodb/release-qa/issues/183) (internal) | +| **Date Added:** 2023-06-06
**Component:** arangod
**Deployment Mode:** Cluster
**Description:** During a cluster upgrade while the supervision is deactivated (maintenance mode), upgraded DB-Server nodes are incorrectly reported to still have the old server version. The versions are visible in the Agency as well as in the **NODES** section of the web interface.
**Affected Versions:** 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [BTS-1409](https://arangodb.atlassian.net/browse/BTS-1409) (internal) | + +## Hot Backup + +| Issue | +|------------| +| **Date Added:** 2019-10-09
**Component:** Hot Backup API / arangobackup
**Deployment Mode:** All
**Description:** The Hot Backup feature is not supported in the Windows version of ArangoDB at this point in time.
**Affected Versions:** 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** N/A | +| **Date Added:** 2019-10-09
**Component:** Hot Backup API / arangobackup
**Deployment Mode:** DC2DC
**Description:** Hot Backup functionality in Datacenter-to-Datacenter Replication setups is experimental and may not work.
**Affected Versions:** 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** N/A | +| **Date Added:** 2019-10-09
**Component:** arangobackup
**Deployment Mode:** All
**Description:** The startup option `--operation` only works as a positional argument, e.g. `arangobackup list`. The alternative syntax `arangobackup --operation list` is not accepted.<br>
**Affected Versions:** 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** N/A | + +## Other + +| Issue | +|------------| +| **Date Added:** 2019-05-16
**Component:** Starter
**Deployment Mode:** All
**Description:** The ArangoDB Starter falls back to the IP `[::1]` under macOS. If there is no entry `::1 localhost` in the `/etc/hosts` file or the option `--starter.disable-ipv6` is passed to the starter to use IPv4, then it will hang during startup.
**Affected Versions:** 0.14.3 (macOS only)
**Fixed in Versions:** -
**Reference:** N/A | +| **Date Added:** 2019-05-24
**Component:** Web Interface
**Deployment Mode:** Active Failover
**Description:** The web interface sometimes shows a wrong replication mode in the replication tab of Active Failover deployments. It may display Leader/Follower mode (the default value) because of timeouts if `/_api/cluster/endpoints` is requested too frequently.<br>
**Affected Versions:** 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** N/A | +| **Date Added:** 2019-04-03
**Component:** arangod
**Deployment Mode:** Cluster
**Description:** Updating the properties of a collection in the cluster may return before the properties are updated consistently on all shards. This is especially visible when setting a schema for a collection with multiple shards, and then instantly starting to store non-conforming documents into the collection. These may be accepted until the properties change has been fully propagated to all shards.
**Affected Versions:** 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** N/A | +| **Date Added:** 2021-04-07
**Component:** arangod
**Deployment Mode:** All
**Description:** The Batch API (HTTP endpoint `/_api/batch`) cannot be used in combination with Stream transactions to submit batched requests, because the required header `x-arango-trx-id` is not forwarded. It only processes `Content-Type` and `Content-Id`.
**Affected Versions:** 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [arangodb/arangodb#13552](https://github.com/arangodb/arangodb/issues/13552) | +| **Date Added:** 2021-08-06
**Component:** Installer
**Deployment Mode:** Single Server
**Description:** The Windows installer fails during database initialization with the error `failed to locate tzdata` if there are non-ASCII characters in the destination path.
**Affected Versions:** 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [BTS-531](https://arangodb.atlassian.net/browse/BTS-531) (internal) | +| **Date Added:** 2022-09-29
**Component:** ArangoDB Starter
**Deployment Mode:** All
**Description:** The ArangoDB Starter may fail to pick a Docker container name from cgroups.
**Affected Versions:** 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [GT-207](https://arangodb.atlassian.net/browse/GT-207) (internal) | +| **Date Added:** 2023-05-25
**Component:** Web Interface
**Deployment Mode:** All
**Description:** When accessing the Web Interface with Chromium-based browsers, the following issue may occur: the **Download JSON** button in the **Collections** page and the **Download** button in the **Queries** page don't work.
**Affected Versions:** 3.11.x
**Fixed in Versions:** 3.11.2
**Reference:** [BTS-1424](https://arangodb.atlassian.net/browse/BTS-1424) (internal) | +| **Date Added:** 2023-05-25
**Component:** arangod
**Deployment Mode:** Single Server
**Description:** After an upgrade to 3.11.0 from an older version of ArangoDB with existing data, the following issue may occur: when you start a Pregel run or request its status, the Pregel command fails with `ArangoError 1203: _pregel_queries...`. As a workaround, you can manually create the collection by running `db._create("_pregel_queries", { isSystem: true });` in arangosh.
**Affected Versions:** 3.11.0
**Fixed in Versions:** 3.11.1
**Reference:** [arangodb/arangodb#19101](https://github.com/arangodb/arangodb/pull/19101) | +| **Date Added:** 2023-05-25
**Component:** arangod
**Deployment Mode:** All
**Description:** When starting an async job by sending a request with the `x-arango-async: store` or `x-arango-async: keep` HTTP header **and** additionally sending the `accept-encoding: gzip` or `accept-encoding: deflate` HTTP header, the generated response may be compressed twice when fetching the async job's response later via the `/_api/job/` REST API.
**Affected Versions:** 3.11.0
**Fixed in Versions:** 3.11.1
**Reference:** [arangodb/arangodb#19103](https://github.com/arangodb/arangodb/pull/19103) | +| **Date Added:** 2023-06-15
**Component:** arangod
**Deployment Mode:** Cluster
**Description:** After an upgrade to 3.11.0 from an older version of ArangoDB with existing data, the following issue may occur after performing a HotBackup: `_pregel_queries` collections do not exist.
**Affected Versions:** 3.11.x
**Fixed in Versions:** -
**Reference:** [BTS-1462](https://arangodb.atlassian.net/browse/BTS-1462) (internal) | +| **Date Added:** 2023-06-16
**Component:** arangod
**Deployment Mode:** Cluster
**Description:** If more than a certain threshold of queries on the same Coordinator get into the shutdown networking code at the same time, all of them lock up and the Coordinator no longer processes requests.
**Affected Versions:** 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** 3.9.12, 3.10.8, 3.11.2
**Reference:** [BTS-1486](https://arangodb.atlassian.net/browse/BTS-1486) (internal) | +| **Date Added:** 2024-03-21
**Component:** arangod
**Deployment Mode:** All
**Description:** When creating an `inverted` index with the `inBackground` option enabled, HTTP API calls like `http://localhost:8529/_api/index?collection=&withHidden=true` don't return the `isBuilding` and `progress` attributes, so the progress of the index building cannot be observed.
**Affected Versions:** 3.10.13, 3.11.7
**Fixed in Versions:** -
**Reference:** [BTS-1788](https://arangodb.atlassian.net/browse/BTS-1788) (internal) | +| **Date Added:** 2024-07-03
**Component:** arangod
**Deployment Mode:** All
**Description:** ArangoDB can crash if run on bare metal and the Linux distribution uses a different glibc version, the `libnss-*` libraries are installed, and the `/etc/nsswitch.conf` configuration file contains settings other than for `files` and `dns` in the `hosts:` line, or the `passwd:` and `group:` lines contain something other than `files`. If you use a fixed version, it can still crash under these circumstances if you enable the `--honor-nsswitch` startup option.
**Affected Versions:** 3.11.10 (non-hotfix)
**Fixed in Versions:** 3.11.10-1
**Reference:** [Incompatibility due to switch to glibc](incompatible-changes-in-oem.md#incompatibilities-due-to-switch-to-glibc) | +| **Date Added:** 2025-01-30
**Component:** arangod
**Deployment Mode:** Cluster
**Description:** If the invariant that ArangoDB's data is not modified while a server is down is violated, manually as well as automatically triggered operations such as moving shards can make a DB-Server the leader (again) even though it may not have the correct data. ArangoDB currently does not protect against certain cases, such as bringing a DB-Server back without data (by accident or on purpose), which can lead to this empty state being replicated across the cluster and thus causing data loss.
**Affected Versions:** 3.11.x
**Fixed in Versions:** -
**Reference:** N/A | +| **Date Added:** 2025-06-25
**Component:** Web interface
**Deployment Mode:** All
**Description:** Specifying an `X-Script-Name` HTTP header in requests to the web interface (`/_admin/aardvark`) to add a path prefix is non-functional. The feature was originally added in version 3.0 for basic proxy setups but doesn't adequately handle the requests of certain internal services.
**Affected Versions:** 3.11.x
**Fixed in Versions:** -
**Reference:** N/A | diff --git a/site/content/arangodb/3.12/release-notes/version-oem/whats-new-in-oem.md b/site/content/arangodb/3.12/release-notes/version-oem/whats-new-in-oem.md new file mode 100644 index 0000000000..e50c7abb6b --- /dev/null +++ b/site/content/arangodb/3.12/release-notes/version-oem/whats-new-in-oem.md @@ -0,0 +1,1454 @@ +--- +title: Features and Improvements in ArangoDB OEM LTS +menuTitle: What's New in OEM LTS +weight: 5 +description: >- + Improved performance and reporting for AQL queries, new caching features for + indexed data, improvements to the web interface +--- +The following list shows in detail which features have been added or improved in +ArangoDB OEM LTS. ArangoDB OEM LTS also contains several bug fixes that are not listed +here. + +## ArangoSearch + +### Late materialization improvements + +The number of disk reads required when executing search queries with late +materialization optimizations applied has been reduced so that less data needs +to be requested from the RocksDB storage engine. + +### ArangoSearch column cache (Enterprise Edition) + +[`arangosearch` Views](../../indexes-and-search/arangosearch/arangosearch-views-reference.md) support new caching options. + +Introduced in: v3.9.5, v3.10.2 + +- You can enable the new `cache` option for individual View links or fields + to always cache field normalization values in memory. This can improve the + performance of scoring and ranking queries. + + It also enables caching of auxiliary data used for querying fields that are + indexed with Geo Analyzers. This can improve the performance of geo-spatial + queries. + +- You can enable the new `cache` option in the definition of a `storedValues` + View property to always cache stored values in memory. This can improve the + query performance if stored values are involved. + +--- + +Introduced in: v3.9.6, v3.10.2 + +- You can enable the new `primarySortCache` View property to always cache the + primary sort columns in memory. This can improve the performance of queries + that utilize the primary sort order. + +- You can enable the new `primaryKeyCache` View property to always cache the + primary key column in memory. This can improve the performance of queries + that return many documents. + +--- + +[Inverted indexes](../../develop/http-api/indexes/inverted.md) also support similar new caching +options. + +Introduced in: v3.10.2 + +- A new `cache` option for inverted indexes as the default or for specific + `fields` to always cache field normalization values and Geo Analyzer auxiliary + data in memory. + +- A new `cache` option per object in the definition of the `storedValues` + elements to always cache stored values in memory. + +- A new `cache` option in the `primarySort` property to always cache the + primary sort columns in memory. + +- A new `primaryKeyCache` property for inverted indexes to always cache the + primary key column in memory. + +--- + +The cache size can be controlled with the new `--arangosearch.columns-cache-limit` +startup option and monitored via the new `arangodb_search_columns_cache_size` +metric. + +ArangoSearch caching is only available in the Enterprise Edition. + +See [Optimizing View and inverted index query performance](../../indexes-and-search/arangosearch/performance.md) +for examples. + +{{< info >}} +If you use ArangoSearch caching in supported 3.9 versions and upgrade an +Active Failover deployment to 3.10, you may need to re-configure the +cache-related options and thus recreate inverted indexes and Views. 
See +[Known Issues in 3.10](../version-3.10/known-issues-in-3-10.md#arangosearch). +{{< /info >}} + +## Analyzers + +### `geo_s2` Analyzer (Enterprise Edition) + +Introduced in: v3.10.5 + +This new Analyzer lets you index GeoJSON data with inverted indexes or Views +similar to the existing `geojson` Analyzer, but it internally uses a format for +storing the geo-spatial data that is more efficient. + +You can choose between different formats to make a tradeoff between the size on +disk, the precision, and query performance: + +- 8 bytes per coordinate pair using 4-byte integer values, with limited precision. +- 16 bytes per coordinate pair using 8-byte floating-point values, which is still + more compact than the VelocyPack format used by the `geojson` Analyzer +- 24 bytes per coordinate pair using the native Google S2 format to reduce the number + of computations necessary when you execute geo-spatial queries. + +This feature is only available in the Enterprise Edition. + +See [Analyzers](../../indexes-and-search/analyzers.md#geo_s2) for details. + +## Web interface + +### New graph viewer + +The graph viewer for visualizing named graphs has been reimplemented based on +the [vis.js](https://visjs.org/) library, the interface +has been redesigned to be cleaner and rewritten to use the React framework, +and the overall performance has been improved. + +The available **Layout** algorithms are **forceAtlas2** and **hierarchical**. +Force-based layouts try to avoid overlaps while grouping adjacent nodes together. +The new hierarchical layout is useful for strict topologies like trees. + +A new feature is the ability to search the visible graph to center a specific +vertex. Another quality-of-life improvement is the **Start node** setting listing +the graph's vertex collections and the available document keys, that you can +also search by. + +![New graph viewer](../../../../images/graphViewer.png) + +You can still switch to the old graph viewer if desired. + +See the [Graph Viewer](../../components/web-interface/graphs.md) documentation for +details. + +### `search-alias` Views + +The 3.11 release of ArangoDB introduces a new web interface for Views that lets +you to create and manage [`search-alias` Views](../../indexes-and-search/arangosearch/search-alias-views-reference.md). + +Through this dialog, you can easily create a new View and add to it one or more +inverted indexes from your collections that you could otherwise do via the HTTP +or JavaScript API. + +When opening your newly created View, you can copy mutable properties from +previously created `search-alias` Views, providing a convenient way to apply +the same settings to multiple Views. In addition, the JSON editor offers the +option to directly write the definition of your View in JSON format. + +For more information, see the +[detailed guide](../../indexes-and-search/arangosearch/search-alias-views-reference.md#create-search-alias-views-using-the-web-interface). + +### `arangosearch` Views + +The existing way of creating and managing `arangosearch` Views through the +web interface has been redesigned, offering a more straightforward approach to add +or modify the definition of your View. The settings, links, and JSON editor have +been merged into a single page, allowing for a much quicker workflow. + +For more information, see the +[detailed guide](../../indexes-and-search/arangosearch/arangosearch-views-reference.md#create-arangosearch-views-using-the-web-interface). 
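
The View types described above can also be created programmatically instead of through the web interface. A minimal arangosh sketch (collection, index, and View names are only examples):

```js
// Create a collection and an inverted index on it (names are hypothetical).
db._create("articles");
db.articles.ensureIndex({
  type: "inverted",
  name: "inv_title",
  fields: ["title"]
});

// Expose the inverted index through a `search-alias` View.
db._createView("articlesAlias", "search-alias", {
  indexes: [{ collection: "articles", index: "inv_title" }]
});

// Create an `arangosearch` View with a link to the collection.
db._createView("articlesSearch", "arangosearch", {
  links: { articles: { includeAllFields: true } }
});
```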
+ +### Inverted indexes + +The web interface now includes the option for creating +[inverted indexes](../../indexes-and-search/indexing/working-with-indexes/inverted-indexes.md) on collections. You can set all the +properties directly in the web interface, which previously required the JavaScript +or HTTP API. It also offers an editor where you can write the definition of +your inverted index in JSON format. + +### New sorting mechanism and search box for Saved Queries + +When working with **Saved Queries** in the web interface, you can now +configure their sort order so that your saved queries are listed by the +date they were last modified. +This is particularly helpful when you have a large amount of saved custom +queries and want to see which ones have been created or used recently. + +In addition, the web interface also offers a search box which helps you +quickly find the query you're looking for. + +## AQL + +### Parallel gather + +On Coordinators in cluster deployments, results from different DB-Servers are +combined into a stream of results. This process is called gathering. It shows as +`GatherNode` nodes in the execution plan of AQL queries. + +Previously, a cluster AQL query could only parallelize a `GatherNode` if the +DB-Server query part above it (in terms of query execution plan layout) was a +terminal part of the query. That means that it was not allowed for other nodes of +type `ScatterNode`, `GatherNode`, or `DistributeNode` to be present in the query. + +Modification queries were also not allowed to use parallel gather unless the +`--query.parallelize-gather-writes` startup option was enabled, which defaulted +to `false`. + +From v3.11.0 onward, these limitations are removed so that parallel gather can be +used in almost all queries. As a result, the feature is enabled by default and +the `--query.parallelize-gather-writes` startup option is now obsolete. You can +still disable the optimization by disabling the `parallelize-gather` AQL +optimizer rule. + +The only case where parallel gather is not supported is when using traversals, +although there are some exceptions for Disjoint SmartGraphs where the traversal +can run completely on the local DB-Server (only available in the Enterprise Edition). + +The parallel gather optimization can not only speed up queries quite significantly, +but also overcome issues with the previous serial processing within `GatherNode` +nodes, which could lead to high memory usage on Coordinators caused by buffering +of documents for other shards, and timeouts on some DB-Servers because query parts +were idle for too long. + +### Optimized access of last element in traversals + +If you use a `FOR` operation for an AQL graph traversal like `FOR v, e, p IN ...` +and later access the last vertex or edge via the path variable `p`, like +`FILTER p.vertices[-1].name == "ArangoDB"` or `FILTER p.edges[-1].weight > 5`, +the access is transformed to use the vertex variable `v` or edge variable `e` +instead, like `FILTER v.name == "ArangoDB"` or `FILTER e.weight > 5`. This is +cheaper to compute because the path variable `p` may not need to be computed at +all, and it can enable further optimizations that are not possible on `p`. + +The new `optimize-traversal-last-element-access` optimization rule appears in +query execution plans if this optimization is applied. 
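
For illustration, the following arangosh sketch explains a traversal query that filters on the last path vertex (graph name, start vertex, and attribute are hypothetical); if the rewrite applies, the `optimize-traversal-last-element-access` rule should be listed among the applied optimizer rules:

```js
// The filter on p.vertices[-1] can be rewritten by the optimizer to use the
// vertex variable `v` directly, so the path variable may not need to be computed.
db._explain(`
  FOR v, e, p IN 1..3 OUTBOUND "vertices/start" GRAPH "myGraph"
    FILTER p.vertices[-1].name == "ArangoDB"
    RETURN v
`);
```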
+ +### Faster bulk `INSERT` operations in clusters + +AQL `INSERT` operations that insert multiple documents can now be faster in +cluster deployments by avoiding unnecessary overhead that AQL queries typically +require for the setup and shutdown in a cluster, as well as for the internal +batching. + +This improvement also decreases the number of HTTP requests to the DB-Servers. +Instead of batching the array of documents (with a default batch size of `1000`), +a single request per DB-Server is used internally to transfer the data. + +The optimization brings the AQL `INSERT` performance close to the performance of +the specialized HTTP API for [creating multiple documents](../../develop/http-api/documents.md#create-multiple-documents). + +The pattern that is recognized by the optimizer is as follows: + +```aql +FOR doc IN INSERT doc INTO collection +``` + +`` can either be a bind parameter, a variable, or an array literal. +The value needs to be an array of objects and be known at query compile time. + +```aql +Query String (43 chars, cacheable: false): + FOR doc IN @docs INSERT doc INTO collection + +Execution plan: + Id NodeType Site Est. Comment + 1 SingletonNode COOR 1 * ROOT + 2 CalculationNode COOR 1 - LET #2 = [ { "value" : 1 }, { "value" : 2 }, { "value" : 3 } ] /* json expression */ /* const assignment */ + 5 MultipleRemoteModificationNode COOR 3 - FOR doc IN #2 INSERT doc IN collection + +Indexes used: + none + +Optimization rules applied: + Id RuleName + 1 remove-data-modification-out-variables + 2 optimize-cluster-multiple-document-operations +``` + +The query runs completely on the Coordinator. The `MultipleRemoteModificationNode` +performs a bulk document insert for the whole input array in one go, internally +using a transaction that is more lightweight for transferring the data to the +DB-Servers than a regular AQL query. + +Without the optimization, the Coordinator requests data from the DB-Servers +(`GatherNode`), but the DB-Servers have to contact the Coordinator in turn to +request their data (`DistributeNode`), involving a network request for every +batch of documents: + +```aql +Execution plan: + Id NodeType Site Est. Comment + 1 SingletonNode COOR 1 * ROOT + 2 CalculationNode COOR 1 - LET #2 = [ { "value" : 1 }, { "value" : 2 }, { "value" : 3 } ] /* json expression */ /* const assignment */ + 3 EnumerateListNode COOR 3 - FOR doc IN #2 /* list iteration */ + 9 CalculationNode COOR 3 - LET #4 = MAKE_DISTRIBUTE_INPUT_WITH_KEY_CREATION(doc, null, { "allowSpecifiedKeys" : false, "ignoreErrors" : false, "collection" : "collection" }) /* simple expression */ + 5 DistributeNode COOR 3 - DISTRIBUTE #4 + 6 RemoteNode DBS 3 - REMOTE + 4 InsertNode DBS 0 - INSERT #4 IN collection + 7 RemoteNode COOR 0 - REMOTE + 8 GatherNode COOR 0 - GATHER /* parallel, unsorted */ +``` + +The new `optimize-cluster-multiple-document-operations` optimizer rule that +enables the optimization is only applied if there is no `RETURN` operation, +which means you cannot use `RETURN NEW` or similar to access the new documents +including their document keys. Additionally, all preceding calculations must be +constant, which excludes any subqueries that read documents. + +See the list of [optimizer rules](../../aql/execution-and-performance/query-optimization.md#optimizer-rules) +for details. + +### Index cache refilling + +The [edge cache refilling](../version-3.10/whats-new-in-3-10.md#edge-cache-refilling-experimental) +feature introduced in v3.9.6 and v3.10.2 is no longer experimental. 
From v3.11.0 +onward, it is called _**index** cache refilling_ and is not limited to edge caches +anymore, but also supports in-memory hash caches of persistent indexes +(persistent indexes with the `cacheEnabled` option set to `true`). + +This new feature automatically refills the in-memory index caches. +When documents (including edges) are added, modified, or removed and if this +affects an edge index or cache-enabled persistent indexes, these changes are +tracked and a background thread tries to update the index caches accordingly if +the feature is enabled, by adding new, updating existing, or deleting and +refilling cache entries. + +You can enable it for individual `INSERT`, `UPDATE`, `REPLACE`, and `REMOVE` +operations in AQL queries (using `OPTIONS { refillIndexCaches: true }`), for +individual document API requests that insert, update, replace, or remove single +or multiple documents (by setting `refillIndexCaches=true` as query +parameter), as well as enable it by default using the new +`--rocksdb.auto-refill-index-caches-on-modify` startup option. + +The new `--rocksdb.auto-refill-index-caches-queue-capacity` startup option +restricts how many index cache entries the background thread can queue at most. +This limits the memory usage for the case of the background thread being slower +than other operations that invalidate index cache entries. + +The background refilling is done on a best-effort basis and not guaranteed to +succeed, for example, if there is no memory available for the cache subsystem, +or during cache grow/shrink operations. A background thread is used so that +foreground write operations are not slowed down by a lot. It may still cause +additional I/O activity to look up data from the storage engine to repopulate +the cache. + +In addition to refilling the index caches, the caches can also automatically be +seeded on server startup. Use the new `--rocksdb.auto-fill-index-caches-on-startup` +startup option to enable this feature. It may cause additional CPU and I/O load. +You can limit how many index filling operations can execute concurrently with the +`--rocksdb.max-concurrent-index-fill-tasks` option. The lower this number, the +lower the impact of the cache filling, but the longer it takes to complete. + +The following metrics are available: + +| Label | Description | +|:------|:------------| +| `rocksdb_cache_auto_refill_loaded_total` | Total number of queued items for in-memory index caches refilling. +| `rocksdb_cache_auto_refill_dropped_total` | Total number of dropped items for in-memory index caches refilling. +| `rocksdb_cache_full_index_refills_total` | Total number of in-memory index caches refill operations for entire indexes. + +This feature is experimental. + +Also see: +- [AQL `INSERT` operation](../../aql/high-level-operations/insert.md#refillindexcaches) +- [AQL `UPDATE` operation](../../aql/high-level-operations/update.md#refillindexcaches) +- [AQL `REPLACE` operation](../../aql/high-level-operations/replace.md#refillindexcaches) +- [AQL `REMOVE` operation](../../aql/high-level-operations/remove.md#refillindexcaches) +- [Document HTTP API](../../develop/http-api/documents.md) +- [Index cache refill options](#index-cache-refill-options) + +### Retry request for result batch + +You can retry the request for the latest result batch of an AQL query cursor if +you enable the new `allowRetry` query option. See +[API Changes in ArangoDB 3.11](api-changes-in-oem.md#cursor-api) +for details. + +### `COLLECT ... 
INTO` can use `hash` method + +Grouping with the `COLLECT` operation supports two different methods, `hash` and +`sorted`. For `COLLECT` operations with an `INTO` clause, only the `sorted` method +was previously supported, but the `hash` variant has been extended to now support +`INTO` clauses as well. + +```aql +FOR i IN 1..10 + COLLECT v = i % 2 INTO group // OPTIONS { method: "hash" } + SORT null + RETURN { v, group } +``` + +```aql +Execution plan: + Id NodeType Est. Comment + 1 SingletonNode 1 * ROOT + 2 CalculationNode 1 - LET #3 = 1 .. 10 /* range */ /* simple expression */ + 3 EnumerateListNode 10 - FOR i IN #3 /* list iteration */ + 4 CalculationNode 10 - LET #5 = (i % 2) /* simple expression */ + 5 CollectNode 8 - COLLECT v = #5 INTO group KEEP i /* hash */ + 8 CalculationNode 8 - LET #9 = { "v" : v, "group" : group } /* simple expression */ + 9 ReturnNode 8 - RETURN #9 +``` + +The query optimizer automatically chooses the `hash` method for the above +example query, but you can also specify your preferred method explicitly. + +See the [`COLLECT` options](../../aql/high-level-operations/collect.md#method) for details. + +### K_SHORTEST_PATHS performance improvements + +The `K_SHORTEST_PATHS` graph algorithm in AQL has been refactored in ArangoDB 3.11, +resulting in major performance improvements. The query now returns the +shortest paths between two documents in a graph up to 100 times faster. + +### Added AQL functions + +Added the `DATE_ISOWEEKYEAR()` function that returns the ISO week number, +like `DATE_ISOWEEK()` does, but also the year it belongs to: + +```aql +RETURN DATE_ISOWEEKYEAR("2023-01-01") // { "week": 52, "year": 2022 } +``` + +See [AQL Date functions](../../aql/functions/date.md#date_isoweekyear) for details. + +--- + +Added the `SHA256()` function that calculates the SHA256 checksum for a string +and returns it in a hexadecimal string representation. + +```aql +RETURN SHA256("ArangoDB") // "acbd84398a61fcc6fd784f7e16c32e02a0087fd5d631421bf7b5ede5db7fda31" +``` + +See [AQL String functions](../../aql/functions/string.md#sha256) for details. + +### Extended query explain statistics + +Introduced in: v3.10.4 + +The query explain result now includes the peak memory usage and execution time. +This helps finding queries that use a lot of memory or take long to build the +execution plan. + +The additional statistics are displayed at the end of the output in the +web interface (using the **Explain** button in the **QUERIES** section) and in +_arangosh_ (using `db._explain()`): + +``` +44 rule(s) executed, 1 plan(s) created, peak mem [b]: 32768, exec time [s]: 0.00214 +``` + +The HTTP API returns the extended statistics in the `stats` attribute when you +use the `POST /_api/explain` endpoint: + +```json +{ + ... + "stats": { + "rulesExecuted": 44, + "rulesSkipped": 0, + "plansCreated": 1, + "peakMemoryUsage": 32768, + "executionTime": 0.00241307167840004 + } +} +``` + +Also see: +- [API Changes in ArangoDB 3.11](api-changes-in-oem.md#explain-api) +- [The AQL query optimizer](../../aql/execution-and-performance/query-optimization.md#optimizer-statistics) + +### Extended peak memory usage reporting + +The peak memory usage of AQL queries is now also reported for running queries +and slow queries. + +In the web interface, you can find the **Peak memory usage** column in the +**QUERIES** section, in the **Running Queries** and **Slow Query History** tabs. + +In the JavaScript and HTTP APIs, the value is reported as `peakMemoryUsage`. 
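
For example, a minimal arangosh sketch for inspecting the reported values via the JavaScript API (assuming the `peakMemoryUsage` field is present in the entries, as described above):

```js
// List currently running and recent slow queries together with their
// peak memory usage.
const queries = require("@arangodb/aql/queries");
queries.current().forEach(q => print(q.id, q.query, q.peakMemoryUsage));
queries.slow().forEach(q => print(q.id, q.query, q.peakMemoryUsage));
```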
+See [API Changes in ArangoDB 3.11](api-changes-in-oem.md#query-api). + +### Number of cluster requests in profiling output + +Introduced in: v3.9.5, v3.10.2 + +The query profiling output in the web interface and _arangosh_ now shows the +number of HTTP requests for queries that you run against cluster deployments in +the `Query Statistics`: + +```aql +Query String (33 chars, cacheable: false): + FOR doc IN coll + RETURN doc._key + +Execution plan: + Id NodeType Site Calls Items Filtered Runtime [s] Comment + 1 SingletonNode DBS 3 3 0 0.00024 * ROOT + 9 IndexNode DBS 3 0 0 0.00060 - FOR doc IN coll /* primary index scan, index only (projections: `_key`), 3 shard(s) */ + 3 CalculationNode DBS 3 0 0 0.00025 - LET #1 = doc.`_key` /* attribute expression */ /* collections used: doc : coll */ + 7 RemoteNode COOR 6 0 0 0.00227 - REMOTE + 8 GatherNode COOR 2 0 0 0.00209 - GATHER /* parallel, unsorted */ + 4 ReturnNode COOR 2 0 0 0.00008 - RETURN #1 + +Indexes used: + By Name Type Collection Unique Sparse Cache Selectivity Fields Stored values Ranges + 9 primary primary coll true false false 100.00 % [ `_key` ] [ ] * + +Optimization rules applied: + Id RuleName + 1 scatter-in-cluster + 2 distribute-filtercalc-to-cluster + 3 remove-unnecessary-remote-scatter + 4 reduce-extraction-to-projection + 5 parallelize-gather + +Query Statistics: + Writes Exec Writes Ign Scan Full Scan Index Cache Hits/Misses Filtered Requests Peak Mem [b] Exec Time [s] + 0 0 0 0 0 / 0 0 9 32768 0.00564 +``` + +### New stage in query profiling output + +Introduced in: v3.10.3 + +The query profiling output has a new `instantiating executors` stage. +The time spent in this stage is the time needed to create the query executors +from the final query execution time. In cluster mode, this stage also includes +the time needed for physically distributing the query snippets to the +participating DB-Servers. Previously, the time spent for instantiating executors +and the physical distribution was contained in the `optimizing plan` stage. + +``` +Query Profile: + Query Stage Duration [s] + initializing 0.00001 + parsing 0.00009 + optimizing ast 0.00001 + loading collections 0.00001 + instantiating plan 0.00004 + optimizing plan 0.00088 + instantiating executors 0.00153 + executing 1.27349 + finalizing 0.00091 +``` + +### Limit for the normalization of `FILTER` conditions + +Converting complex AQL `FILTER` conditions with a lot of logical branches +(`AND`, `OR`, `NOT`) into the internal DNF (disjunctive normal form) format can +take a large amount of processing time and memory. The new `maxDNFConditionMembers` +query option is a threshold for the maximum number of `OR` sub-nodes in the +internal representation and defaults to `786432`. + +You can also set the threshold globally instead of per query with the +[`--query.max-dnf-condition-members` startup option](../../components/arangodb-server/options.md#--querymax-dnf-condition-members). + +If the threshold is hit, the query continues with a simplified representation of +the condition, which is **not usable in index lookups**. However, this should +still be better than overusing memory or taking a very long time to compute the +DNF version. + +## Server options + +### Telemetrics + +Starting with version 3.11, ArangoDB automatically gathers information on how +it is used and the features being utilized. This data is used to identify the +primary usage patterns and features, and to measure their adoption rate. + +The information collected by ArangoDB is anonymous and purely statistical. 
+It does not contain any personal information like usernames or IP addresses, nor +any content of the documents stored in ArangoDB. This means that your privacy is +protected, and that there is no risk of your data being compromised. + +If for any reason you prefer not to share usage statistics with ArangoDB, you +can easily disable this feature by setting the new `--server.telemetrics-api` +startup option to `false`. The default value is `true`. + +For a detailed list of what anonymous metrics ArangoDB collects see +[Telemetrics](../../operations/administration/telemetrics.md). + +### Extended naming constraints for collections, Views, and indexes + +In ArangoDB 3.9, the `--database.extended-names-databases` startup option was +added to optionally allow database names to contain most UTF-8 characters. +The startup option has been renamed to `--database.extended-names` in 3.11 and +now controls whether you want to use the extended naming constraints for +database, collection, View, and index names. + +This feature is **experimental** in ArangoDB 3.11, but will become the norm in +a future version. + +Running the server with the option enabled provides support for extended names +that are not comprised within the ASCII table, such as Japanese or Arabic +letters, emojis, letters with accentuation. Also, many ASCII characters that +were formerly banned by the traditional naming constraints are now accepted. + +Example collection, View, and index names that can be used with the new extended +constraints: `España`, `😀`, `犬`, `كلب`, `@abc123`, `København`, `München`, +`Бишкек`, `abc? <> 123!` + +Using extended collection and View names in the JavaScript API such as in +_arangosh_ or Foxx may require using the square bracket notation instead of the +dot notation for property access depending on the characters you use: + +```js +db._create("🥑~колекція =)"); +db.🥑~колекція =).properties(); // dot notation (syntax error) +db["🥑~колекція =)"].properties() // square bracket notation +``` + +Using extended collection and View names in AQL queries requires wrapping the +name in backticks or forward ticks (see [AQL Syntax](../../aql/fundamentals/syntax.md#names)): + +```aql +FOR doc IN `🥑~колекція =)` + RETURN doc +``` + +When using extended names, any Unicode characters in names need to be +[NFC-normalized](http://unicode.org/reports/tr15/#Norm_Forms). +If you try to create a database, collection, View, or index with a non-NFC-normalized +name, the server rejects it. + +The ArangoDB web interface as well as the _arangobench_, _arangodump_, +_arangoexport_, _arangoimport_, _arangorestore_, and _arangosh_ client tools +ship with support for the extended naming constraints, but they require you +to provide NFC-normalized names. + +Note that the default value for `--database.extended-names` is `false` +for compatibility with existing client drivers and applications that only support +ASCII names according to the traditional naming constraints used in previous +ArangoDB versions. Enabling the feature may lead to incompatibilities up to the +ArangoDB instance becoming inaccessible for such drivers and client applications. + +Please be aware that dumps containing extended names cannot be restored +into older versions that only support the traditional naming constraints. In a +cluster setup, it is required to use the same naming constraints for all +Coordinators and DB-Servers of the cluster. Otherwise, the startup is +refused. 
In DC2DC setups, it is also required to use the same naming constraints +for both datacenters to avoid incompatibilities. + +Also see: +- [Collection names](../../concepts/data-structure/collections.md#collection-names) +- [View names](../../concepts/data-structure/views.md#view-names) +- Index names have the same character restrictions as collection names + +### Verify `.sst` files + +The new `--rocksdb.verify-sst` startup option lets you validate the `.sst` files +currently contained in the database directory on startup. If set to `true`, +on startup, all SST files in the `engine-rocksdb` folder in the database +directory are validated, then the process finishes execution. +The default value is `false`. + +### Support for additional value suffixes + +Numeric startup options support suffixes like `m` (megabytes) and `GiB` (gibibytes) +to make it easier to specify values that are expected in bytes. The following +suffixes are now also supported: + +- `tib`, `TiB`, `TIB`: tebibytes (factor 10244) +- `t`, `tb`, `T`, `TB`: terabytes (factor 10004) +- `b`, `B`: bytes (factor 1) + +Example: `arangod --rocksdb.total-write-buffer-size 2TiB` + +See [Suffixes for numeric options](../../operations/administration/configuration.md#suffixes-for-numeric-options) +for details. + +### Configurable status code if write concern not fulfilled + +In cluster deployments, you can use a replication factor greater than `1` for +collections. This creates additional shard replicas for redundancy. For write +operations to these collections, you can define how many replicas need to +acknowledge the write for the operation to succeed. This option is called the +write concern. If there are not enough in-sync replicas available, the +write concern cannot be fulfilled. An error with the HTTP `403 Forbidden` +status code is returned immediately in this case. + +You can now change the status code via the new +`--cluster.failed-write-concern-status-code` startup option. It defaults to `403` +but you can set it to `503` to use an HTTP `503 Service Unavailable` status code +instead. This signals client applications that it is a temporary error. + +Note that no automatic retry of the operation is attempted by the cluster if you +set the startup option to `503`. It only changes the status code to one that +doesn't signal a permanent error like `403` does. +It is up to client applications to retry the operation. + +### RocksDB BLOB storage (experimental) + +From version 3.11 onward, ArangoDB can make use of RocksDB's integrated BLOB +(binary large object) storage for larger documents, called _BlobDB_. +This is currently an experimental feature, not supported and should not be used in production. + +[BlobDB is an integral part of RocksDB](https://rocksdb.org/blog/2021/05/26/integrated-blob-db.html) +and provides a key-value separation: large values are stored in dedicated BLOB +files, and only a small pointer to them is stored in the LSM tree's SST files. +Storing values separate from the keys means that the values do no need to be moved +through the LSM tree by the compaction. This reduces write amplification and is +especially beneficial for large values. + +When the option is enabled in ArangoDB, the key-value separation is used for +the documents column family, because large values are mostly to be expected here. +The cutoff value for the key-value separation is configurable by a startup option, +i.e. the administrator can set a size limit for values from which onwards they +are offloaded to separate BLOB files. 
This allows storing small documents +inline with the keys as before, but still benefit from reduced write amplification +for larger documents. + +BlobDB is disabled by default in ArangoDB 3.11. +Using BlobDB in ArangoDB is experimental and not recommended in production. It is +made available as an experimental feature so that further tests and tuning can be +done by interested parties. Future versions of ArangoDB may declare the feature +production-ready and even enable BlobDB by default. + +There are currently a few caveats when using BlobDB in ArangoDB: + +- Even though BlobDB can help reduce the write amplification, it may increase the + read amplification and may worsen the read performance for some workloads. +- The various tuning parameters that BlobDB offers are made available in ArangoDB, + but the current default settings for the BlobDB tuning options are not ideal + for many use cases and need to be adjusted by administrators first. +- It is very likely that the default settings for the BlobDB tuning options will + change in future versions of ArangoDB. +- Memory and disk usage patterns are different to that of versions running without + BlobDB enabled. It is very likely that memory limits and disk capacity may + need to be adjusted. +- Some metrics for observing RocksDB do not react properly when BlobDB is in use. +- The built-in throttling mechanism for controlling the write-throughput + slows down writes too much when BlobDB is used. This can be circumvented with + tuning parameters, but the defaults may be too aggressive. + +The following experimental startup options have been added in ArangoDB 3.11 to +enable and configure BlobDB: + +- `--rocksdb.enable-blob-files`: Enable the usage of BLOB files for the + documents column family. This option defaults to `false`. All following + options are only relevant if this option is set to `true`. +- `--rocksdb.min-blob-size`: Size threshold for storing large documents in + BLOB files (in bytes, 0 = store all documents in BLOB files). +- `--rocksdb.blob-file-size`: Size limit for BLOB files in the documents + column family (in bytes). Note that RocksDB counts the size of uncompressed + BLOBs before checking if a new BLOB file needs to be started, even though + the BLOB may be compressed and end up much smaller than uncompressed. +- `--rocksdb.blob-compression-type`: Compression algorithm to use for BLOB + data in the documents column family. +- `--rocksdb.enable-blob-garbage-collection`: Enable BLOB garbage collection + during compaction in the documents column family. +- `--rocksdb.blob-garbage-collection-age-cutoff`: Age cutoff for garbage + collecting BLOB files in the documents column family (percentage value from + 0 to 1 determines how many BLOB files are garbage collected during + compaction). +- `--rocksdb.blob-garbage-collection-force-threshold`: Garbage ratio + threshold for scheduling targeted compactions for the oldest BLOB files + in the documents column family. + +Note that ArangoDB's built-in throttling mechanism that automatically adjusts +the write rate for RocksDB may need to be reconfigured as well to see the +benefits of BlobDB. The relevant startup options for the throttle are: + +- `--rocksdb.throttle-lower-bound-bps` +- `--rocksdb.throttle-max-write-rate` +- `--rocksdb.throttle-slow-down-writes-trigger` + +### `--query.max-dnf-condition-members` option + +See [Limit for the normalization of `FILTER` conditions](#limit-for-the-normalization-of-filter-conditions). 
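
The threshold can also be adjusted for an individual query via the `maxDNFConditionMembers` query option mentioned in the linked section. A minimal arangosh sketch (collection name, filter, and threshold value are only examples):

```js
// Lower the DNF normalization threshold for a single query.
db._query({
  query: "FOR doc IN coll FILTER doc.a == 1 OR doc.b == 2 RETURN doc",
  bindVars: {},
  options: { maxDNFConditionMembers: 1024 }
});
```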
+ +### `--rocksdb.reserve-file-metadata-memory` option + +This new startup option controls whether to account for `.sst` file metadata +memory in the block cache. + +### ArangoSearch column cache limit + +Introduced in: v3.9.5, v3.10.2 + +The new `--arangosearch.columns-cache-limit` startup option lets you control how +much memory (in bytes) the [ArangoSearch column cache](#arangosearch-column-cache-enterprise-edition) +is allowed to use. + +Introduced in: v3.10.6 + +You can reduce the memory usage of the column cache in cluster deployments by +only using the cache for leader shards with the new +[`--arangosearch.columns-cache-only-leader` startup option](../../components/arangodb-server/options.md#--arangosearchcolumns-cache-only-leader). +It is disabled by default, which means followers also maintain a column cache. + +### AQL query logging + +Introduced in: v3.9.5, v3.10.2 + +There are three new startup options to configure how AQL queries are logged: + +- `--query.log-failed` for logging all failed AQL queries, to be used during + development or to catch unexpected failed queries in production (off by default) +- `--query.log-memory-usage-threshold` to define a peak memory threshold from + which on a warning is logged for AQL queries that exceed it (default: 4 GB) +- `--query.max-artifact-log-length` for controlling the length of logged query + strings and bind parameter values. Both are truncated to 4096 bytes by default. + +### Index cache refill options + +Introduced in: v3.9.6, v3.10.2 + +- `--rocksdb.auto-refill-index-caches-on-modify`: Whether to automatically + (re-)fill in-memory index cache entries on insert/update/replace operations + by default. Default: `false`. +- `--rocksdb.auto-refill-index-caches-queue-capacity`: How many changes can be + queued at most for automatically refilling the index cache. Default: `131072`. +- `--rocksdb.auto-fill-index-caches-on-startup`: Whether to automatically fill + the in-memory index cache with entries on server startup. Default: `false`. +- `--rocksdb.max-concurrent-index-fill-tasks`: The maximum number of index fill + tasks that can run concurrently on server startup. Default: the number of + cores divided by 8, but at least `1`. + +--- + +Introduced in: v3.9.10, v3.10.5 + +- `--rocksdb.auto-refill-index-caches-on-followers`: Control whether automatic + refilling of in-memory caches should happen on followers or only leaders. + The default value is `true`, i.e. refilling happens on followers, too. + +### Cluster supervision options + +Introduced in: v3.9.6, v3.10.2 + +The following new options allow you to delay supervision actions for a +configurable amount of time. This is desirable in case DB-Servers are restarted +or fail and come back quickly because it gives the cluster a chance to get in +sync and fully resilient without deploying additional shard replicas and thus +without causing any data imbalance: + +- `--agency.supervision-delay-add-follower`: + The delay in supervision, before an AddFollower job is executed (in seconds). + +- `--agency.supervision-delay-failed-follower`: + The delay in supervision, before a FailedFollower job is executed (in seconds). + +Introduced in: v3.9.7, v3.10.2 + +A `--agency.supervision-failed-leader-adds-follower` startup option has been +added with a default of `true` (behavior as before). 
If you set this option to +`false`, a `FailedLeader` job does not automatically configure a new shard +follower, thereby preventing unnecessary network traffic, CPU load, and I/O load +for the case that the server comes back quickly. If the server has permanently +failed, an `AddFollower` job is created anyway eventually, as governed by the +`--agency.supervision-delay-add-follower` option. + +### RocksDB Bloom filter option + +Introduced in: v3.10.3 + +A new `--rocksdb.bloom-filter-bits-per-key` startup option has been added to +configure the number of bits to use per key in a Bloom filter. + +The default value is `10`, which is downwards-compatible to the previously +hard-coded value. + +### Disable user-defined AQL functions + +Introduced in: v3.10.4 + +The new `--javascript.user-defined-functions` startup option lets you disable +user-defined AQL functions so that no user-defined JavaScript code of +[UDFs](../../aql/user-defined-functions.md) runs on the server. This can be useful to close off +a potential attack vector in case no user-defined AQL functions are used. +Also see [Server security options](../../operations/security/security-options.md). + +### Option to disable Foxx + +Introduced in: v3.10.5 + +A `--foxx.enable` startup option has been added to let you configure whether +access to user-defined Foxx services is possible for the instance. It defaults +to `true`. + +If you set the option to `false`, access to Foxx services is forbidden and is +responded with an HTTP `403 Forbidden` error. Access to the management APIs for +Foxx services are also disabled as if you set `--foxx.api false` manually. + +Access to ArangoDB's built-in web interface, which is also a Foxx service, is +still possible even with the option set to `false`. + +Disabling the access to Foxx can be useful to close off a potential attack +vector in case Foxx is not used. +Also see [Server security options](../../operations/security/security-options.md). + +### RocksDB auto-flushing + +Introduced in: v3.9.10, v3.10.5 + +A new feature for automatically flushing RocksDB Write-Ahead Log (WAL) files and +in-memory column family data has been added. + +An auto-flush occurs if the number of live WAL files exceeds a certain threshold. +This ensures that WAL files are moved to the archive when there are a lot of +live WAL files present, for example, after a restart. In this case, RocksDB does +not count any previously existing WAL files when calculating the size of WAL +files and comparing its `max_total_wal_size`. Auto-flushing fixes this problem, +but may prevent WAL files from being moved to the archive quickly. + +You can configure the feature via the following new startup options: +- `--rocksdb.auto-flush-min-live-wal-files`: + The minimum number of live WAL files that triggers an auto-flush. Defaults to `10`. +- `--rocksdb.auto-flush-check-interval`: + The interval (in seconds) in which auto-flushes are executed. Defaults to `3600`. + Note that an auto-flush is only executed if the number of live WAL files + exceeds the configured threshold and the last auto-flush is longer ago than + the configured auto-flush check interval. This avoids too frequent auto-flushes. + +### Configurable whitespace in metrics + +Introduced in: v3.10.6 + +The output format of the metrics API slightly changed in v3.10.0. It no longer +had a space between the label and the value for metrics with labels. 
Example: + +``` +arangodb_agency_cache_callback_number{role="SINGLE"}0 +``` + +The new `--server.ensure-whitespace-metrics-format` startup option lets you +control whether the metric label and value shall be separated by a space for +improved compatibility with some tools. This option is enabled by default. +From v3.10.6 onward, the default output format looks like this: + +``` +arangodb_agency_cache_callback_number{role="SINGLE"} 0 +``` + +### Configurable interval when counting open file descriptors + +Introduced in: v3.10.7 + +The `--server.count-descriptors-interval` startup option can be used to specify +the update interval in milliseconds when counting the number of open file +descriptors. + +The default value is `60000`, i.e. the update interval is once per minute. +To disable the counting of open file descriptors, you can set the value to `0`. +If counting is turned off, the `arangodb_file_descriptors_current` metric +reports a value of `0`. + +### Configurable limit of collections per query + +Introduced in: v3.10.7, v3.11.1 + +The `--query.max-collections-per-query` startup option allows you to adjust the +previously fixed limit for the maximum number of collections/shards per AQL query. +The default value is `2048`, which is equal to the fixed limit of +collections/shards in older versions. + +### Custom arguments to rclone + +Introduced in: v3.9.11, v3.10.7, v3.11.1 + +The `--rclone.argument` startup option can be used to prepend custom arguments +to rclone. For example, you can enable debug logging to a separate file on +startup as follows: + +``` +arangod --rclone.argument "--log-level=DEBUG" --rclone.argument "--log-file=rclone.log" +``` + +### LZ4 compression for values in the in-memory edge cache + +Introduced in: v3.11.2 + +LZ4 compression of edge index cache values allows to store more data in main +memory than without compression, so the available memory can be used more +efficiently. The compression is transparent and does not require any change to +queries or applications. +The compression can add CPU overhead for compressing values when storing them +in the cache, and for decompressing values when fetching them from the cache. + +The new startup option `--cache.min-value-size-for-edge-compression` can be +used to set a threshold value size for compression edge index cache payload +values. The default value is `1GB`, which effectively turns compression +off. Setting the option to a lower value (i.e. `100`) turns on the +compression for any payloads whose size exceeds this value. + +The new startup option `--cache.acceleration-factor-for-edge-compression` can +be used to fine-tune the compression. The default value is `1`. +Higher values typically mean less compression but faster speeds. + +The following new metrics can be used to determine the usefulness of +compression: + +- `rocksdb_cache_edge_inserts_effective_entries_size_total`: returns the total + number of bytes of all entries that were ever stored in the in-memory edge cache, + after compression was attempted/applied. This metric is populated regardless + of whether compression is used or not. +- `rocksdb_cache_edge_inserts_uncompressed_entries_size_total`: returns the total + number of bytes of all entries that were ever stored in the in-memory edge + cache, before compression was applied. This metric is populated regardless of + whether compression is used or not. +- `rocksdb_cache_edge_compression_ratio`: returns the effective + compression ratio for all edge cache entries ever stored in the cache. 
+ +Note that these metrics are increased upon every insertion into the edge +cache, but not decreased when data gets evicted from the cache. + +### Limit the number of databases in a deployment + +Introduced in: v3.10.10, v3.11.2 + +The `--database.max-databases` startup option allows you to limit the +number of databases that can exist in parallel in a deployment. You can use this +option to limit the resources used by database objects. If the option is used +and there are already as many databases as configured by this option, any +attempt to create an additional database fails with error +`32` (`ERROR_RESOURCE_LIMIT`). Additional databases can then only be created +if other databases are dropped first. The default value for this option is +unlimited, so an arbitrary amount of databases can be created. + +### Cluster-internal connectivity checks + +Introduced in: v3.11.5 + +This feature makes Coordinators and DB-Servers in a cluster periodically send +check requests to each other, in order to see if all nodes can connect to +each other. +If a cluster-internal connection to another Coordinator or DB-Server cannot +be established within 10 seconds, a warning is now logged. + +The new `--cluster.connectivity-check-interval` startup option can be used +to control the frequency of the connectivity check, in seconds. +If set to a value greater than zero, the initial connectivity check is +performed approximately 15 seconds after the instance start, and subsequent +connectivity checks are executed with the specified frequency. +If set to `0`, connectivity checks are disabled. + +You can also use the following metrics to monitor and detect temporary or +permanent connectivity issues: +- `arangodb_network_connectivity_failures_coordinators`: Number of failed + connectivity check requests sent by this instance to Coordinators. +- `arangodb_network_connectivity_failures_dbservers_total`: Number of failed + connectivity check requests sent to DB-Servers. + +### Configurable maximum for queued log entries + +Introduced in: v3.10.12, v3.11.5 + +The new `--log.max-queued-entries` startup option lets you configure how many +log entries are queued in a background thread. + +Log entries are pushed on a queue for asynchronous writing unless you enable the +`--log.force-direct` startup option. If you use a slow log output (e.g. syslog), +the queue might grow and eventually overflow. + +You can configure the upper bound of the queue with this option. If the queue is +full, log entries are written synchronously until the queue has space again. + +### Monitoring per collection/database/user + +Introduced in: v3.10.13, v3.11.7 + +The following metrics have been introduced to track per-shard requests on +DB-Servers: +- `arangodb_collection_leader_reads_total`: The number of read requests on + leaders, per shard, and optionally also split by user. +- `arangodb_collection_leader_writes_total`: The number of write requests on + leaders, per shard, and optionally also split by user. +- `arangodb_collection_requests_bytes_read_total`: The number of bytes read in + read requests on leaders. +- `arangodb_collection_requests_bytes_written_total`: The number of bytes written + in write requests on leaders and followers. + +To opt into these metrics, you can use the new `--server.export-shard-usage-metrics` +startup option. It can be set to one of the following values on DB-Servers: +- `disabled`: No shard usage metrics are recorded nor exported. This is the + default value. 
+- `enabled-per-shard`: This makes DB-Servers collect per-shard usage metrics. +- `enabled-per-shard-per-user`: This makes DB-Servers collect per-shard + and per-user metrics. This is more granular than `enabled-per-shard` but + can produce a lot of metrics. + +Whenever a shard is accessed in read or write mode by one of the following +operations, the metrics are populated dynamically, either with a per-user +label or not, depending on the above setting. +The metrics are retained in memory on DB-Servers. Removing databases, +collections, or users that are already included in the metrics won't remove +the metrics until the DB-Server is restarted. + +The following operations increase the metrics: +- AQL queries: an AQL query increases the read or write counters exactly + once for each involved shard. For shards that are accessed in read/write + mode, only the write counter is increased. +- Single-document insert, update, replace, and remove operations: for each + such operation, the write counter is increased once for the affected + shard. +- Multi-document insert, update, replace, and remove operations: for each + such operation, the write counter is increased once for each shard + that is affected by the operation. Note that this includes collection + truncate operations. +- Single and multi-document read operations: for each such operation, the + read counter is increased once for each shard that is affected by the + operation. + +The metrics are increased when any of the above operations start, and they +are not decreased should an operation abort or if an operation does not +lead to any actual reads or writes. + +As there can be many of these dynamic metrics based on the number of shards +and/or users in the deployment, these metrics are turned off by default. +When turned on, the metrics are exposed only via the new +`GET /_admin/usage-metrics` endpoint. They are not exposed via the existing +metrics `GET /_admin/metrics` endpoint. + +Note that internal operations, such as internal queries executed for statistics +gathering, internal garbage collection, and TTL index cleanup are not counted in +these metrics. Additionally, all requests that are using the superuser JWT for +authentication and that do not have a specific user set are not counted. + +Enabling these metrics can likely result in a small latency overhead of a few +percent for write operations. The exact overhead depends on +several factors, such as the type of operation (single or multi-document operation), +replication factor, network latency, etc. + +## Miscellaneous changes + +### Write-write conflict improvements + +It is now less likely that writes to the same document in quick succession +result in write-write conflicts for single document operations that use the +Document HTTP API. See +[Incompatible changes in ArangoDB 3.11](incompatible-changes-in-oem.md#write-write-conflict-improvements) +about the detailed behavior changes. + +### Trace logs for graph traversals and path searches + +Detailed information is now logged if you run AQL graph traversals +or (shortest) path searches with AQL and set the +log level to `TRACE` for the `graphs` log topic. This information is fairly +low-level but can help to understand correctness and performance issues with +traversal queries. There are also some new log messages for the `DEBUG` level. + +To enable tracing for traversals and path searches at startup, you can set +`--log.level graphs=trace`. 
+ +To enable or disable it at runtime, you can call the +[`PUT /_admin/log/level`](../../develop/http-api/monitoring/logs.md#set-the-server-log-levels) +endpoint of the HTTP API and set the log level using a request body like +`{"graphs":"TRACE"}`. + +### Persisted Pregel execution statistics + +Pregel algorithm executions now persist execution statistics to a system +collection. The statistics are kept until you remove them, whereas the +previously existing interfaces only store the information about Pregel jobs +temporarily in memory. + +To access and delete persisted execution statistics, you can use the newly added +`history()` and `removeHistory()` JavaScript API methods of the Pregel module: + +```js +var pregel = require("@arangodb/pregel"); +const execution = pregel.start("sssp", "demograph", { source: "vertices/V" }); +const historyStatus = pregel.history(execution); +pregel.removeHistory(); +``` + +You can also use the newly added HTTP endpoints with the +`/_api/control_pregel/history` route. + +You can still use the old interfaces (the `pregel.status()` method as well as +the `GET /_api/control_pregel` and `GET /_api/control_pregel/{id}` endpoints). + +### ArangoSearch metric + +The following ArangoSearch metric has been added in version 3.11: + +| Label | Description | +|:------|:------------| +| `arangodb_search_num_primary_docs` | Number of primary documents for current snapshot. | + +### Traffic accounting metrics + +Introduced in: v3.8.9, v3.9.6, v3.10.2 + +The following metrics for traffic accounting have been added: + +| Label | Description | +|:------|:------------| +| `arangodb_client_user_connection_statistics_bytes_received` | Bytes received for requests, only user traffic. | +| `arangodb_client_user_connection_statistics_bytes_sent` | Bytes sent for responses, only user traffic. +| `arangodb_http1_connections_total` | Total number of HTTP/1.1 connections accepted. | + +### Configurable `CACHE_OBLIVIOUS` option for jemalloc + +Introduced in: v3.9.7, v3.10.3 + +The jemalloc memory allocator supports an option to toggle cache-oblivious large +allocation alignment. It is enabled by default up to v3.10.3, but disabled from +v3.10.4 onwards. Disabling it helps to save 4096 bytes of memory for every +allocation which is at least 16384 bytes large. This is particularly beneficial +for the RocksDB buffer cache. + +You can now configure the option by setting a `CACHE_OBLIVIOUS` environment +variable to the string `true` or `false` before starting ArangoDB. + +See [ArangoDB Server environment variables](../../components/arangodb-server/environment-variables.md) +for details. + +### WAL file tracking metrics + +Introduced in: v3.9.10, v3.10.5 + +The following metrics for write-ahead log (WAL) file tracking have been added: + +| Label | Description | +|:------|:------------| +| `rocksdb_live_wal_files` | Number of live RocksDB WAL files. | +| `rocksdb_wal_released_tick_flush` | Lower bound sequence number from which WAL files need to be kept because of external flushing needs. | +| `rocksdb_wal_released_tick_replication` | Lower bound sequence number from which WAL files need to be kept because of replication. | +| `arangodb_flush_subscriptions` | Number of currently active flush subscriptions. 
| + +### Number of replication clients metric + +Introduced in: v3.10.5 + +The following metric for the number of replication clients for a server has +been added: + +| Label | Description | +|:------|:------------| +| `arangodb_replication_clients` | Number of currently connected/active replication clients. | + +### Reduced memory usage of in-memory edge indexes + +Introduced in: v3.10.5 + +The memory usage of in-memory edge index caches is reduced if most of the edges +in an index refer to a single or mostly the same collection. + +Previously, the full edge IDs, consisting of the referred-to collection +name and the referred-to key of the edge, were stored in full, i.e. the full +values of the edges' `_from` and `_to` attributes. +Now, the first edge inserted into an edge index' in-memory cache determines +the collection name for which all corresponding edges can be stored +prefix-compressed. + +For example, when inserting an edge pointing to `the-collection/abc` into the +empty cache, the collection name `the-collection` is noted for that cache +as a prefix. The edge is stored in-memory as only `/abc`. Further edges +that are inserted into the cache and that point to the same collection are +also stored prefix-compressed. + +The prefix compression is transparent and does not require configuration or +setup. Compression is done separately for each cache, i.e. a separate prefix +can be used for each individual edge index, and separately for the `_from` and +`_to` parts. Lookups from the in-memory edge cache do not return compressed +values but the full-length edge IDs. The compressed values are also used +in-memory only and are not persisted on disk. + +### Sending delay metrics for internal requests + +Introduced in: v3.9.11, v3.10.6 + +The following metrics for diagnosing delays in cluster-internal network requests +have been added: + +| Label | Description | +|:------|:------------| +| `arangodb_network_dequeue_duration` | Internal request duration for the dequeue in seconds. | +| `arangodb_network_response_duration` | Internal request duration from fully sent till response received in seconds. | +| `arangodb_network_send_duration` | Internal request send duration in seconds. | +| `arangodb_network_unfinished_sends_total` | Number of internal requests for which sending has not finished. | + +### Peak memory metric for in-memory caches + +Introduced in: v3.10.7 + +This new metric stores the peak value of the `rocksdb_cache_allocated` metric: + +| Label | Description | +|:------|:------------| +| `rocksdb_cache_peak_allocated` | Global peak memory allocation of ArangoDB in-memory caches. | + +### Number of SST files metric + +Introduced in: v3.10.7, v3.11.1 + +This new metric reports the number of RocksDB `.sst` files: + +| Label | Description | +|:------|:------------| +| `rocksdb_total_sst_files` | Total number of RocksDB sst files, aggregated over all levels. | + +### File descriptor metrics + +Introduced in: v3.10.7 + +The following system metrics have been added: + +| Label | Description | +|:------|:------------| +| `arangodb_file_descriptors_limit` | System limit for the number of open files for the arangod process. | +| `arangodb_file_descriptors_current` | Number of file descriptors currently opened by the arangod process. | + +### More instant Hot Backups + +Introduced in: v3.10.10, v3.11.3 + +Cluster deployments no longer wait for all in-progress transactions to get +committed when a user requests a Hot Backup. 
The waiting could cause deadlocks +and thus Hot Backups to fail, in particular in the Arango Managed Platform (AMP). Now, Hot Backups are +created immediately and commits have to wait until the backup process is done. + +### In-memory edge cache startup options and metrics + +Introduced in: v3.11.4 + +The following startup options have been added: + +- `--cache.max-spare-memory-usage`: the maximum memory usage for spare tables + in the in-memory cache. + +- `--cache.high-water-multiplier`: controls the cache's effective memory usage + limit. The user-defined memory limit (i.e. `--cache.size`) is multiplied with + this value to create the effective memory limit, from which on the cache tries + to free up memory by evicting the oldest entries. The default value is `0.56`, + matching the previously hardcoded 56% for the cache subsystem. + + You can increase the multiplier to make the cache subsystem use more memory, but + this may overcommit memory because the cache memory reclamation procedure is + asynchronous and can run in parallel to other tasks that insert new data. + In case a deployment's memory usage is already close to the maximum, increasing + the multiplier can lead to out-of-memory (OOM) kills. + +The following metrics have been added: + +| Label | Description | +|:------|:------------| +| `rocksdb_cache_edge_compressed_inserts_total` | Total number of compressed inserts into the in-memory edge cache. | +| `rocksdb_cache_edge_empty_inserts_total` | Total number of insertions into the in-memory edge cache for non-connected edges. | +| `rocksdb_cache_edge_inserts_total` | Total number of insertions into the in-memory edge cache. | + +### Observability of in-memory cache subsystem + +Introduced in: v3.10.11, v3.11.4 + +The following metrics have been added to improve the observability of in-memory +cache subsystem: +- `rocksdb_cache_free_memory_tasks_total`: Total number of free memory tasks + that were scheduled by the in-memory edge cache subsystem. This metric will + be increased whenever the cache subsystem schedules a task to free up memory + in one of the managed in-memory caches. It is expected to see this metric + rising when the cache subsystem hits its global memory budget. +- `rocksdb_cache_free_memory_tasks_duration_total`: Total amount of time spent + inside the free memory tasks of the in-memory cache subsystem. Free memory + tasks are scheduled by the cache subsystem to free up memory in existing cache + hash tables. +- `rocksdb_cache_migrate_tasks_total`: Total number of migrate tasks that were + scheduled by the in-memory edge cache subsystem. This metric will be increased + whenever the cache subsystem schedules a task to migrate an existing cache hash + table to a bigger or smaller size. +- `rocksdb_cache_migrate_tasks_duration_total`: Total amount of time spent inside + the migrate tasks of the in-memory cache subsystem. Migrate tasks are scheduled + by the cache subsystem to migrate existing cache hash tables to a bigger or + smaller table. + +### Detached scheduler threads + +Introduced in: v3.10.13, v3.11.5 + +A scheduler thread now has the capability to detach itself from the scheduler +if it observes the need to perform a potentially long running task, like waiting +for a lock. This allows a new scheduler thread to be started and prevents +scenarios where all threads are blocked waiting for a lock, which has previously +led to deadlock situations. + +Threads waiting for more than 1 second on a collection lock will detach +themselves. 
+ +The following startup option has been added: +- `--server.max-number-detached-threads`: The maximum number of detached scheduler + threads. + +The following metric has been added: +- `arangodb_scheduler_num_detached_threads`: The number of worker threads + currently started and detached from the scheduler. + +### Memory usage of connection and request statistics + +Introduced in: v3.10.12, v3.11.6 + +The following metrics have been added: + +| Label | Description | +|:------|:------------| +| `arangodb_connection_statistics_memory_usage` | Total memory usage of connection statistics. | +| `arangodb_request_statistics_memory_usage` | Total memory usage of request statistics. | + +If the `--server.statistics` startup option is set to `true`, then some +connection and request statistics are built up in memory for incoming request. +It is expected that the memory usage reported by these metrics remains +relatively constant over time. It may grow only when there are bursts of new +connections. Some memory is pre-allocated at startup for higher efficiency. If the +`--server.statistics` startup option is set to `false`, then no memory will be +allocated for connection and request statistics. + +## Client tools + +### arangodump + +#### Option to not dump Views + +_arangodump_ has a new `--dump-views` startup option to control whether +View definitions shall be included in the backup. The default value is `true`. + +#### Improved dump performance (experimental) + +Introduced in: v3.10.8, v3.11.2 + +_arangodump_ has experimental extended parallelization capabilities +to work not only at the collection level, but also at the shard level. +In combination with the newly added support for the VelocyPack format that +ArangoDB uses internally, database dumps can now be created and restored more +quickly and occupy less disk space. This major performance boost makes dumps and +restores up to several times faster, which is extremely useful when dealing +with large shards. + +- Whether the new parallel dump variant is used is controlled by the newly added + `--use-experimental-dump` startup option (introduced in v3.10.8 and v3.11.2). + The default value is `false`. + +- Optionally, you can make _arangodump_ write multiple output files per + collection/shard (introduced in v3.10.10 and v3.11.2). + The file splitting allows for better parallelization when + writing the results to disk, which in case of non-split files must be serialized. + You can enable it by setting the `--split-files` option to `true`. This option + is disabled by default because dumps created with this option enabled cannot + be restored into previous versions of ArangoDB. + +## Internal changes + +### Upgraded bundled library versions + +The bundled version of the OpenSSL library has been upgraded from 1.1.1 to 3.0.8. + +The bundled version of the zlib library has been upgraded to 1.2.13. + +The bundled version of the fmt library has been upgraded to 9.1.0. + +The bundled version of the immer library has been upgraded to 0.8.0. + +The bundled versions of the abseil-cpp, s2geometry, and wcwidth library have +been updated to more recent versions that don't have a version number. + +For ArangoDB 3.11, the bundled version of rclone is 1.62.2. Check if your +rclone configuration files require changes. + +From version 3.11.10 onward, ArangoDB uses the glibc C standard library +implementation with an LGPL-3.0 license instead of libmusl. Notably, it features +string functions that are better optimized for common CPUs. 
diff --git a/site/content/arangodb/4.0/_index.md b/site/content/arangodb/4.0/_index.md index 9ae04ecd74..618eda41ef 100644 --- a/site/content/arangodb/4.0/_index.md +++ b/site/content/arangodb/4.0/_index.md @@ -1,7 +1,7 @@ --- title: Recommended Resources menuTitle: '4.0' -weight: 96 +weight: 95 layout: default --- {{< cloudbanner >}} diff --git a/site/content/arangodb/4.0/release-notes/version-3.12/_index.md b/site/content/arangodb/4.0/release-notes/version-3.12/_index.md index aa4ef4ad8f..0bff821346 100644 --- a/site/content/arangodb/4.0/release-notes/version-3.12/_index.md +++ b/site/content/arangodb/4.0/release-notes/version-3.12/_index.md @@ -1,6 +1,6 @@ --- title: Version 3.12 menuTitle: Version 3.12 -weight: 87 +weight: 86 description: '' --- diff --git a/site/content/arangodb/4.0/release-notes/version-4.0/_index.md b/site/content/arangodb/4.0/release-notes/version-4.0/_index.md index 13bbacdecc..f5c9b41d20 100644 --- a/site/content/arangodb/4.0/release-notes/version-4.0/_index.md +++ b/site/content/arangodb/4.0/release-notes/version-4.0/_index.md @@ -1,6 +1,6 @@ --- title: Version 4.0 menuTitle: Version 4.0 -weight: 86 +weight: 85 description: '' --- diff --git a/site/content/arangodb/4.0/release-notes/version-oem/_index.md b/site/content/arangodb/4.0/release-notes/version-oem/_index.md new file mode 100644 index 0000000000..ab547220a9 --- /dev/null +++ b/site/content/arangodb/4.0/release-notes/version-oem/_index.md @@ -0,0 +1,6 @@ +--- +title: Version OEM +menuTitle: Version OEM +weight: 87 +description: '' +--- diff --git a/site/content/arangodb/4.0/release-notes/version-oem/api-changes-in-oem.md b/site/content/arangodb/4.0/release-notes/version-oem/api-changes-in-oem.md new file mode 100644 index 0000000000..93fbc165b9 --- /dev/null +++ b/site/content/arangodb/4.0/release-notes/version-oem/api-changes-in-oem.md @@ -0,0 +1,876 @@ +--- +title: API Changes in ArangoDB OEM LTS +menuTitle: API changes in OEM LTS +weight: 20 +description: >- + A summary of the changes to the HTTP API and other interfaces that are relevant + for developers, like maintainers of drivers and integrations for ArangoDB +--- +## HTTP RESTful API + +### Behavior changes + +#### Extended naming constraints for collections, Views, and indexes + +In ArangoDB 3.9, the `--database.extended-names-databases` startup option was +added to optionally allow database names to contain most UTF-8 characters. +The startup option has been renamed to `--database.extended-names` in 3.11 and +now controls whether you want to use the extended naming constraints for +database, collection, View, and index names. + +The feature is disabled by default to ensure compatibility with existing client +drivers and applications that only support ASCII names according to the +traditional naming constraints used in previous ArangoDB versions. + +If the feature is enabled, then any endpoints that contain database, collection, +View, or index names in the URL may contain special characters that were +previously not allowed (percent-encoded). They are also to be expected in +payloads that contain database, collection, View, or index names, as well as +document identifiers (because they are comprised of the collection name and the +document key). If client applications assemble URLs with extended names +programmatically, they need to ensure that extended names are properly +URL-encoded. + +When using extended names, any Unicode characters in names need to be +[NFC-normalized](http://unicode.org/reports/tr15/#Norm_Forms). 
+If you try to create a database, collection, View, or index with a non-NFC-normalized +name, the server rejects it. + +The ArangoDB web interface as well as the _arangobench_, _arangodump_, +_arangoexport_, _arangoimport_, _arangorestore_, and _arangosh_ client tools +ship with support for the extended naming constraints, but they require you +to provide NFC-normalized names. + +Please be aware that dumps containing extended names cannot be restored +into older versions that only support the traditional naming constraints. In a +cluster setup, it is required to use the same naming constraints for all +Coordinators and DB-Servers of the cluster. Otherwise, the startup is +refused. In DC2DC setups, it is also required to use the same naming +constraints for both datacenters to avoid incompatibilities. + +Also see: +- [Collection names](../../concepts/data-structure/collections.md#collection-names) +- [View names](../../concepts/data-structure/views.md#view-names) +- Index names have the same character restrictions as collection names + +#### Stricter validation of Unicode surrogate values in JSON data + +ArangoDB 3.11 employs a stricter validation of Unicode surrogate pairs in +incoming JSON data, for all REST APIs. + +In previous versions, the following loopholes existed when validating UTF-8 +surrogate pairs in incoming JSON data: + +- a high surrogate, followed by something other than a low surrogate + (or the end of the string) +- a low surrogate, not preceded by a high surrogate + +These validation loopholes have been closed in 3.11, which means that any JSON +inputs containing such invalid surrogate pair data are rejected by the server. + +This is normally the desired behavior, as it helps invalid data from entering +the database. However, in situations when a database is known to contain invalid +data and must continue supporting it (at least temporarily), the extended +validation can be disabled by setting the server startup option +`--server.validate-utf8-strings` to `false`. This is not recommended long-term, +but only during upgrading or data cleanup. + +#### Status code if write concern not fulfilled + +The new `--cluster.failed-write-concern-status-code` startup option can be used +to change the default `403` status code to `503` when the write concern cannot +be fulfilled for a write operation to a collection in a cluster deployment. +This signals client applications that it is a temporary error. Only the +HTTP status code changes in this case, no automatic retry of the operation is +attempted by the cluster. + +#### Graph API (Gharial) + +The `POST /_api/gharial/` endpoint for creating named graphs validates the +`satellites` property of the graph `options` for SmartGraphs differently now. + +If the `satellites` property is set, it must be an array, either empty or with +one or more collection name strings. If the value is not in that format, the +error "Missing array for field `satellites`" is now returned, for example, if +it is a string or a `null` value. Previously, it returned "invalid parameter type". +If the graph is not a SmartGraph, the `satellites` property is ignored unless its +value is an array but its elements are not strings, in which case the error +"Invalid parameter type" is returned. + +#### Validation of `smartGraphAttribute` in SmartGraphs + +Introduced in: v3.10.13, v3.11.7 + +The attribute defined by the `smartGraphAttribute` graph property is not allowed to be +changed in the documents of SmartGraph vertex collections. This is now strictly enforced. 
+You must set the attribute when creating a document. Any attempt to modify or remove +the attribute afterward by update or replace operations now throws an error. Previously, +the `smartGraphAttribute` value was checked only when inserting documents into a +SmartGraph vertex collection, but not for update or replace operations. + +The missing checks on update and replace operations allowed to retroactively +modify the value of the `smartGraphAttribute` for existing documents, which +could have led to problems when the data of such a SmartGraph vertex collection was +replicated to a new follower shard. On the new follower shard, the documents +went through the full validation and led to documents with modified +`smartGraphAttribute` values being rejected on the follower. This could have +led to follower shards not getting in sync. + +Now, the value of the `smartGraphAttribute` is fully validated with every +insert, update, or replace operation, and every attempt to modify the value of +the `smartGraphAttribute` retroactively fails with the `4003` error, +`ERROR_KEY_MUST_BE_PREFIXED_WITH_SMART_GRAPH_ATTRIBUTE`. +Additionally, if upon insertion the `smartGraphAttribute` is missing for a +SmartGraph vertex, the error code is error `4001`, `ERROR_NO_SMART_GRAPH_ATTRIBUTE`. + +To retroactively repair the data in any of the affected collections, it is +possible to update every (affected) document with the correct value of the +`smartGraphAttribute` via an AQL query as follows: + +``` +FOR doc IN @@collection + LET expected = SUBSTRING(doc._key, 0, FIND_FIRST(doc._key, ':')) + LET actual = doc.@attr + FILTER expected != actual + UPDATE doc WITH {@attr: expected} IN @@collection + COLLECT WITH COUNT INTO updated + RETURN updated +``` + +This updates all documents with the correct (expected) value of the +`smartGraphAttribute` if it deviates from the expected value. The query +returns the number of updated documents as well. + +The bind parameters necessary to run this query are: +- `@@collection`: name of a SmartGraph vertex collection to be updated +- `@attr`: attribute name of the `smartGraphAttribute` of the collection + +#### Database API + +The `POST /_api/database` endpoint for creating a new database has changed. +If the specified database name is invalid/illegal, it now returns the error code +`1208` (`ERROR_ARANGO_ILLEGAL_NAME`). It previously returned `1229` +(`ERROR_ARANGO_DATABASE_NAME_INVALID`) in this case. + +This is a downwards-incompatible change, but unifies the behavior for database +creation with the behavior of collection and View creation, which also return +the error code `1208` in case the specified name is not allowed. + +#### Document API + +The following endpoints support a new `refillIndexCaches` query +parameter to repopulate the index caches after requests that insert, update, +replace, or remove single or multiple documents (including edges) if this +affects an edge index or cache-enabled persistent indexes: + +- `POST /_api/document/{collection}` +- `PATCH /_api/document/{collection}/{key}` +- `PUT /_api/document/{collection}/{key}` +- `DELETE /_api/document/{collection}/{key}` + +It is a boolean option and the default is `false`. + +This also applies to the `INSERT`, `UPDATE`, `REPLACE`, and `REMOVE` operations +in AQL queries, which support a `refillIndexCache` option, too. + +In 3.9 and 3.10, `refillIndexCaches` was experimental and limited to edge caches. 
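+
+As a minimal sketch, the query parameter can be appended to a single-document
+insert like this (the collection name and document are placeholders):
+
+```bash
+# Insert a document and repopulate the affected in-memory index caches
+curl -X POST -d '{"_key":"example","value":1}' \
+  "http://localhost:8529/_api/document/coll?refillIndexCaches=true"
+```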
+ +--- + +Introduced in: v3.11.1 + +When inserting multiple documents/edges at once in a cluster, the Document API +used to let the entire request fail if any of the documents/edges failed to be +saved due to a key error. More specifically, if the value of a `_key` attribute +contains illegal characters or if the key doesn't meet additional requirements, +for instance, coming from the collection being used in a Disjoint SmartGraph, +the `POST /_api/document/{collection}` endpoint would not reply with the usual +array of either the document metadata or the error object for each attempted +document insertion. Instead, it used to return an error object for the first +offending document only, and aborted the operation with an HTTP `400 Bad Request` +status code so that none of the documents were saved. Example: + +```bash +> curl -d '[{"_key":"valid"},{"_key":"invalid space"}]' http://localhost:8529/_api/document/coll +{"code":400,"error":true,"errorMessage":"illegal document key","errorNum":1221} + +> curl http://localhost:8529/_api/document/coll/valid +{"code":404,"error":true,"errorMessage":"document not found","errorNum":1202} +``` + +Now, such key errors in cluster deployments no longer fail the entire request, +matching the behavior of single server deployments. Any errors are reported in +the result array for the respective documents, along with the successful ones: + +```bash +> curl -d '[{"_key":"valid"},{"_key":"invalid space"}]' http://localhost:8529/_api/document/coll +[{"_id":"coll/valid","_key":"valid","_rev":"_gG9JHsW---"},{"error":true,"errorNum":1221,"errorMessage":"illegal document key"}] + +> curl http://localhost:8529/_api/document/coll/valid +{"_key":"valid","_id":"coll/valid","_rev":"_gG9JHsW---"} +``` + +--- + +Introduced in: v3.11.1 + +Using the Document API for reading multiple documents used to return an error +if the request body was an empty array. Example: + +```bash +> curl -XPUT -d '[]' 'http://localhost:8529/_api/document/coll?onlyget=true' +{"code":500,"error":true,"errorMessage":"internal error","errorNum":4} +``` + +Now, a request like this succeeds and returns an empty array as response. + +#### Collection API + +The edge collections of EnterpriseGraphs and SmartGraphs (including +Disjoint SmartGraphs and SmartGraphs using SatelliteCollections but excluding +the edge collections of the SatelliteCollections) previously reported a +value of `0` as the `numberOfShards`. They now return the actual number of +shards. This value can be higher than the configured `numberOfShards` value of +the graph due to internally used hidden collections. + +#### Cursor API + +When you link a collection to an `arangosearch` View and run an AQL query +against this View while it is still being indexed, you now receive the query result +including a warning. This warning alerts you about potentially incomplete results obtained +from a partially indexed collection. The error code associated with this +warning is `1240` (`ERROR_ARANGO_INCOMPLETE_READ`). 
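+
+For illustration, such a query could be submitted as follows (the View name and
+search condition are placeholders); the warning is reported in the
+`extra.warnings` attribute of the cursor response:
+
+```bash
+# Query a View that is still being indexed; the result may be incomplete
+# and the response then carries warning 1240 (ERROR_ARANGO_INCOMPLETE_READ)
+curl -X POST -d '{"query":"FOR d IN myView SEARCH d.text == \"foo\" RETURN d"}' \
+  "http://localhost:8529/_api/cursor"
+```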
+ +--- + +Introduced in: v3.9.11, v3.10.7 + +In AQL graph traversals (`POST /_api/cursor` endpoint), you can restrict the +vertex and edge collections in the traversal options like so: + +```aql +FOR v, e, p IN 1..3 OUTBOUND 'products/123' components + OPTIONS { + vertexCollections: [ "bolts", "screws" ], + edgeCollections: [ "productsToBolts", "productsToScrews" ] + } + RETURN v +``` + +If you specify collections that don't exist, queries now fail with +a "collection or view not found" error (code `1203` and HTTP status +`404 Not Found`). In previous versions, unknown vertex collections were ignored, +and the behavior for unknown edge collections was undefined. + +Additionally, the collection types are now validated. If a document collection +or View is specified in `edgeCollections`, an error is raised +(code `1218` and HTTP status `400 Bad Request`). + +Furthermore, it is now an error if you specify a vertex collection that is not +part of the specified named graph (code `1926` and HTTP status `404 Not Found`). +It is also an error if you specify an edge collection that is not part of the +named graph's definition or of the list of edge collections (code `1939` and +HTTP status `400 Bad Request`). + +#### Log API + +Setting the log level for the `graphs` log topic to `TRACE` now logs detailed +information about AQL graph traversals and (shortest) path searches. +Some new log messages are also logged for the `DEBUG` level. + +#### Disabled Foxx APIs + +Introduced in: v3.10.5 + +A `--foxx.enable` startup option has been added to _arangod_. It defaults to `true`. +If the option is set to `false`, access to Foxx services is forbidden and is +responded with an HTTP `403 Forbidden` error. Access to the management APIs for +Foxx services are also disabled as if `--foxx.api false` is set manually. + +#### Configurable whitespace in metrics + +Introduced in: v3.10.6 + +The output format of the `/_admin/metrics` and `/_admin/metrics/v2` endpoints +slightly changes for metrics with labels. By default, the metric label and value +are separated by a space for improved compatibility with some tools. This is +controlled by the new `--server.ensure-whitespace-metrics-format` startup option, +which is enabled by default from v3.10.6 onward. Example: + +Enabled: + +``` +arangodb_agency_cache_callback_number{role="SINGLE"} 0 +``` + +Disabled: + +``` +arangodb_agency_cache_callback_number{role="SINGLE"}0 +``` + +#### Limit to the number of databases in a deployment + +Introduced in: v3.10.10, v3.11.2 + +The new `--database.max-databases` startup option can cap the number of databases +and creating databases using the `POST /_api/database` endpoint can thus now fail +for this reason if your deployment is at or above the configured maximum. Example: + +```json +{ + "code": 400, + "error": true, + "errorMessage": "unable to create additional database because it would exceed the configured maximum number of databases (2)", + "errorNum": 32 +} +``` + +### Endpoint return value changes + +Introduced in: v3.8.8, v3.9.4, v3.10.1 + +Changed the encoding of revision IDs returned by the below listed REST APIs: + +- `GET /_api/collection//revision`: The revision ID was + previously returned as numeric value, and now it is returned as + a string value with either numeric encoding or HLC-encoding inside. +- `GET /_api/collection//checksum`: The revision ID in + the `revision` attribute was previously encoded as a numeric value + in single server, and as a string in cluster. 
This is now unified so + that the `revision` attribute always contains a string value with + either numeric encoding or HLC-encoding inside. + +### Endpoints added + +#### Maintenance mode for DB-Servers + +Introduced in: v3.10.1 + +For rolling upgrades or rolling restarts, DB-Servers can now be put into +maintenance mode, so that no attempts are made to re-distribute the data in a +cluster for such planned events. DB-Servers in maintenance mode are not +considered viable failover targets because they are likely restarted soon. + +To query the maintenance status of a DB-Server, use this new endpoint: + +`GET /_admin/cluster/maintenance/` + +An example reply of a DB-Server that is in maintenance mode: + +```json +{ + "error": false, + "code": 200, + "result": { + "Mode": "maintenance", + "Until": "2022-10-26T06:14:23Z" + } +} +``` + +If the DB-Server is not in maintenance mode, then the `result` attribute is +omitted: + +```json +{ + "error": false, + "code": 200, +} +``` + +To put a DB-Server into maintenance mode, use this new endpoint: + +`PUT /_admin/cluster/maintenance/` + +The payload of the request needs to be as follows, with the `timeout` in seconds: + +```json +{ + "mode": "maintenance", + "timeout": 360 +} +``` + +To turn the maintenance mode off, set `mode` to `"normal"` instead, and omit the +`timeout` attribute or set it to `0`. + +You can send another request when the DB-Server is already in maintenance mode +to extend the timeout. + +The maintenance mode ends automatically after the defined timeout. + +Also see the [HTTP interface for cluster maintenance](../../develop/http-api/cluster.md#get-the-maintenance-status-of-a-db-server). + +#### Shard usage metrics + +Introduced in: v3.10.13, v3.11.7 + +With `GET /_admin/usage-metrics` you can retrieve detailed shard usage metrics on +DB-Servers. + +These metrics can be enabled by setting the `--server.export-shard-usage-metrics` +startup option to `enabled-per-shard` to make DB-Servers collect per-shard +usage metrics, or to `enabled-per-shard-per-user` to make DB-Servers collect +usage metrics per shard and per user whenever a shard is accessed. + +For more information, see the [HTTP API description](../../develop/http-api/monitoring/metrics.md#get-usage-metrics) +and [Monitoring per collection/database/user](../version-oem/whats-new-in-oem.md#monitoring-per-collectiondatabaseuser). + +### Endpoints augmented + +#### Cursor API + +- The `POST /_api/cursor` and `POST /_api/cursor/{cursor-identifier}` endpoints + can now return an additional statistics value in the `stats` sub-attribute, + `intermediateCommits`. It is the total number of intermediate commits the + query has performed. This number can only be greater than zero for + data modification queries that perform modifications beyond the + `--rocksdb.intermediate-commit-count` or `--rocksdb.intermediate-commit-size` + thresholds. In clusters, the intermediate commits are tracked per DB-Server + that participates in the query and are summed up in the end. + +- The `/_api/cursor` endpoint accepts a new `allowRetry` attribute in the + `options` object. Set this option to `true` to make it possible to retry + fetching the latest batch from a cursor. The default is `false`. + + If retrieving a result batch fails because of a connection issue, you can ask + for that batch again using the new `POST /_api/cursor//` + endpoint. The first batch has an ID of `1` and the value is incremented by 1 + with every batch. 
Every result response except the last one also includes a + `nextBatchId` attribute, indicating the ID of the batch after the current. + You can remember and use this batch ID should retrieving the next batch fail. + + You can only request the latest batch again (or the next batch). + Earlier batches are not kept on the server-side. + Requesting a batch again does not advance the cursor. + + You can also call this endpoint with the next batch identifier, i.e. the value + returned in the `nextBatchId` attribute of a previous request. This advances the + cursor and returns the results of the next batch. This is only supported if there + are more results in the cursor (i.e. `hasMore` is `true` in the latest batch). + + From v3.11.1 onward, you may use the `POST /_api/cursor//` + endpoint even if the `allowRetry` attribute is `false` to fetch the next batch, + but you cannot request a batch again unless you set it to `true`. + The `nextBatchId` attribute is always present in result objects (except in the + last batch) from v3.11.1 onward. + + To allow refetching of the very last batch of the query, the server cannot + automatically delete the cursor. After the first attempt of fetching the last + batch, the server would normally delete the cursor to free up resources. As you + might need to reattempt the fetch, it needs to keep the final batch when the + `allowRetry` option is enabled. Once you successfully received the last batch, + you should call the `DELETE /_api/cursor/` endpoint so that the + server doesn't unnecessarily keep the batch until the cursor times out + (`ttl` query option). + +- When profiling a query (`profile` option `true`, `1`, or `2`), the `profile` + object returned under `extra` now includes a new `"instantiating executors"` + attribute with the time needed to create the query executors, and in cluster + mode, this also includes the time needed for physically distributing the query + snippets to the participating DB-Servers. Previously, the time spent for + instantiating executors and the physical distribution was contained in the + `optimizing plan` stage. + +- The endpoint supports a new `maxDNFConditionMembers` query option, which is a + threshold for the maximum number of `OR` sub-nodes in the internal + representation of an AQL `FILTER` condition and defaults to `786432`. + +#### Analyzer types + +The `/_api/analyzer` endpoint supports a new Analyzer type in the +Enterprise Edition: + +- [`geo_s2`](../../indexes-and-search/analyzers.md#geo_s2) (introduced in v3.10.5): + Like the existing `geojson` Analyzer, but with an additional `format` property + that can be set to `"latLngDouble"` (default), `"latLngInt"`, or `"s2Point"`. + +#### Query API + +The [`GET /_api/query/current`](../../develop/http-api/queries/aql-queries.md#list-the-running-aql-queries) +and [`GET /_api/query/slow`](../../develop/http-api/queries/aql-queries.md#list-the-slow-aql-queries) +endpoints include a new numeric `peakMemoryUsage` attribute. + +--- + +The `GET /_api/query/current` endpoint can return a new value +`"instantiating executors"` as `state` in the query list. + +#### Index API + +##### Progress indication on the index generation + +Introduced in: v3.10.13, v3.11.7 + +The `GET /_api/index` endpoint may now include a `progress` attribute for the +elements in the `indexes` array. For every index that is currently being created, +it indicates the progress of the index generation (in percent). 
+
+To return indexes that are not yet fully built but are in the building phase,
+add the `withHidden=true` query parameter to the call of the endpoint.
+Note that this includes internal indexes in the response as well, such as
+`arangosearch` indexes.
+
+```bash
+curl "http://localhost:8529/_api/index?collection=myCollection&withHidden=true"
+```
+
+##### Restriction of indexable fields
+
+It is now forbidden to create indexes that cover fields whose attribute names
+start or end with `:`, for example, `fields: ["value:"]`. This notation is
+reserved for internal use.
+
+Existing indexes are not affected but you cannot create new indexes with a
+preceding or trailing colon using the `POST /_api/index` endpoint.
+
+##### Inverted indexes
+
+Introduced in: v3.10.2
+
+[Inverted indexes](../../develop/http-api/indexes/inverted.md) support new
+caching options in the Enterprise Edition.
+
+- A new `cache` option for inverted indexes as the default (boolean, default:
+  `false`) or for specific `fields` (boolean, default: the value of the
+  top-level `cache` option) to always cache field normalization values and
+  Geo Analyzer auxiliary data in memory.
+
+- A new `cache` option per object in the definition of the `storedValues`
+  elements to always cache stored values in memory (boolean, default: `false`).
+
+- A new `cache` option in the `primarySort` property to always cache the
+  primary sort columns in memory (boolean, default: `false`).
+
+- A new `primaryKeyCache` property for inverted indexes to always cache the
+  primary key column in memory (boolean, default: `false`).
+
+The `POST /_api/index` endpoint accepts these new options for `inverted` indexes
+and the `GET /_api/index` and `GET /_api/index/` endpoints may return
+these options. The attributes are omitted in responses unless you enable the
+respective option.
+
+#### View API
+
+Views of the type `arangosearch` support new caching options in the
+Enterprise Edition.
+
+Introduced in: v3.9.5, v3.10.2
+
+- A `cache` option for individual View links or fields (boolean, default: `false`).
+- A `cache` option in the definition of a `storedValues` View property
+  (boolean, immutable, default: `false`).
+
+Introduced in: v3.9.6, v3.10.2
+
+- A `primarySortCache` View property (boolean, immutable, default: `false`).
+- A `primaryKeyCache` View property (boolean, immutable, default: `false`).
+
+The `POST /_api/view` endpoint accepts these new options for `arangosearch`
+Views, the `GET /_api/view//properties` endpoint may return these
+options, and you can change the `cache` View link/field property with the
+`PUT /_api/view//properties` and `PATCH /_api/view//properties`
+endpoints.
+
+Introduced in: v3.10.3
+
+You may use a shorthand notation for the `storedValues` option on `arangosearch`
+View creation, like `["attr1", "attr2"]`, instead of using an array of
+objects.
+
+See the [`arangosearch` Views Reference](../../indexes-and-search/arangosearch/arangosearch-views-reference.md#link-properties)
+for details.
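+
+As a sketch, a View could be created with the shorthand `storedValues` notation
+and one of the caching options enabled like this (the View and attribute names
+are placeholders, and the caching options require the Enterprise Edition):
+
+```bash
+# Create an arangosearch View with shorthand storedValues and key caching
+curl -X POST -d '{
+  "name": "myView",
+  "type": "arangosearch",
+  "storedValues": ["attr1", "attr2"],
+  "primaryKeyCache": true
+}' "http://localhost:8529/_api/view"
+```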
+ +#### Pregel API + +Four new endpoints have been added to the Pregel HTTP interface for the new +persisted execution statistics for Pregel jobs: + +- `GET /_api/control_pregel/history/{id}` to retrieve the persisted execution + statistics of a specific Pregel job +- `GET /_api/control_pregel/history` to retrieve the persisted execution + statistics of all currently active and past Pregel jobs +- `DELETE /_api/control_pregel/history/{id}` to delete the persisted execution + statistics of a specific Pregel job +- `DELETE /_api/control_pregel/history` to delete the persisted execution + statistics of all Pregel jobs + +#### Cluster rebalance API + +The `POST /_admin/cluster/rebalance` and `PUT /_admin/cluster/rebalance` +endpoints support a new `excludeSystemCollections` option that lets you ignore +system collections in the shard rebalance plan. + +The `/_admin/cluster/rebalance` route (`GET`, `POST`, and `PUT` methods) returns +a new `totalShardsFromSystemCollections` property in the `shards` object of the +`result` with the number of leader shards from system collections. The adjacent +`totalShards` property may not include system collections depending on the +`excludeSystemCollections` option. + +#### Explain API + +Introduced in: v3.10.4 + +The `POST /_api/explain` endpoint for explaining AQL queries includes the +following two new statistics in the `stats` attribute of the response now: + +- `peakMemoryUsage` (number): The maximum memory usage of the query during + explain (in bytes) +- `executionTime` (number): The (wall-clock) time in seconds needed to explain + the query. + +#### Metrics API + +The following metric has been added in version 3.11: + +| Label | Description | +|:------|:------------| +| `arangodb_search_num_primary_docs` | Number of primary documents for current snapshot. | + +--- + +Introduced in: v3.10.7, v3.11.1 + +This new metric reports the number of RocksDB `.sst` files: + +| Label | Description | +|:------|:------------| +| `rocksdb_total_sst_files` | Total number of RocksDB sst files, aggregated over all levels. | + +--- + +Introduced in: v3.10.7 + +The metrics endpoints include the following new file descriptors metrics: + +- `arangodb_file_descriptors_current` +- `arangodb_file_descriptors_limit` + +--- + +Introduced in: v3.8.9, v3.9.6, v3.10.2 + +The metrics endpoints include the following new traffic accounting metrics: + +- `arangodb_client_user_connection_statistics_bytes_received` +- `arangodb_client_user_connection_statistics_bytes_sent` +- `arangodb_http1_connections_total` + +--- + +Introduced in: v3.9.6, v3.10.2 + +The metrics endpoints include the following new edge cache (re-)filling metrics: + +- `rocksdb_cache_auto_refill_loaded_total` +- `rocksdb_cache_auto_refill_dropped_total` +- `rocksdb_cache_full_index_refills_total` + +--- + +Introduced in: v3.9.10, v3.10.5 + +The following metrics for write-ahead log (WAL) file tracking have been added: + +| Label | Description | +|:------|:------------| +| `rocksdb_live_wal_files` | Number of live RocksDB WAL files. | +| `rocksdb_wal_released_tick_flush` | Lower bound sequence number from which WAL files need to be kept because of external flushing needs. | +| `rocksdb_wal_released_tick_replication` | Lower bound sequence number from which WAL files need to be kept because of replication. | +| `arangodb_flush_subscriptions` | Number of currently active flush subscriptions. 
| + +--- + +Introduced in: v3.10.5 + +The following metric for the number of replication clients for a server has +been added: + +| Label | Description | +|:------|:------------| +| `arangodb_replication_clients` | Number of currently connected/active replication clients. | + +--- + +Introduced in: v3.9.11, v3.10.6 + +The following metrics for diagnosing delays in cluster-internal network requests +have been added: + +| Label | Description | +|:------|:------------| +| `arangodb_network_dequeue_duration` | Internal request duration for the dequeue in seconds. | +| `arangodb_network_response_duration` | Internal request duration from fully sent till response received in seconds. | +| `arangodb_network_send_duration` | Internal request send duration in seconds. | +| `arangodb_network_unfinished_sends_total` | Number of internal requests for which sending has not finished. | + +--- + +Introduced in: v3.10.7 + +The following metric stores the peak value of the `rocksdb_cache_allocated` metric: + +| Label | Description | +|:------|:------------| +| `rocksdb_cache_peak_allocated` | Global peak memory allocation of ArangoDB in-memory caches. | + +--- + +Introduced in: v3.11.2 + +The following metrics have been added about the LZ4 compression for values in +the in-memory edge cache: + +- `rocksdb_cache_edge_inserts_effective_entries_size_total` +- `rocksdb_cache_edge_inserts_uncompressed_entries_size_total` +- `rocksdb_cache_edge_compression_ratio` + +--- + +Introduced in: v3.10.11, v3.11.4 + +The following metrics have been added to improve the observability of in-memory +cache subsystem: + +- `rocksdb_cache_free_memory_tasks_total` +- `rocksdb_cache_free_memory_tasks_duration_total` +- `rocksdb_cache_migrate_tasks_total` +- `rocksdb_cache_migrate_tasks_duration_total` + +--- + +Introduced in: v3.11.4 + +The following metrics have been added to improve the observability of in-memory +edge cache: + +- `rocksdb_cache_edge_compressed_inserts_total` +- `rocksdb_cache_edge_empty_inserts_total` +- `rocksdb_cache_edge_inserts_total` + +--- + +Introduced in: v3.11.5 + +The following metrics have been added to monitor and detect temporary or +permanent connectivity issues as well as how many scheduler threads are in the +detached state: + +- `arangodb_network_connectivity_failures_coordinators` +- `arangodb_network_connectivity_failures_dbservers_total` +- `arangodb_scheduler_num_detached_threads` + +#### Log level API + +Introduced in: v3.10.2 + +The `GET /_admin/log/level` and `PUT /_admin/log/level` endpoints support a new +query parameter `serverId`, to forward log level get and set requests to a +specific server. This makes it easier to adjust the log levels in clusters +because DB-Servers require JWT authentication whereas Coordinators also support +authentication using usernames and passwords. + +#### Explain API + +Introduced in: v3.10.4 + +The `POST /_api/explain` endpoint for explaining AQL queries includes the +following two new statistics in the `stats` attribute of the response now: + +- `peakMemoryUsage` (number): The maximum memory usage of the query during + explain (in bytes) +- `executionTime` (number): The (wall-clock) time in seconds needed to explain + the query. + +#### Optimizer rule descriptions + +Introduced in: v3.10.9, v3.11.2 + +The `GET /_api/query/rules` endpoint now includes a `description` attribute for +every optimizer rule that briefly explains what it does. 
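+
+For example, listing the optimizer rules together with their new descriptions
+only takes a plain GET request:
+
+```bash
+# Each returned rule object now includes a brief "description" attribute
+curl "http://localhost:8529/_api/query/rules"
+```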
+ +### Endpoints deprecated + +The `GET /_admin/database/target-version` endpoint is deprecated in favor of the +more general version API with the endpoint `GET /_api/version`. +The endpoint will be removed in ArangoDB v3.12. + +## JavaScript API + +### Database creation + +The `db._createDatabase()` method for creating a new database has changed. +If the specified database name is invalid/illegal, it now returns the error code +`1208` (`ERROR_ARANGO_ILLEGAL_NAME`). It previously returned `1229` +(`ERROR_ARANGO_DATABASE_NAME_INVALID`) in this case. + +This is a downwards-incompatible change, but unifies the behavior for database +creation with the behavior of collection and View creation, which also return +the error code `1208` in case the specified name is not allowed. + +### Index methods + +Calling `collection.dropIndex(...)` or `db._dropIndex(...)` now raises an error +if the specified index does not exist or cannot be dropped (for example, because +it is a primary index or edge index). The methods previously returned `false`. +In case of success, they still return `true`. + +You can wrap calls to these methods with a `try { ... }` block to catch errors, +for example, in _arangosh_ or in Foxx services. + +### AQL queries + +When you use e.g. the `db._query()` method to execute an AQL query against an +`arangosearch` View while it is still in the process of being built, +the query now includes a warning message that the results may not be +complete due to the ongoing indexing process of the View. + +The error code associated with this warning is `1240` +(`ERROR_ARANGO_INCOMPLETE_READ`). + +--- + +Introduced in: v3.9.11, v3.10.7 + +If you specify collections that don't exist in the options of AQL graph traversals +(`vertexCollections`, `edgeCollections`), queries now fail. In previous versions, +unknown vertex collections were ignored, and the behavior for unknown +edge collections was undefined. + +Additionally, queries fail if you specify a document collection or View +in `edgeCollections`. + +### Pregel module + +Two new methods have been added to the `@arangodb/pregel` module: + +- `history(...)` to get the persisted execution statistics of a specific or all + algorithm executions +- `removeHistory(...)` to delete the persisted execution statistics of a + specific or all algorithm executions + +```js +var pregel = require("@arangodb/pregel"); +const execution = pregel.start("sssp", "demograph", { source: "vertices/V" }); +const historyStatus = pregel.history(execution); +pregel.removeHistory(); +``` + +### `collection.iterate()` deprecated + +The `collection.iterate()` method is deprecated from v3.11.0 onwards and will be +removed in a future version. + +### `@arangodb/request` certificate validation + +Introduced in: v3.11.11 + +The `@arangodb/request` module now supports two additional options for making +HTTPS requests: + +- `verifyCertificates` (optional): if set to `true`, the server certificate of + the remote server is verified using the default certificate store of the system. + Default: `false`. +- `verifyDepth` (optional): limit the maximum length of the certificate chain + that counts as valid. Default: `10`. 
diff --git a/site/content/arangodb/4.0/release-notes/version-oem/incompatible-changes-in-oem.md b/site/content/arangodb/4.0/release-notes/version-oem/incompatible-changes-in-oem.md
new file mode 100644
index 0000000000..109b41bc11
--- /dev/null
+++ b/site/content/arangodb/4.0/release-notes/version-oem/incompatible-changes-in-oem.md
@@ -0,0 +1,695 @@
+---
+title: Incompatible changes in ArangoDB OEM LTS
+menuTitle: Incompatible changes in OEM LTS
+weight: 15
+description: >-
+  Check the following list of potential breaking changes **before** upgrading to
+  this ArangoDB version and adjust any client applications if necessary
+---
+## Resolving known issues with versions up to 3.11.11
+
+Due to an issue with versions up to 3.11.11, please read the
+information below and follow the linked procedures to avoid a potential problem.
+Not following these procedures can cause your deployment to become
+read-only in rare cases.
+
+{{< warning >}}
+If you are a paying customer with a self-hosted deployment, contact
+Arango support for direct assistance.
+Arango Managed Platform (AMP) customers do not need to take any action.
+{{< /warning >}}
+
+**Issues that have been discovered and require action:**
+
+- [Issues with the comparison of large indexed numbers](#corrected-sorting-order-for-numbers-in-velocypack-indexes)
+
+**Who should check for a potential issue:**
+
+- Deployments created with a version prior to 3.11.11
+
+**Deployments not impacted:**
+
+- Deployments created with 3.11.11 or a later 3.11.x version
+
+**Overview of impact:**
+
+There is a risk of the RocksDB storage engine entering a state where no write operations are
+possible anymore, should it discover index entries that are in an unexpected order.
+
+This can occur at any time, even if a previous check reported no affected indexes,
+as there is no protection against storing and indexing data that may cause issues.
+To prevent RocksDB from becoming read-only at some point in the future, it is
+essential to follow the linked procedures.
+
+{{< tip >}}
+It is recommended to schedule a maintenance time window for taking the ArangoDB
+deployment offline to perform the upgrade procedure in the safest possible manner.
+{{< /tip >}}
+
+**Paths to resolution:**
+
+| Current version | Resolved version | Steps to take |
+|-----------------|------------------|---------------|
+| 3.11.10 (or older) | 3.11.11 (or newer 3.11.x) | Create a backup, upgrade normally (following the standard [Upgrade path](../../operations/upgrading/_index.md#upgrade-paths) all the way to the latest 3.11.x version), then check for [affected numbers in indexes](#corrected-sorting-order-for-numbers-in-velocypack-indexes) and fix them. |
+| 3.11.11 (or newer 3.11.x) | 3.12.4 (or newer) | **Do not upgrade to version 3.12.0, 3.12.1, 3.12.2, or 3.12.3**. Create a backup, check for [affected numbers in indexes](#corrected-sorting-order-for-numbers-in-velocypack-indexes) and fix them (if you haven't done so already or created the deployment with 3.11.11 or a later 3.11.x version), then upgrade to the latest 3.11.x version first, and finally upgrade to version 3.12.4 or later. |
+
+## Incompatibilities due to switch to glibc
+
+From version 3.11.10 onward, ArangoDB uses the glibc C standard library
+implementation instead of libmusl. Even though glibc is statically linked into
+the ArangoDB server and client tool executables, it may load additional modules
+at runtime that are installed on your system.
Under rare circumstances, it is +possible that ArangoDB crashes when performing host name or address lookups. +This is only the case if all of the following conditions are true: + +- You either use ArangoDB version 3.11.10 (non-hotfix), or you use a 3.11 version + from 3.11.10-1 onward with the `--honor-nsswitch` startup option enabled. +- You use an ArangoDB package on bare metal (not a Docker container) +- Your operating system uses glibc (like Ubuntu, Debian, RedHat, Centos, or + most other Linux distributions, but not Alpine for instance) +- The glibc version of your system is different than the one used by ArangoDB, + in particular if the system glibc is older than version 2.35 +- The `libnss-*` dynamic libraries are installed +- The `/etc/nsswitch.conf` configuration file contains settings other than for + `files` and `dns` in the `hosts:` line, or the `passwd:` and `group:` lines + contain something other than `files` + +If you are affected, consider using Docker containers, `chroot`, or change +`nsswitch.conf`. + +## VelocyStream protocol deprecation + +ArangoDB's own bi-directional asynchronous binary protocol VelocyStream (VST) is +deprecated in v3.11 and removed in v3.12.0. + +While VelocyStream support is still available in v3.11, it is highly recommended +to already switch to the HTTP(S) protocol because of better performance and +reliability. ArangoDB supports both VelocyPack and JSON over HTTP(S). + +## Active Failover deployment mode deprecation + +Running a single server with asynchronous replication to one or more passive +single servers for automatic failover is deprecated and will no longer be +supported in the next minor version of ArangoDB, from v3.12 onward. + +## Extended naming constraints for collections, Views, and indexes + +In ArangoDB 3.9, the `--database.extended-names-databases` startup option was +added to optionally allow database names to contain most UTF-8 characters. +The startup option has been renamed to `--database.extended-names` in 3.11 and +now controls whether you want to use the extended naming constraints for +database, collection, View, and index names. + +The old `--database.extended-names-databases` startup option should no longer +be used, but if you do, it behaves the same as the new +`--database.extended-names` option. + +The feature is disabled by default to ensure compatibility with existing client +drivers and applications that only support ASCII names according to the +traditional naming constraints used in previous ArangoDB versions. + +If the feature is enabled, then any endpoints that contain database, collection, +View, or index names in the URL may contain special characters that were +previously not allowed (percent-encoded). They are also to be expected in +payloads that contain database, collection, View, or index names, as well as +document identifiers (because they are comprised of the collection name and the +document key). If client applications assemble URLs with extended names +programmatically, they need to ensure that extended names are properly +URL-encoded. + +When using extended names, any Unicode characters in names need to be +[NFC-normalized](http://unicode.org/reports/tr15/#Norm_Forms). +If you try to create a database, collection, View, or index with a non-NFC-normalized +name, the server rejects it. 
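+
+As a sketch, assuming the `--database.extended-names` startup option is enabled,
+a collection with an NFC-normalized Unicode name could be created and then
+addressed with a percent-encoded URL like this (the collection name is only an
+example):
+
+```bash
+# Create a collection whose name contains a non-ASCII (NFC-normalized) character
+curl -X POST -d '{"name":"münchen"}' "http://localhost:8529/_api/collection"
+
+# Percent-encode the name whenever it is assembled into a URL
+curl "http://localhost:8529/_api/collection/m%C3%BCnchen/properties"
+```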
+ +The ArangoDB web interface as well as the _arangobench_, _arangodump_, +_arangoexport_, _arangoimport_, _arangorestore_, and _arangosh_ client tools +ship with support for the extended naming constraints, but they require you +to provide NFC-normalized names. + +Please be aware that dumps containing extended names cannot be restored +into older versions that only support the traditional naming constraints. In a +cluster setup, it is required to use the same naming constraints for all +Coordinators and DB-Servers of the cluster. Otherwise, the startup is +refused. In DC2DC setups, it is also required to use the same naming +constraints for both datacenters to avoid incompatibilities. + +Also see: +- [Collection names](../../concepts/data-structure/collections.md#collection-names) +- [View names](../../concepts/data-structure/views.md#view-names) +- Index names have the same character restrictions as collection names + +## No AQL user-defined functions (UDF) in `PRUNE` + +AQL user-defined functions (UDFs) cannot be used inside traversal PRUNE conditions +nor inside FILTER conditions that can be moved into the traversal execution on DB-Servers. +This limitation also applies to single servers to keep the differences to cluster +deployments minimal. + +## Stricter validation of Unicode surrogate values in JSON data + +ArangoDB 3.11 employs a stricter validation of Unicode surrogate pairs in +incoming JSON data, for all REST APIs. + +In previous versions, the following loopholes existed when validating UTF-8 +surrogate pairs in incoming JSON data: + +- a high surrogate, followed by something other than a low surrogate + (or the end of the string) +- a low surrogate, not preceded by a high surrogate + +These validation loopholes have been closed in 3.11, which means that any JSON +inputs containing such invalid surrogate pair data are rejected by the server. + +This is normally the desired behavior, as it helps invalid data from entering +the database. However, in situations when a database is known to contain invalid +data and must continue supporting it (at least temporarily), the extended +validation can be disabled by setting the server startup option +`--server.validate-utf8-strings` to `false`. This is not recommended long-term, +but only during upgrading or data cleanup. + +## Restriction of indexable fields + +It is now forbidden to create indexes that cover fields whose attribute names +start or end with `:` , for example, `fields: ["value:"]`. This notation is +reserved for internal use. + +Existing indexes are not affected but you cannot create new indexes with a +preceding or trailing colon. + +## Write-write conflict improvements + +Writes to the same document in quick succession can result in write-write +conflicts, requiring you to retry the operations. In v3.11, single document +operations via the [HTTP Interface for Documents](../../develop/http-api/documents.md) try to +avoid conflicts by locking the key of the document before performing the +modification. This serializes the write operations on the same document. +The behavior of AQL queries, Stream Transactions, and multi-document operations +remains unchanged. + +It is still possible for write-write conflicts to occur, and in these cases the +reported error is now slightly different. + +The lock acquisition on the key of the document that is supposed to be +inserted/modified has a hard-coded timeout of 1 second. 
If the lock cannot be +acquired, the error message is as follows: + +``` +Timeout waiting to lock key - in index primary of type primary over '_key'; conflicting key: +``` + +The `` corresponds to the document key of the write attempt. In addition, +the error object contains `_key`, `_id`, and `_rev` attributes. The `_key` and +`_id` correspond to the document of the write attempt, and `_rev` corresponds +to the current revision of the document as stored in the database (if available, +otherwise empty). + +If the lock cannot be acquired on a unique index entry, the error message is as +follows: + +``` +Timeout waiting to lock key - in index of type persistent over ''; document key: ; indexed values: [] +``` + +The `` is the name of the index in which the write attempt tried to +lock the entry, `` is the list of fields included in that index, `` +corresponds to the document key of the write attempt, and `` +corresponds to the indexed values of the document. In addition, the error object +contains `_key`, `_id`, and `_rev` attributes. The `_key` and `_id` correspond +to the document of the write attempt, and `_rev` corresponds to the current +revision of the document as stored in the database (if available, otherwise empty). + +## Deprecated and removed Pregel features + +- The experimental _Custom Pregel_ feature, also known as + _programmable Pregel algorithms_ (PPA), has been removed. + +- The built-in _DMID_ Pregel algorithm has been deprecated and will be removed + in a future release. + +- The `async` option for Pregel jobs has been removed. Some algorithms supported + an asynchronous mode to run without synchronized global iterations. This is no + longer supported. + +- The `useMemoryMaps` option for Pregel jobs to use memory-mapped files as a + backing storage for large datasets has been removed. Memory paging/swapping + provided by the operating system is equally effective. + +## New query stage + +- When profiling a query (`profile` option `true`, `1`, or `2`), the `profile` + object returned under `extra` now includes a new `"instantiating executors"` + attribute with the time needed to create the query executors, and in cluster + mode, this also includes the time needed for physically distributing the query + snippets to the participating DB-Servers. Previously, the time spent for + instantiating executors and the physical distribution was contained in the + `optimizing plan` stage. + +- The `state` of a query can now additionally be `"instantiating executors"` in + the list of currently running queries. + +## Limit for the normalization of `FILTER` conditions + +Converting complex AQL `FILTER` conditions with a lot of logical branches +(`AND`, `OR`, `NOT`) into the internal DNF (disjunctive normal form) format can +take a large amount of processing time and memory. The new `maxDNFConditionMembers` +query option is a threshold for the maximum number of `OR` sub-nodes in the +internal representation and defaults to `786432`. + +You can also set the threshold globally instead of per query with the +[`--query.max-dnf-condition-members` startup option](../../components/arangodb-server/options.md#--querymax-dnf-condition-members). + +If the threshold is hit, the query continues with a simplified representation of +the condition, which is **not usable in index lookups**. However, this should +still be better than overusing memory or taking a very long time to compute the +DNF version. 
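+
+As a rough illustration (not an excerpt from the reference documentation), the
+per-query threshold could be lowered from _arangosh_ along these lines; the
+collection name `coll` is a placeholder, and it is assumed that the option is
+passed in the query options object like other query options:
+
+```js
+// Sketch: run a query with several OR branches but cap the DNF normalization
+// at 1000 members (an arbitrary example value; the default is 786432).
+var docs = db._query(
+  "FOR doc IN coll FILTER doc.a == 1 OR doc.b == 2 OR doc.c == 3 RETURN doc",
+  {},                                // no bind variables
+  { maxDNFConditionMembers: 1000 }   // per-query threshold
+).toArray();
+```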
+ +## Validation of `smartGraphAttribute` in SmartGraphs + +Introduced in: v3.10.13, v3.11.7 + +The attribute defined by the `smartGraphAttribute` graph property is not allowed to be +changed in the documents of SmartGraph vertex collections. This is now strictly enforced. +See [API Changes in ArangoDB 3.11](api-changes-in-oem.md#validation-of-smartgraphattribute-in-smartgraphs) +for details and instructions on how to repair affected attributes. + +## Validation of traversal collection restrictions + +Introduced in: v3.9.11, v3.10.7 + +In AQL graph traversals, you can restrict the vertex and edge collections in the +traversal options like so: + +```aql +FOR v, e, p IN 1..3 OUTBOUND 'products/123' components + OPTIONS { + vertexCollections: [ "bolts", "screws" ], + edgeCollections: [ "productsToBolts", "productsToScrews" ] + } + RETURN v +``` + +If you specify collections that don't exist, queries now fail with +a "collection or view not found" error (code `1203` and HTTP status +`404 Not Found`). In previous versions, unknown vertex collections were ignored, +and the behavior for unknown edge collections was undefined. + +Additionally, the collection types are now validated. If a document collection +or View is specified in `edgeCollections`, an error is raised +(code `1218` and HTTP status `400 Bad Request`). + +Furthermore, it is now an error if you specify a vertex collection that is not +part of the specified named graph (code `1926` and HTTP status `404 Not Found`). +It is also an error if you specify an edge collection that is not part of the +named graph's definition or of the list of edge collections (code `1939` and +HTTP status `400 Bad Request`). + +## Batch insertions of documents with key errors no longer fail the entire operation + +Introduced in: v3.11.1 + +When inserting multiple documents/edges at once in a cluster, the Document API +used to let the entire request fail if any of the documents/edges failed to be +saved due to a key error. More specifically, if the value of a `_key` attribute +contains illegal characters or if the key doesn't meet additional requirements, +for instance, coming from the collection being used in a Disjoint SmartGraph, +the `POST /_api/document/{collection}` endpoint would not reply with the usual +array of either the document metadata or the error object for each attempted +document insertion. Instead, it used to return an error object for the first +offending document only, and aborted the operation so that none of the documents +were saved. Example: + +```bash +> curl -d '[{"_key":"valid"},{"_key":"invalid space"}]' http://localhost:8529/_api/document/coll +{"code":400,"error":true,"errorMessage":"illegal document key","errorNum":1221} + +> curl http://localhost:8529/_api/document/coll/valid +{"code":404,"error":true,"errorMessage":"document not found","errorNum":1202} +``` + +Now, such key errors in cluster deployments no longer fail the entire request, +matching the behavior of single server deployments. 
Any errors are reported in +the result array for the respective documents, along with the successful ones: + +```bash +> curl -d '[{"_key":"valid"},{"_key":"invalid space"}]' http://localhost:8529/_api/document/coll +[{"_id":"coll/valid","_key":"valid","_rev":"_gG9JHsW---"},{"error":true,"errorNum":1221,"errorMessage":"illegal document key"}] + +> curl http://localhost:8529/_api/document/coll/valid +{"_key":"valid","_id":"coll/valid","_rev":"_gG9JHsW---"} +``` + +## Exit code adjustments + +Introduced in: v3.10.13, v3.11.7 + +For some fatal errors like a required database upgrade or a failed version check, +_arangod_ set the generic exit code of `1`. It now returns a different, more +specific exit code in these cases. + +## Batch-reading an empty list of documents succeeds + +Introduced in: v3.11.1 + +Using the Document API for reading multiple documents used to return an error +if the request body was an empty array. Example: + +```bash +> curl -XPUT -d '[]' 'http://localhost:8529/_api/document/coll?onlyget=true' +{"code":500,"error":true,"errorMessage":"internal error","errorNum":4} +``` + +Now, a request like this succeeds and returns an empty array as response. + +## Corrected sorting order for numbers in VelocyPack indexes + +Introduced in: v3.11.11, v3.12.2 + +- [Issues with the comparison of large indexed numbers](#issues-with-the-comparison-of-large-indexed-numbers) +- [Check if you are affected](#check-if-you-are-affected) +- [If the deployment is NOT affected](#if-the-deployment-is-not-affected) +- [If the deployment is affected](#if-the-deployment-is-affected) + +### Issues with the comparison of large indexed numbers + +If you store very large numeric values in ArangoDB – greater than/equal to +253 (9,007,199,254,740,992) or less than/equal to +-253 (-9,007,199,254,740,992) – and index them with an affected +index type, the values may not be in the correct order. This is due to how the +comparison is executed in versions before v3.11.11 and v3.12.2. If the numbers +are represented using different VelocyPack types internally, they are converted +to doubles and then compared. This conversion is lossy for integers with a very +large absolute value, resulting in an incorrect ordering of the values. + +The possibly affected index types are the following that allow storing +VelocyPack data in them: +- `persistent` (including vertex-centric indexes) +- `mdi-prefixed` (but not `mdi` indexes; only available from v3.12.0 onward) +- `hash` (legacy alias for persistent indexes) +- `skiplist` (legacy alias for persistent indexes) + +{{< warning >}} +The incorrect sort order in an index can lead to the RocksDB storage engine +discovering out-of-order keys and then refusing further write operations with +errors and warnings. +{{< /warning >}} + +To prevent ArangoDB deployments from entering a read-only mode due to this issue, +please follow the below procedures to check if your deployment is affected and +how to correct it if necessary. + +### Check if you are affected + +The following procedure is recommended for every deployment unless it has been +created with v3.11.11, v3.12.2, or any later version. + +1. Create a backup as a precaution. If you run the Enterprise Edition, you can + create a Hot Backup. Otherwise, create a full dump with _arangodump_ + (including all databases and system collections). + +2. If your deployment is on a 3.11.x version older than 3.11.11, upgrade to + the latest 3.11 version that is available. 
+ + If your deployment is on version 3.12.0 or 3.12.1, upgrade to the latest + 3.12 version that is available but be sure to also read about the string + sorting issue in [Resolving known issues with versions prior to 3.12.4](../../../3.12/release-notes/version-3.12/incompatible-changes-in-3-12.md#resolving-known-issues-with-versions-prior-to-3124) + and the linked upgrade procedures. + +3. Call the `GET /_admin/cluster/vpackSortMigration/check` endpoint to let + ArangoDB check all indexes. As it can take a while for large deployments, + it is recommended to run this operation as an asynchronous job + (`x-arango-async: store` header) so that you can check the result later. + + The endpoint is available for all deployment modes, not only in clusters. + In case of a cluster, send the request to one of the Coordinators. + Example with ArangoDB running locally on the default port: + + ```shell + curl --dump-header - -H "x-arango-async: store" http://localhost:8529/_admin/cluster/vpackSortMigration/check + ``` + +4. Inspect the response to find the job ID in the `X-Arango-Async-Id` HTTP header. + The job ID is `12345` in the following example: + + ``` + HTTP/1.1 202 Accepted + X-Arango-Queue-Time-Seconds: 0.000000 + Strict-Transport-Security: max-age=31536000 ; includeSubDomains + Expires: 0 + Pragma: no-cache + Cache-Control: no-cache, no-store, must-revalidate, pre-check=0, post-check=0, max-age=0, s-maxage=0 + Content-Security-Policy: frame-ancestors 'self'; form-action 'self'; + X-Content-Type-Options: nosniff + X-Arango-Async-Id: 12345 + Server: ArangoDB + Connection: Keep-Alive + Content-Type: text/plain; charset=utf-8 + Content-Length: 0 + ``` + +5. Call the `PUT /_api/job/12345` endpoint, substituting `12345` with your + actual job ID. It returns nothing if the job is still ongoing. You can repeat + this call every once in a while to check again. + + ```shell + curl -XPUT http://localhost:8529/_api/job/12345 + ``` + +6. If there are no issues with your deployment, the check result reports an + empty list of affected indexes and an according message. + + ```json + { + "error": false, + "code": 200, + "result": { + "affected": [], + "error": false, + "errorCode": 0, + "errorMessage": "all good with sorting order" + } + } + ``` + + If this is the case, continue with + [If the deployment is NOT affected](#if-the-deployment-is-not-affected). + + If affected indexes are found, the check result looks similar to this: + + ```json + { + "error": false, + "code": 200, + "result": { + "affected": [ + { + "database": "_system", + "collection": "coll", + "indexId": 195, + "indexName": "idx_1806192152446763008" + } + ], + "error": true, + "errorCode": 1242, + "errorMessage": "some indexes have legacy sorted keys" + } + } + ``` + + If this is the case, continue with + [If the deployment is affected](#if-the-deployment-is-affected). + +### If the deployment is NOT affected + +1. Make sure that no problematic values are written to or removed from an index + between checking for affected indexes and completing the procedure. + To be safe, you may want to stop all writes to the database system. + +2. You can perform a regular in-place upgrade and mark the deployment as correct + using a special HTTP API endpoint in the next step. + + That is, create a backup and upgrade your deployment to the + latest bugfix version with the same major and minor version (e.g. from 3.11.x + to at least 3.11.11 or from 3.12.x to at least 3.12.2). + +3. 
Call the `PUT /_admin/cluster/vpackSortMigration/migrate` endpoint to mark + the deployment as having the correct sorting order. This requires + [superuser permissions](../../develop/http-api/authentication.md#jwt-superuser-tokens) + unless authentication is disabled. + + ```shell + curl -H "Authorization: bearer " -XPUT http://localhost:8529/_admin/cluster/vpackSortMigration/migrate + ``` + + ```json + { + "error": false, + "code": 200, + "result": { + "error": false, + "errorCode": 0, + "errorMessage": "VPack sorting migration done." + } + } + ``` + +4. For the corrected sorting order to take effect, restart the ArangoDB server, + respectively restart the DB-Servers of the cluster. + +5. Complete the procedure by resuming writes to the database systems. + +### If the deployment is affected + +{{< info >}} +If you are a customer, please contact the Arango support to assist you with +the following steps. +{{< /info >}} + +1. This step depends on the deployment mode: + + - **Single server**: Create a new server. Then create a full dump with + [arangodump](../../components/tools/arangodump/_index.md) of the old server, + using the `--all-databases` and `--include-system-collections` startup options + and a user account with administrate access to the `_system` database and + at least read access to all other databases to ensure all data including + the `_users` system collection are dumped. + + Restore the dump to the new single server using at least v3.11.11 or v3.12.4 + (v3.12.2 only addresses this but not [another issue](../../../3.12/release-notes/version-3.12/incompatible-changes-in-3-12.md#corrected-sorting-order-for-strings-in-velocypack-indexes)). + You need to use a new database directory. + + - **Active Failover**: You need to replace all servers of the deployment. + You can do so in a rolling manner. + + Create a new server and add it as a new follower to the deployment. + When it is in-sync with the leader, remove one of the old followers. + Replace any other old followers in the same manner. Then create + one more new server, add it as a follower, and wait until it is in-sync. + Then remove the old leader, failing over to one of the new followers. + You should stop all writes temporarily before and after the failover so + that nothing is lost, as the Active Failover replication is asynchronous. + + You can also follow the single server instructions if it's acceptable to + have downtime. + + - **Cluster**: Replace the DB-Server nodes until they all run at least + v3.11.11 or v3.12.4 (rolling upgrade). Syncing new nodes writes the data in + the correct order. This deployment mode and approach avoids downtimes. + + For each DB-Server, add a new DB-Server node to the cluster. Wait until all + new DB-Servers are in sync, then clean out the old DB-Server nodes. + +2. New instances using the fixed versions initialize the database directory + with the sorting order marked as correct and also restore data from dumps + correctly. There is no need to call the `.../vpackSortMigration/migrate` + HTTP API endpoint like in the unaffected case. + +3. If you revert to an older state with affected indexes by restoring a + Hot Backup, you need to repeat the procedure. + +## Changed JSON serialization and VelocyPack format for replication + +Introduced in: v3.11.12, v3.12.3 + +While there is only one number type in JSON, the VelocyPack format that ArangoDB +uses supports different numeric data types. 
When converting between VelocyPack +and JSON, it was previously possible for precision loss to occur in edge cases. +This also affected creating and restoring dumps with arangodump and arangorestore. + +A double (64-bit floating-point) value `1152921504606846976.0` (260) +used to be serialized to `1152921504606847000` in JSON, which deserializes back +to `1152921504606846976` when using a double. However, the serialized value got +parsed as an unsigned integer, resulting in an incorrect value of +`1152921504606847000`. + +Numbers with an absolute value greater or equal to 253 and less than +264 (which always represents an integer) are now serialized faithfully +to JSON using an integer conversion routine and then `.0` is appended (e.g. +`1152921504606846976.0`) to ensure that they get parsed back to the exact same +double value. All other values are serialized as before, e.g. small integral +values don't get `.0` appended, and they get parsed back to integers with the +same numerical value. + +Moreover, replication-related APIs such as the `/_api/wal/tail` endpoint now +support the VelocyPack format. The cluster replication has been changed to use +VelocyPack instead of JSON to avoid unnecessary conversions and avoiding any +risk of deviations due to the serialization. + +## JavaScript API + +### Database creation + +The `db._createDatabase()` method for creating a new database has changed. +If the specified database name is invalid/illegal, it now returns the error code +`1208` (`ERROR_ARANGO_ILLEGAL_NAME`). It previously returned `1229` +(`ERROR_ARANGO_DATABASE_NAME_INVALID`) in this case. + +This is a downwards-incompatible change, but unifies the behavior for database +creation with the behavior of collection and View creation, which also return +the error code `1208` in case the specified name is not allowed. + +### Index methods + +Calling `collection.dropIndex(...)` or `db._dropIndex(...)` now raises an error +if the specified index does not exist or cannot be dropped (for example, because +it is a primary index or edge index). The methods previously returned `false`. +In case of success, they still return `true`. + +You can wrap calls to these methods with a `try { ... }` block to catch errors, +for example, in _arangosh_ or in Foxx services. + +## Startup options + +### `--server.disable-authentication` and `--server.disable-authentication-unix-sockets` obsoleted + +The `--server.disable-authentication` and `--server.disable-authentication-unix-sockets` +startup options are now obsolete. Specifying them is still tolerated but has +no effect anymore. These options were deprecated in v3.0 and mapped to +`--server.authentication` and `--server.authentication-unix-sockets`, which +made them do the opposite of what their names suggest. + +### `--database.force-sync-properties` deprecated + +The `--database.force-sync-properties` option was useful with the MMFiles +storage engine, which has been removed in v3.7. The option does not have any +useful effect if you use the RocksDB storage engine. From v3.11.0 onwards, it +has no effect at all, is deprecated, and will be removed in a future version. + +### `--agency.pool-size` deprecated + +The `--agency.pool-size` option was effectively not properly supported in any +version of ArangoDB. Setting the option to anything but the value of +`--agency.size` should be avoided. + +From v3.11.0 onwards, this option is deprecated, and setting it to a value +different than the value of `--agency.size` leads to a startup error. 
+ +### `--query.parallelize-gather-writes` obsoleted + +Parallel gather is now enabled by default and supported for most queries. +The `--query.parallelize-gather-writes` startup option has no effect anymore, +but specifying it still tolerated. + +See [Features and Improvements in ArangoDB 3.11](whats-new-in-oem.md#parallel-gather) +for details. + +### `--pregel.memory-mapped-files*` obsoleted + +Pregel no longer supports use memory-mapped files as a backing storage. +The following startup options have therefore been removed: + +- `--pregel.memory-mapped-files` +- `--pregel.memory-mapped-files-custom-path` +- `--pregel.memory-mapped-files-location-type` + +You can still specify them on startup without raising errors but they have no +effect anymore. + +## Client tools + +### arangoexport + +The default output file type produced by arangoexport, controlled by the `--type` +startup option, has been changed from `json` to `jsonl`. +This allows for more efficient processing of the files produced by arangoexport +with other tools, such as arangoimport, by default. diff --git a/site/content/arangodb/4.0/release-notes/version-oem/known-issues-in-oem.md b/site/content/arangodb/4.0/release-notes/version-oem/known-issues-in-oem.md new file mode 100644 index 0000000000..05ecdda870 --- /dev/null +++ b/site/content/arangodb/4.0/release-notes/version-oem/known-issues-in-oem.md @@ -0,0 +1,61 @@ +--- +title: Known Issues in ArangoDB OEM LTS +menuTitle: Known Issues in OEM LTS +weight: 10 +description: >- + Important issues affecting the OEM LTS versions of the ArangoDB suite of products +--- +Note that this page does not list all open issues. + +## ArangoSearch + +| Issue | +|------------| +| **Date Added:** 2018-12-19
**Component:** ArangoSearch
**Deployment Mode:** Single-server
**Description:** Value of `_id` attribute indexed by `arangosearch` View may become inconsistent after renaming a collection
**Affected Versions:** 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [arangodb/backlog#514](https://github.com/arangodb/backlog/issues/514) (internal) | +| **Date Added:** 2018-12-03
**Component:** ArangoSearch
**Deployment Mode:** Cluster
**Description:** Score values evaluated by corresponding score functions (BM25/TFIDF) may differ in single-server and cluster with a collection having more than 1 shard
**Affected Versions:** 3.4.x, 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [arangodb/backlog#508](https://github.com/arangodb/backlog/issues/508) (internal) | +| **Date Added:** 2018-12-03
**Component:** ArangoSearch
**Deployment Mode:** All
**Description:** Using a loop variable in expressions within a corresponding SEARCH condition is not supported
**Affected Versions:** 3.4.x, 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [arangodb/backlog#318](https://github.com/arangodb/backlog/issues/318) (internal) | +| **Date Added:** 2019-06-25
**Component:** ArangoSearch
**Deployment Mode:** All
**Description:** The `primarySort` attribute in `arangosearch` View definitions cannot be set via the web interface. The option is immutable, but the web interface does not allow you to set any View properties upfront (it creates a View with default parameters before the user has a chance to configure it).
**Affected Versions:** 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** N/A | +| **Date Added:** 2020-03-19
**Component:** ArangoSearch
**Deployment Mode:** All
**Description:** Operators and functions in `SEARCH` clauses of AQL queries that compare values, such as `>`, `>=`, `<`, `<=`, `IN_RANGE()`, and `STARTS_WITH()`, take neither the server language (`--default-language`) nor the Analyzer locale into account. The alphabetical order of characters as defined by a language is thus not honored and can lead to unexpected results in range queries.
**Affected Versions:** 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [arangodb/backlog#679](https://github.com/arangodb/backlog/issues/679) (internal) | + +## AQL + +| Issue | +|------------| +| **Date Added:** 2018-09-05
**Component:** AQL
**Deployment Mode:** Cluster
**Description:** In a very uncommon edge case there is an issue with an optimization rule in the cluster. If you are running a cluster and use a custom shard key on a collection (default is `_key`) **and** you provide a wrong shard key in a modifying query (`UPDATE`, `REPLACE`, `DELETE`) **and** the wrong shard key is on a different shard than the correct one, a `DOCUMENT NOT FOUND` error is returned instead of a modification (example query: `UPDATE { _key: "123", shardKey: "wrongKey"} WITH { foo: "bar" } IN mycollection`). Note that the modification always happens if the rule is switched off, so the suggested workaround is to [deactivate the optimizing rule](../../aql/execution-and-performance/query-optimization.md#turning-specific-optimizer-rules-off) `restrict-to-single-shard`.
**Affected Versions:** 3.4.x, 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [arangodb/arangodb#6399](https://github.com/arangodb/arangodb/issues/6399) | + +## Upgrading + +| Issue | +|------------| +| **Date Added:** 2019-05-16
**Component:** arangod
**Deployment Mode:** All
**Description:** Bugfix release upgrades such as 3.4.4 to 3.4.5 may not create a backup of the database directory even if they should. Please create a copy manually before upgrading.
**Affected Versions:** 3.4.x, 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x (Windows and Linux)
**Fixed in Versions:** -
**Reference:** [arangodb/planning#3745](https://github.com/arangodb/planning/issues/3745) (internal) | +| **Date Added:** 2019-12-10
**Component:** Installer
**Deployment Mode:** All
**Description:** The NSIS installer for Windows may fail to upgrade an existing installation, e.g. from 3.4.a to 3.4.b (patch release), with the error message: "failed to detect whether we need to Upgrade"
**Affected Versions:** 3.4.x, 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [arangodb/release-qa#183](https://github.com/arangodb/release-qa/issues/183) (internal) | +| **Date Added:** 2020-01-07
**Component:** Installer
**Deployment Mode:** All
**Description:** The NSIS installer for Windows can fail to add the path to the ArangoDB binaries to the `PATH` environment variable, silently or with an error.
**Affected Versions:** 3.4.x, 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [arangodb/release-qa#183](https://github.com/arangodb/release-qa/issues/183) (internal) | +| **Date Added:** 2023-06-06
**Component:** arangod
**Deployment Mode:** Cluster
**Description:** During a cluster upgrade while the supervision is deactivated (maintenance mode), upgraded DB-Server nodes are incorrectly reported to still have the old server version. The versions are visible in the Agency as well as in the **NODES** section of the web interface.
**Affected Versions:** 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [BTS-1409](https://arangodb.atlassian.net/browse/BTS-1409) (internal) | + +## Hot Backup + +| Issue | +|------------| +| **Date Added:** 2019-10-09
**Component:** Hot Backup API / arangobackup
**Deployment Mode:** All
**Description:** The Hot Backup feature is not supported in the Windows version of ArangoDB at this point in time.
**Affected Versions:** 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** N/A | +| **Date Added:** 2019-10-09
**Component:** Hot Backup API / arangobackup
**Deployment Mode:** DC2DC
**Description:** Hot Backup functionality in Datacenter-to-Datacenter Replication setups is experimental and may not work.
**Affected Versions:** 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** N/A | +| **Date Added:** 2019-10-09
**Component:** arangobackup
**Deployment Mode:** All
**Description:** The startup option `--operation` works as positional argument only, e.g. `arangobackup list`. The alternative syntax `arangobackup --operation list` is not accepted.
**Affected Versions:** 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** N/A | + +## Other + +| Issue | +|------------| +| **Date Added:** 2019-05-16
**Component:** Starter
**Deployment Mode:** All
**Description:** The ArangoDB Starter falls back to the IP `[::1]` under macOS. If there is no entry `::1 localhost` in the `/etc/hosts` file or the option `--starter.disable-ipv6` is passed to the starter to use IPv4, then it will hang during startup.
**Affected Versions:** 0.14.3 (macOS only)
**Fixed in Versions:** -
**Reference:** N/A | +| **Date Added:** 2019-05-24
**Component:** Web Interface
**Deployment Mode:** Active Failover
**Description:** The web interface sometimes shows a wrong replication mode in the replication tab of Active Failover deployments. It may display the Leader/Follower mode (the default value) because of timeouts if `/_api/cluster/endpoints` is requested too frequently.
**Affected Versions:** 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** N/A | +| **Date Added:** 2019-04-03
**Component:** arangod
**Deployment Mode:** Cluster
**Description:** Updating the properties of a collection in the cluster may return before the properties are updated consistently on all shards. This is especially visible when setting a schema for a collection with multiple shards, and then instantly starting to store non-conforming documents into the collection. These may be accepted until the property change has been fully propagated to all shards.
**Affected Versions:** 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** N/A | +| **Date Added:** 2021-04-07
**Component:** arangod
**Deployment Mode:** All
**Description:** The Batch API (HTTP endpoint `/_api/batch`) cannot be used in combination with Stream transactions to submit batched requests, because the required header `x-arango-trx-id` is not forwarded. It only processes `Content-Type` and `Content-Id`.
**Affected Versions:** 3.5.x, 3.6.x, 3.7.x, 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [arangodb/arangodb#13552](https://github.com/arangodb/arangodb/issues/13552) | +| **Date Added:** 2021-08-06
**Component:** Installer
**Deployment Mode:** Single Server
**Description:** The Windows installer fails during database initialization with the error `failed to locate tzdata` if there are non-ASCII characters in the destination path.
**Affected Versions:** 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [BTS-531](https://arangodb.atlassian.net/browse/BTS-531) (internal) | +| **Date Added:** 2022-09-29
**Component:** ArangoDB Starter
**Deployment Mode:** All
**Description:** The ArangoDB Starter may fail to pick a Docker container name from cgroups.
**Affected Versions:** 3.8.x, 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** -
**Reference:** [GT-207](https://arangodb.atlassian.net/browse/GT-207) (internal) | +| **Date Added:** 2023-05-25
**Component:** Web Interface
**Deployment Mode:** All
**Description:** When accessing the Web Interface with Chromium-based browsers, the following issue may occur: the **Download JSON** button in the **Collections** page and the **Download** button in the **Queries** page don't work.
**Affected Versions:** 3.11.x
**Fixed in Versions:** 3.11.2
**Reference:** [BTS-1424](https://arangodb.atlassian.net/browse/BTS-1424) (internal) | +| **Date Added:** 2023-05-25
**Component:** arangod
**Deployment Mode:** Single Server
**Description:** After an upgrade to 3.11.0 from an older version of ArangoDB with existing data, the following issue may occur: when you start a Pregel run or request its status, the Pregel command fails with `ArangoError 1203: _pregel_queries...`. As a workaround, you can manually create the collection by running `db._create("_pregel_queries", { isSystem: true });` in arangosh.
**Affected Versions:** 3.11.0
**Fixed in Versions:** 3.11.1
**Reference:** [arangodb/arangodb#19101](https://github.com/arangodb/arangodb/pull/19101) | +| **Date Added:** 2023-05-25
**Component:** arangod
**Deployment Mode:** All
**Description:** When starting an async job by sending a request with the `x-arango-async: store` or `x-arango-async: keep` HTTP header **and** additionally sending the `accept-encoding: gzip` or `accept-encoding: deflate` HTTP header, the generated response may be compressed twice when fetching the async job's response later via the `/_api/job/` REST API.
**Affected Versions:** 3.11.0
**Fixed in Versions:** 3.11.1
**Reference:** [arangodb/arangodb#19103](https://github.com/arangodb/arangodb/pull/19103) | +| **Date Added:** 2023-06-15
**Component:** arangod
**Deployment Mode:** Cluster
**Description:** After an upgrade to 3.11.0 from an older version of ArangoDB with existing data, the following issue may occur after performing a Hot Backup: `_pregel_queries` collections do not exist.
**Affected Versions:** 3.11.x
**Fixed in Versions:** -
**Reference:** [BTS-1462](https://arangodb.atlassian.net/browse/BTS-1462) (internal) | +| **Date Added:** 2023-06-16
**Component:** arangod
**Deployment Mode:** Cluster
**Description:** If more than a certain threshold of queries on the same Coordinator get into the shutdown networking code at the same time, all of them lock up and the Coordinator no longer processes requests.
**Affected Versions:** 3.9.x, 3.10.x, 3.11.x
**Fixed in Versions:** 3.9.12, 3.10.8, 3.11.2
**Reference:** [BTS-1486](https://arangodb.atlassian.net/browse/BTS-1486) (internal) | +| **Date Added:** 2024-03-21
**Component:** arangod
**Deployment Mode:** All
**Description:** When creating an `inverted` index with the `inBackground` option enabled, HTTP API calls like `http://localhost:8529/_api/index?collection=&withHidden=true` don't return the `isBuilding` and `progress` attributes, and the progress of the index building thus cannot be observed.
**Affected Versions:** 3.10.13, 3.11.7
**Fixed in Versions:** -
**Reference:** [BTS-1788](https://arangodb.atlassian.net/browse/BTS-1788) (internal) | +| **Date Added:** 2024-07-03
**Component:** arangod
**Deployment Mode:** All
**Description:** ArangoDB can crash if run on bare metal when the Linux distribution uses a glibc version different from the one ArangoDB is built with, the `libnss-*` libraries are installed, and the `/etc/nsswitch.conf` configuration file contains settings other than for `files` and `dns` in the `hosts:` line, or the `passwd:` and `group:` lines contain something other than `files`. If you use a fixed version, it can still crash under these circumstances if you enable the `--honor-nsswitch` startup option.
**Affected Versions:** 3.11.10 (non-hotfix)
**Fixed in Versions:** 3.11.10-1
**Reference:** [Incompatibility due to switch to glibc](incompatible-changes-in-oem.md#incompatibilities-due-to-switch-to-glibc) | +| **Date Added:** 2025-01-30
**Component:** arangod
**Deployment Mode:** Cluster
**Description:** If the invariant that ArangoDB's data is not modified while a server is down is violated, manually as well as automatically triggered operations such as moving shards can make a DB-Server the leader (again) even though it may not have the correct data. ArangoDB currently does not protect against certain cases, such as bringing a DB-Server back without data (by accident or on purpose), which can lead to this empty state getting replicated across the cluster, thus causing data loss.
**Affected Versions:** 3.11.x
**Fixed in Versions:** -
**Reference:** N/A | +| **Date Added:** 2025-06-25
**Component:** Web interface
**Deployment Mode:** All
**Description:** Specifying an `X-Script-Name` HTTP header in requests to the web interface (`/_admin/aardvark`) to add a path prefix is non-functional. The feature was originally added in version 3.0 for basic proxy setups but doesn't adequately handle the requests of certain internal services.
**Affected Versions:** 3.11.x
**Fixed in Versions:** -
**Reference:** N/A | diff --git a/site/content/arangodb/4.0/release-notes/version-oem/whats-new-in-oem.md b/site/content/arangodb/4.0/release-notes/version-oem/whats-new-in-oem.md new file mode 100644 index 0000000000..e50c7abb6b --- /dev/null +++ b/site/content/arangodb/4.0/release-notes/version-oem/whats-new-in-oem.md @@ -0,0 +1,1454 @@ +--- +title: Features and Improvements in ArangoDB OEM LTS +menuTitle: What's New in OEM LTS +weight: 5 +description: >- + Improved performance and reporting for AQL queries, new caching features for + indexed data, improvements to the web interface +--- +The following list shows in detail which features have been added or improved in +ArangoDB OEM LTS. ArangoDB OEM LTS also contains several bug fixes that are not listed +here. + +## ArangoSearch + +### Late materialization improvements + +The number of disk reads required when executing search queries with late +materialization optimizations applied has been reduced so that less data needs +to be requested from the RocksDB storage engine. + +### ArangoSearch column cache (Enterprise Edition) + +[`arangosearch` Views](../../indexes-and-search/arangosearch/arangosearch-views-reference.md) support new caching options. + +Introduced in: v3.9.5, v3.10.2 + +- You can enable the new `cache` option for individual View links or fields + to always cache field normalization values in memory. This can improve the + performance of scoring and ranking queries. + + It also enables caching of auxiliary data used for querying fields that are + indexed with Geo Analyzers. This can improve the performance of geo-spatial + queries. + +- You can enable the new `cache` option in the definition of a `storedValues` + View property to always cache stored values in memory. This can improve the + query performance if stored values are involved. + +--- + +Introduced in: v3.9.6, v3.10.2 + +- You can enable the new `primarySortCache` View property to always cache the + primary sort columns in memory. This can improve the performance of queries + that utilize the primary sort order. + +- You can enable the new `primaryKeyCache` View property to always cache the + primary key column in memory. This can improve the performance of queries + that return many documents. + +--- + +[Inverted indexes](../../develop/http-api/indexes/inverted.md) also support similar new caching +options. + +Introduced in: v3.10.2 + +- A new `cache` option for inverted indexes as the default or for specific + `fields` to always cache field normalization values and Geo Analyzer auxiliary + data in memory. + +- A new `cache` option per object in the definition of the `storedValues` + elements to always cache stored values in memory. + +- A new `cache` option in the `primarySort` property to always cache the + primary sort columns in memory. + +- A new `primaryKeyCache` property for inverted indexes to always cache the + primary key column in memory. + +--- + +The cache size can be controlled with the new `--arangosearch.columns-cache-limit` +startup option and monitored via the new `arangodb_search_columns_cache_size` +metric. + +ArangoSearch caching is only available in the Enterprise Edition. + +See [Optimizing View and inverted index query performance](../../indexes-and-search/arangosearch/performance.md) +for examples. + +{{< info >}} +If you use ArangoSearch caching in supported 3.9 versions and upgrade an +Active Failover deployment to 3.10, you may need to re-configure the +cache-related options and thus recreate inverted indexes and Views. 
See +[Known Issues in 3.10](../version-3.10/known-issues-in-3-10.md#arangosearch). +{{< /info >}} + +## Analyzers + +### `geo_s2` Analyzer (Enterprise Edition) + +Introduced in: v3.10.5 + +This new Analyzer lets you index GeoJSON data with inverted indexes or Views +similar to the existing `geojson` Analyzer, but it internally uses a format for +storing the geo-spatial data that is more efficient. + +You can choose between different formats to make a tradeoff between the size on +disk, the precision, and query performance: + +- 8 bytes per coordinate pair using 4-byte integer values, with limited precision. +- 16 bytes per coordinate pair using 8-byte floating-point values, which is still + more compact than the VelocyPack format used by the `geojson` Analyzer +- 24 bytes per coordinate pair using the native Google S2 format to reduce the number + of computations necessary when you execute geo-spatial queries. + +This feature is only available in the Enterprise Edition. + +See [Analyzers](../../indexes-and-search/analyzers.md#geo_s2) for details. + +## Web interface + +### New graph viewer + +The graph viewer for visualizing named graphs has been reimplemented based on +the [vis.js](https://visjs.org/) library, the interface +has been redesigned to be cleaner and rewritten to use the React framework, +and the overall performance has been improved. + +The available **Layout** algorithms are **forceAtlas2** and **hierarchical**. +Force-based layouts try to avoid overlaps while grouping adjacent nodes together. +The new hierarchical layout is useful for strict topologies like trees. + +A new feature is the ability to search the visible graph to center a specific +vertex. Another quality-of-life improvement is the **Start node** setting listing +the graph's vertex collections and the available document keys, that you can +also search by. + +![New graph viewer](../../../../images/graphViewer.png) + +You can still switch to the old graph viewer if desired. + +See the [Graph Viewer](../../components/web-interface/graphs.md) documentation for +details. + +### `search-alias` Views + +The 3.11 release of ArangoDB introduces a new web interface for Views that lets +you to create and manage [`search-alias` Views](../../indexes-and-search/arangosearch/search-alias-views-reference.md). + +Through this dialog, you can easily create a new View and add to it one or more +inverted indexes from your collections that you could otherwise do via the HTTP +or JavaScript API. + +When opening your newly created View, you can copy mutable properties from +previously created `search-alias` Views, providing a convenient way to apply +the same settings to multiple Views. In addition, the JSON editor offers the +option to directly write the definition of your View in JSON format. + +For more information, see the +[detailed guide](../../indexes-and-search/arangosearch/search-alias-views-reference.md#create-search-alias-views-using-the-web-interface). + +### `arangosearch` Views + +The existing way of creating and managing `arangosearch` Views through the +web interface has been redesigned, offering a more straightforward approach to add +or modify the definition of your View. The settings, links, and JSON editor have +been merged into a single page, allowing for a much quicker workflow. + +For more information, see the +[detailed guide](../../indexes-and-search/arangosearch/arangosearch-views-reference.md#create-arangosearch-views-using-the-web-interface). 
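+
+For comparison with the web interface workflow described above, the equivalent
+JavaScript API calls in _arangosh_ might look roughly as follows; the collection
+name `articles` and the indexed field are illustrative assumptions:
+
+```js
+// Sketch: create an arangosearch View and link the hypothetical "articles"
+// collection, indexing its "title" attribute with the built-in text_en Analyzer.
+var view = db._createView("articlesView", "arangosearch", {});
+
+view.properties({
+  links: {
+    articles: {
+      fields: {
+        title: { analyzers: ["text_en"] }
+      }
+    }
+  }
+});
+```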
+ +### Inverted indexes + +The web interface now includes the option for creating +[inverted indexes](../../indexes-and-search/indexing/working-with-indexes/inverted-indexes.md) on collections. You can set all the +properties directly in the web interface, which previously required the JavaScript +or HTTP API. It also offers an editor where you can write the definition of +your inverted index in JSON format. + +### New sorting mechanism and search box for Saved Queries + +When working with **Saved Queries** in the web interface, you can now +configure their sort order so that your saved queries are listed by the +date they were last modified. +This is particularly helpful when you have a large amount of saved custom +queries and want to see which ones have been created or used recently. + +In addition, the web interface also offers a search box which helps you +quickly find the query you're looking for. + +## AQL + +### Parallel gather + +On Coordinators in cluster deployments, results from different DB-Servers are +combined into a stream of results. This process is called gathering. It shows as +`GatherNode` nodes in the execution plan of AQL queries. + +Previously, a cluster AQL query could only parallelize a `GatherNode` if the +DB-Server query part above it (in terms of query execution plan layout) was a +terminal part of the query. That means that it was not allowed for other nodes of +type `ScatterNode`, `GatherNode`, or `DistributeNode` to be present in the query. + +Modification queries were also not allowed to use parallel gather unless the +`--query.parallelize-gather-writes` startup option was enabled, which defaulted +to `false`. + +From v3.11.0 onward, these limitations are removed so that parallel gather can be +used in almost all queries. As a result, the feature is enabled by default and +the `--query.parallelize-gather-writes` startup option is now obsolete. You can +still disable the optimization by disabling the `parallelize-gather` AQL +optimizer rule. + +The only case where parallel gather is not supported is when using traversals, +although there are some exceptions for Disjoint SmartGraphs where the traversal +can run completely on the local DB-Server (only available in the Enterprise Edition). + +The parallel gather optimization can not only speed up queries quite significantly, +but also overcome issues with the previous serial processing within `GatherNode` +nodes, which could lead to high memory usage on Coordinators caused by buffering +of documents for other shards, and timeouts on some DB-Servers because query parts +were idle for too long. + +### Optimized access of last element in traversals + +If you use a `FOR` operation for an AQL graph traversal like `FOR v, e, p IN ...` +and later access the last vertex or edge via the path variable `p`, like +`FILTER p.vertices[-1].name == "ArangoDB"` or `FILTER p.edges[-1].weight > 5`, +the access is transformed to use the vertex variable `v` or edge variable `e` +instead, like `FILTER v.name == "ArangoDB"` or `FILTER e.weight > 5`. This is +cheaper to compute because the path variable `p` may not need to be computed at +all, and it can enable further optimizations that are not possible on `p`. + +The new `optimize-traversal-last-element-access` optimization rule appears in +query execution plans if this optimization is applied. 
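+
+As a quick (hypothetical) way to verify the optimization, you can explain such a
+traversal in _arangosh_ and look for the rule among the applied optimizer rules;
+the graph name and the filtered attribute below are placeholders:
+
+```js
+// Sketch: if the access to p.vertices[-1] is rewritten to use the vertex
+// variable v, "optimize-traversal-last-element-access" shows up in the list
+// of applied optimizer rules of the explain output.
+db._explain(`
+  FOR v, e, p IN 1..3 OUTBOUND 'products/123' GRAPH 'myGraph'
+    FILTER p.vertices[-1].name == "ArangoDB"
+    RETURN v
+`);
+```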
+ +### Faster bulk `INSERT` operations in clusters + +AQL `INSERT` operations that insert multiple documents can now be faster in +cluster deployments by avoiding unnecessary overhead that AQL queries typically +require for the setup and shutdown in a cluster, as well as for the internal +batching. + +This improvement also decreases the number of HTTP requests to the DB-Servers. +Instead of batching the array of documents (with a default batch size of `1000`), +a single request per DB-Server is used internally to transfer the data. + +The optimization brings the AQL `INSERT` performance close to the performance of +the specialized HTTP API for [creating multiple documents](../../develop/http-api/documents.md#create-multiple-documents). + +The pattern that is recognized by the optimizer is as follows: + +```aql +FOR doc IN INSERT doc INTO collection +``` + +`` can either be a bind parameter, a variable, or an array literal. +The value needs to be an array of objects and be known at query compile time. + +```aql +Query String (43 chars, cacheable: false): + FOR doc IN @docs INSERT doc INTO collection + +Execution plan: + Id NodeType Site Est. Comment + 1 SingletonNode COOR 1 * ROOT + 2 CalculationNode COOR 1 - LET #2 = [ { "value" : 1 }, { "value" : 2 }, { "value" : 3 } ] /* json expression */ /* const assignment */ + 5 MultipleRemoteModificationNode COOR 3 - FOR doc IN #2 INSERT doc IN collection + +Indexes used: + none + +Optimization rules applied: + Id RuleName + 1 remove-data-modification-out-variables + 2 optimize-cluster-multiple-document-operations +``` + +The query runs completely on the Coordinator. The `MultipleRemoteModificationNode` +performs a bulk document insert for the whole input array in one go, internally +using a transaction that is more lightweight for transferring the data to the +DB-Servers than a regular AQL query. + +Without the optimization, the Coordinator requests data from the DB-Servers +(`GatherNode`), but the DB-Servers have to contact the Coordinator in turn to +request their data (`DistributeNode`), involving a network request for every +batch of documents: + +```aql +Execution plan: + Id NodeType Site Est. Comment + 1 SingletonNode COOR 1 * ROOT + 2 CalculationNode COOR 1 - LET #2 = [ { "value" : 1 }, { "value" : 2 }, { "value" : 3 } ] /* json expression */ /* const assignment */ + 3 EnumerateListNode COOR 3 - FOR doc IN #2 /* list iteration */ + 9 CalculationNode COOR 3 - LET #4 = MAKE_DISTRIBUTE_INPUT_WITH_KEY_CREATION(doc, null, { "allowSpecifiedKeys" : false, "ignoreErrors" : false, "collection" : "collection" }) /* simple expression */ + 5 DistributeNode COOR 3 - DISTRIBUTE #4 + 6 RemoteNode DBS 3 - REMOTE + 4 InsertNode DBS 0 - INSERT #4 IN collection + 7 RemoteNode COOR 0 - REMOTE + 8 GatherNode COOR 0 - GATHER /* parallel, unsorted */ +``` + +The new `optimize-cluster-multiple-document-operations` optimizer rule that +enables the optimization is only applied if there is no `RETURN` operation, +which means you cannot use `RETURN NEW` or similar to access the new documents +including their document keys. Additionally, all preceding calculations must be +constant, which excludes any subqueries that read documents. + +See the list of [optimizer rules](../../aql/execution-and-performance/query-optimization.md#optimizer-rules) +for details. + +### Index cache refilling + +The [edge cache refilling](../version-3.10/whats-new-in-3-10.md#edge-cache-refilling-experimental) +feature introduced in v3.9.6 and v3.10.2 is no longer experimental. 
From v3.11.0 +onward, it is called _**index** cache refilling_ and is not limited to edge caches +anymore, but also supports in-memory hash caches of persistent indexes +(persistent indexes with the `cacheEnabled` option set to `true`). + +This new feature automatically refills the in-memory index caches. +When documents (including edges) are added, modified, or removed and if this +affects an edge index or cache-enabled persistent indexes, these changes are +tracked and a background thread tries to update the index caches accordingly if +the feature is enabled, by adding new, updating existing, or deleting and +refilling cache entries. + +You can enable it for individual `INSERT`, `UPDATE`, `REPLACE`, and `REMOVE` +operations in AQL queries (using `OPTIONS { refillIndexCaches: true }`), for +individual document API requests that insert, update, replace, or remove single +or multiple documents (by setting `refillIndexCaches=true` as query +parameter), as well as enable it by default using the new +`--rocksdb.auto-refill-index-caches-on-modify` startup option. + +The new `--rocksdb.auto-refill-index-caches-queue-capacity` startup option +restricts how many index cache entries the background thread can queue at most. +This limits the memory usage for the case of the background thread being slower +than other operations that invalidate index cache entries. + +The background refilling is done on a best-effort basis and not guaranteed to +succeed, for example, if there is no memory available for the cache subsystem, +or during cache grow/shrink operations. A background thread is used so that +foreground write operations are not slowed down by a lot. It may still cause +additional I/O activity to look up data from the storage engine to repopulate +the cache. + +In addition to refilling the index caches, the caches can also automatically be +seeded on server startup. Use the new `--rocksdb.auto-fill-index-caches-on-startup` +startup option to enable this feature. It may cause additional CPU and I/O load. +You can limit how many index filling operations can execute concurrently with the +`--rocksdb.max-concurrent-index-fill-tasks` option. The lower this number, the +lower the impact of the cache filling, but the longer it takes to complete. + +The following metrics are available: + +| Label | Description | +|:------|:------------| +| `rocksdb_cache_auto_refill_loaded_total` | Total number of queued items for in-memory index caches refilling. +| `rocksdb_cache_auto_refill_dropped_total` | Total number of dropped items for in-memory index caches refilling. +| `rocksdb_cache_full_index_refills_total` | Total number of in-memory index caches refill operations for entire indexes. + +This feature is experimental. + +Also see: +- [AQL `INSERT` operation](../../aql/high-level-operations/insert.md#refillindexcaches) +- [AQL `UPDATE` operation](../../aql/high-level-operations/update.md#refillindexcaches) +- [AQL `REPLACE` operation](../../aql/high-level-operations/replace.md#refillindexcaches) +- [AQL `REMOVE` operation](../../aql/high-level-operations/remove.md#refillindexcaches) +- [Document HTTP API](../../develop/http-api/documents.md) +- [Index cache refill options](#index-cache-refill-options) + +### Retry request for result batch + +You can retry the request for the latest result batch of an AQL query cursor if +you enable the new `allowRetry` query option. See +[API Changes in ArangoDB 3.11](api-changes-in-oem.md#cursor-api) +for details. + +### `COLLECT ... 
INTO` can use `hash` method + +Grouping with the `COLLECT` operation supports two different methods, `hash` and +`sorted`. For `COLLECT` operations with an `INTO` clause, only the `sorted` method +was previously supported, but the `hash` variant has been extended to now support +`INTO` clauses as well. + +```aql +FOR i IN 1..10 + COLLECT v = i % 2 INTO group // OPTIONS { method: "hash" } + SORT null + RETURN { v, group } +``` + +```aql +Execution plan: + Id NodeType Est. Comment + 1 SingletonNode 1 * ROOT + 2 CalculationNode 1 - LET #3 = 1 .. 10 /* range */ /* simple expression */ + 3 EnumerateListNode 10 - FOR i IN #3 /* list iteration */ + 4 CalculationNode 10 - LET #5 = (i % 2) /* simple expression */ + 5 CollectNode 8 - COLLECT v = #5 INTO group KEEP i /* hash */ + 8 CalculationNode 8 - LET #9 = { "v" : v, "group" : group } /* simple expression */ + 9 ReturnNode 8 - RETURN #9 +``` + +The query optimizer automatically chooses the `hash` method for the above +example query, but you can also specify your preferred method explicitly. + +See the [`COLLECT` options](../../aql/high-level-operations/collect.md#method) for details. + +### K_SHORTEST_PATHS performance improvements + +The `K_SHORTEST_PATHS` graph algorithm in AQL has been refactored in ArangoDB 3.11, +resulting in major performance improvements. The query now returns the +shortest paths between two documents in a graph up to 100 times faster. + +### Added AQL functions + +Added the `DATE_ISOWEEKYEAR()` function that returns the ISO week number, +like `DATE_ISOWEEK()` does, but also the year it belongs to: + +```aql +RETURN DATE_ISOWEEKYEAR("2023-01-01") // { "week": 52, "year": 2022 } +``` + +See [AQL Date functions](../../aql/functions/date.md#date_isoweekyear) for details. + +--- + +Added the `SHA256()` function that calculates the SHA256 checksum for a string +and returns it in a hexadecimal string representation. + +```aql +RETURN SHA256("ArangoDB") // "acbd84398a61fcc6fd784f7e16c32e02a0087fd5d631421bf7b5ede5db7fda31" +``` + +See [AQL String functions](../../aql/functions/string.md#sha256) for details. + +### Extended query explain statistics + +Introduced in: v3.10.4 + +The query explain result now includes the peak memory usage and execution time. +This helps finding queries that use a lot of memory or take long to build the +execution plan. + +The additional statistics are displayed at the end of the output in the +web interface (using the **Explain** button in the **QUERIES** section) and in +_arangosh_ (using `db._explain()`): + +``` +44 rule(s) executed, 1 plan(s) created, peak mem [b]: 32768, exec time [s]: 0.00214 +``` + +The HTTP API returns the extended statistics in the `stats` attribute when you +use the `POST /_api/explain` endpoint: + +```json +{ + ... + "stats": { + "rulesExecuted": 44, + "rulesSkipped": 0, + "plansCreated": 1, + "peakMemoryUsage": 32768, + "executionTime": 0.00241307167840004 + } +} +``` + +Also see: +- [API Changes in ArangoDB 3.11](api-changes-in-oem.md#explain-api) +- [The AQL query optimizer](../../aql/execution-and-performance/query-optimization.md#optimizer-statistics) + +### Extended peak memory usage reporting + +The peak memory usage of AQL queries is now also reported for running queries +and slow queries. + +In the web interface, you can find the **Peak memory usage** column in the +**QUERIES** section, in the **Running Queries** and **Slow Query History** tabs. + +In the JavaScript and HTTP APIs, the value is reported as `peakMemoryUsage`. 
+See [API Changes in ArangoDB 3.11](api-changes-in-oem.md#query-api). + +### Number of cluster requests in profiling output + +Introduced in: v3.9.5, v3.10.2 + +The query profiling output in the web interface and _arangosh_ now shows the +number of HTTP requests for queries that you run against cluster deployments in +the `Query Statistics`: + +```aql +Query String (33 chars, cacheable: false): + FOR doc IN coll + RETURN doc._key + +Execution plan: + Id NodeType Site Calls Items Filtered Runtime [s] Comment + 1 SingletonNode DBS 3 3 0 0.00024 * ROOT + 9 IndexNode DBS 3 0 0 0.00060 - FOR doc IN coll /* primary index scan, index only (projections: `_key`), 3 shard(s) */ + 3 CalculationNode DBS 3 0 0 0.00025 - LET #1 = doc.`_key` /* attribute expression */ /* collections used: doc : coll */ + 7 RemoteNode COOR 6 0 0 0.00227 - REMOTE + 8 GatherNode COOR 2 0 0 0.00209 - GATHER /* parallel, unsorted */ + 4 ReturnNode COOR 2 0 0 0.00008 - RETURN #1 + +Indexes used: + By Name Type Collection Unique Sparse Cache Selectivity Fields Stored values Ranges + 9 primary primary coll true false false 100.00 % [ `_key` ] [ ] * + +Optimization rules applied: + Id RuleName + 1 scatter-in-cluster + 2 distribute-filtercalc-to-cluster + 3 remove-unnecessary-remote-scatter + 4 reduce-extraction-to-projection + 5 parallelize-gather + +Query Statistics: + Writes Exec Writes Ign Scan Full Scan Index Cache Hits/Misses Filtered Requests Peak Mem [b] Exec Time [s] + 0 0 0 0 0 / 0 0 9 32768 0.00564 +``` + +### New stage in query profiling output + +Introduced in: v3.10.3 + +The query profiling output has a new `instantiating executors` stage. +The time spent in this stage is the time needed to create the query executors +from the final query execution time. In cluster mode, this stage also includes +the time needed for physically distributing the query snippets to the +participating DB-Servers. Previously, the time spent for instantiating executors +and the physical distribution was contained in the `optimizing plan` stage. + +``` +Query Profile: + Query Stage Duration [s] + initializing 0.00001 + parsing 0.00009 + optimizing ast 0.00001 + loading collections 0.00001 + instantiating plan 0.00004 + optimizing plan 0.00088 + instantiating executors 0.00153 + executing 1.27349 + finalizing 0.00091 +``` + +### Limit for the normalization of `FILTER` conditions + +Converting complex AQL `FILTER` conditions with a lot of logical branches +(`AND`, `OR`, `NOT`) into the internal DNF (disjunctive normal form) format can +take a large amount of processing time and memory. The new `maxDNFConditionMembers` +query option is a threshold for the maximum number of `OR` sub-nodes in the +internal representation and defaults to `786432`. + +You can also set the threshold globally instead of per query with the +[`--query.max-dnf-condition-members` startup option](../../components/arangodb-server/options.md#--querymax-dnf-condition-members). + +If the threshold is hit, the query continues with a simplified representation of +the condition, which is **not usable in index lookups**. However, this should +still be better than overusing memory or taking a very long time to compute the +DNF version. + +## Server options + +### Telemetrics + +Starting with version 3.11, ArangoDB automatically gathers information on how +it is used and the features being utilized. This data is used to identify the +primary usage patterns and features, and to measure their adoption rate. + +The information collected by ArangoDB is anonymous and purely statistical. 
+It does not contain any personal information like usernames or IP addresses, nor +any content of the documents stored in ArangoDB. This means that your privacy is +protected, and that there is no risk of your data being compromised. + +If for any reason you prefer not to share usage statistics with ArangoDB, you +can easily disable this feature by setting the new `--server.telemetrics-api` +startup option to `false`. The default value is `true`. + +For a detailed list of what anonymous metrics ArangoDB collects see +[Telemetrics](../../operations/administration/telemetrics.md). + +### Extended naming constraints for collections, Views, and indexes + +In ArangoDB 3.9, the `--database.extended-names-databases` startup option was +added to optionally allow database names to contain most UTF-8 characters. +The startup option has been renamed to `--database.extended-names` in 3.11 and +now controls whether you want to use the extended naming constraints for +database, collection, View, and index names. + +This feature is **experimental** in ArangoDB 3.11, but will become the norm in +a future version. + +Running the server with the option enabled provides support for extended names +that are not comprised within the ASCII table, such as Japanese or Arabic +letters, emojis, letters with accentuation. Also, many ASCII characters that +were formerly banned by the traditional naming constraints are now accepted. + +Example collection, View, and index names that can be used with the new extended +constraints: `España`, `😀`, `犬`, `كلب`, `@abc123`, `København`, `München`, +`Бишкек`, `abc? <> 123!` + +Using extended collection and View names in the JavaScript API such as in +_arangosh_ or Foxx may require using the square bracket notation instead of the +dot notation for property access depending on the characters you use: + +```js +db._create("🥑~колекція =)"); +db.🥑~колекція =).properties(); // dot notation (syntax error) +db["🥑~колекція =)"].properties() // square bracket notation +``` + +Using extended collection and View names in AQL queries requires wrapping the +name in backticks or forward ticks (see [AQL Syntax](../../aql/fundamentals/syntax.md#names)): + +```aql +FOR doc IN `🥑~колекція =)` + RETURN doc +``` + +When using extended names, any Unicode characters in names need to be +[NFC-normalized](http://unicode.org/reports/tr15/#Norm_Forms). +If you try to create a database, collection, View, or index with a non-NFC-normalized +name, the server rejects it. + +The ArangoDB web interface as well as the _arangobench_, _arangodump_, +_arangoexport_, _arangoimport_, _arangorestore_, and _arangosh_ client tools +ship with support for the extended naming constraints, but they require you +to provide NFC-normalized names. + +Note that the default value for `--database.extended-names` is `false` +for compatibility with existing client drivers and applications that only support +ASCII names according to the traditional naming constraints used in previous +ArangoDB versions. Enabling the feature may lead to incompatibilities up to the +ArangoDB instance becoming inaccessible for such drivers and client applications. + +Please be aware that dumps containing extended names cannot be restored +into older versions that only support the traditional naming constraints. In a +cluster setup, it is required to use the same naming constraints for all +Coordinators and DB-Servers of the cluster. Otherwise, the startup is +refused. 
In DC2DC setups, it is also required to use the same naming constraints
+for both datacenters to avoid incompatibilities.
+
+Also see:
+- [Collection names](../../concepts/data-structure/collections.md#collection-names)
+- [View names](../../concepts/data-structure/views.md#view-names)
+- Index names have the same character restrictions as collection names
+
+### Verify `.sst` files
+
+The new `--rocksdb.verify-sst` startup option lets you validate the `.sst` files
+currently contained in the database directory. If set to `true`, all SST files
+in the `engine-rocksdb` folder in the database directory are validated on
+startup, and then the process finishes execution.
+The default value is `false`.
+
+### Support for additional value suffixes
+
+Numeric startup options support suffixes like `m` (megabytes) and `GiB` (gibibytes)
+to make it easier to specify values that are expected in bytes. The following
+suffixes are now also supported:
+
+- `tib`, `TiB`, `TIB`: tebibytes (factor 1024^4)
+- `t`, `tb`, `T`, `TB`: terabytes (factor 1000^4)
+- `b`, `B`: bytes (factor 1)
+
+Example: `arangod --rocksdb.total-write-buffer-size 2TiB`
+
+See [Suffixes for numeric options](../../operations/administration/configuration.md#suffixes-for-numeric-options)
+for details.
+
+### Configurable status code if write concern not fulfilled
+
+In cluster deployments, you can use a replication factor greater than `1` for
+collections. This creates additional shard replicas for redundancy. For write
+operations to these collections, you can define how many replicas need to
+acknowledge the write for the operation to succeed. This option is called the
+write concern. If there are not enough in-sync replicas available, the
+write concern cannot be fulfilled. An error with the HTTP `403 Forbidden`
+status code is returned immediately in this case.
+
+You can now change the status code via the new
+`--cluster.failed-write-concern-status-code` startup option. It defaults to `403`,
+but you can set it to `503` to use an HTTP `503 Service Unavailable` status code
+instead. This signals client applications that it is a temporary error.
+
+Note that no automatic retry of the operation is attempted by the cluster if you
+set the startup option to `503`. It only changes the status code to one that
+doesn't signal a permanent error like `403` does.
+It is up to client applications to retry the operation.
+
+### RocksDB BLOB storage (experimental)
+
+From version 3.11 onward, ArangoDB can make use of RocksDB's integrated BLOB
+(binary large object) storage for larger documents, called _BlobDB_.
+This is currently an experimental feature that is not supported and should not
+be used in production.
+
+[BlobDB is an integral part of RocksDB](https://rocksdb.org/blog/2021/05/26/integrated-blob-db.html)
+and provides a key-value separation: large values are stored in dedicated BLOB
+files, and only a small pointer to them is stored in the LSM tree's SST files.
+Storing values separately from the keys means that the values do not need to be
+moved through the LSM tree by the compaction. This reduces write amplification
+and is especially beneficial for large values.
+
+When the option is enabled in ArangoDB, the key-value separation is used for
+the documents column family, because large values are mostly to be expected here.
+The cutoff value for the key-value separation is configurable by a startup option,
+i.e. the administrator can set a size limit for values from which onwards they
+are offloaded to separate BLOB files.
This allows storing small documents +inline with the keys as before, but still benefit from reduced write amplification +for larger documents. + +BlobDB is disabled by default in ArangoDB 3.11. +Using BlobDB in ArangoDB is experimental and not recommended in production. It is +made available as an experimental feature so that further tests and tuning can be +done by interested parties. Future versions of ArangoDB may declare the feature +production-ready and even enable BlobDB by default. + +There are currently a few caveats when using BlobDB in ArangoDB: + +- Even though BlobDB can help reduce the write amplification, it may increase the + read amplification and may worsen the read performance for some workloads. +- The various tuning parameters that BlobDB offers are made available in ArangoDB, + but the current default settings for the BlobDB tuning options are not ideal + for many use cases and need to be adjusted by administrators first. +- It is very likely that the default settings for the BlobDB tuning options will + change in future versions of ArangoDB. +- Memory and disk usage patterns are different to that of versions running without + BlobDB enabled. It is very likely that memory limits and disk capacity may + need to be adjusted. +- Some metrics for observing RocksDB do not react properly when BlobDB is in use. +- The built-in throttling mechanism for controlling the write-throughput + slows down writes too much when BlobDB is used. This can be circumvented with + tuning parameters, but the defaults may be too aggressive. + +The following experimental startup options have been added in ArangoDB 3.11 to +enable and configure BlobDB: + +- `--rocksdb.enable-blob-files`: Enable the usage of BLOB files for the + documents column family. This option defaults to `false`. All following + options are only relevant if this option is set to `true`. +- `--rocksdb.min-blob-size`: Size threshold for storing large documents in + BLOB files (in bytes, 0 = store all documents in BLOB files). +- `--rocksdb.blob-file-size`: Size limit for BLOB files in the documents + column family (in bytes). Note that RocksDB counts the size of uncompressed + BLOBs before checking if a new BLOB file needs to be started, even though + the BLOB may be compressed and end up much smaller than uncompressed. +- `--rocksdb.blob-compression-type`: Compression algorithm to use for BLOB + data in the documents column family. +- `--rocksdb.enable-blob-garbage-collection`: Enable BLOB garbage collection + during compaction in the documents column family. +- `--rocksdb.blob-garbage-collection-age-cutoff`: Age cutoff for garbage + collecting BLOB files in the documents column family (percentage value from + 0 to 1 determines how many BLOB files are garbage collected during + compaction). +- `--rocksdb.blob-garbage-collection-force-threshold`: Garbage ratio + threshold for scheduling targeted compactions for the oldest BLOB files + in the documents column family. + +Note that ArangoDB's built-in throttling mechanism that automatically adjusts +the write rate for RocksDB may need to be reconfigured as well to see the +benefits of BlobDB. The relevant startup options for the throttle are: + +- `--rocksdb.throttle-lower-bound-bps` +- `--rocksdb.throttle-max-write-rate` +- `--rocksdb.throttle-slow-down-writes-trigger` + +### `--query.max-dnf-condition-members` option + +See [Limit for the normalization of `FILTER` conditions](#limit-for-the-normalization-of-filter-conditions). 
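+
+A minimal _arangosh_ sketch of overriding the threshold for a single query via
+the `maxDNFConditionMembers` query option described above (the collection and
+filter are placeholders):
+
+```js
+// query options are passed as the third argument of db._query()
+db._query(
+  "FOR doc IN coll FILTER doc.a == 1 OR doc.b == 2 OR doc.c == 3 RETURN doc",
+  {},
+  { maxDNFConditionMembers: 8 }
+).toArray();
+```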
+ +### `--rocksdb.reserve-file-metadata-memory` option + +This new startup option controls whether to account for `.sst` file metadata +memory in the block cache. + +### ArangoSearch column cache limit + +Introduced in: v3.9.5, v3.10.2 + +The new `--arangosearch.columns-cache-limit` startup option lets you control how +much memory (in bytes) the [ArangoSearch column cache](#arangosearch-column-cache-enterprise-edition) +is allowed to use. + +Introduced in: v3.10.6 + +You can reduce the memory usage of the column cache in cluster deployments by +only using the cache for leader shards with the new +[`--arangosearch.columns-cache-only-leader` startup option](../../components/arangodb-server/options.md#--arangosearchcolumns-cache-only-leader). +It is disabled by default, which means followers also maintain a column cache. + +### AQL query logging + +Introduced in: v3.9.5, v3.10.2 + +There are three new startup options to configure how AQL queries are logged: + +- `--query.log-failed` for logging all failed AQL queries, to be used during + development or to catch unexpected failed queries in production (off by default) +- `--query.log-memory-usage-threshold` to define a peak memory threshold from + which on a warning is logged for AQL queries that exceed it (default: 4 GB) +- `--query.max-artifact-log-length` for controlling the length of logged query + strings and bind parameter values. Both are truncated to 4096 bytes by default. + +### Index cache refill options + +Introduced in: v3.9.6, v3.10.2 + +- `--rocksdb.auto-refill-index-caches-on-modify`: Whether to automatically + (re-)fill in-memory index cache entries on insert/update/replace operations + by default. Default: `false`. +- `--rocksdb.auto-refill-index-caches-queue-capacity`: How many changes can be + queued at most for automatically refilling the index cache. Default: `131072`. +- `--rocksdb.auto-fill-index-caches-on-startup`: Whether to automatically fill + the in-memory index cache with entries on server startup. Default: `false`. +- `--rocksdb.max-concurrent-index-fill-tasks`: The maximum number of index fill + tasks that can run concurrently on server startup. Default: the number of + cores divided by 8, but at least `1`. + +--- + +Introduced in: v3.9.10, v3.10.5 + +- `--rocksdb.auto-refill-index-caches-on-followers`: Control whether automatic + refilling of in-memory caches should happen on followers or only leaders. + The default value is `true`, i.e. refilling happens on followers, too. + +### Cluster supervision options + +Introduced in: v3.9.6, v3.10.2 + +The following new options allow you to delay supervision actions for a +configurable amount of time. This is desirable in case DB-Servers are restarted +or fail and come back quickly because it gives the cluster a chance to get in +sync and fully resilient without deploying additional shard replicas and thus +without causing any data imbalance: + +- `--agency.supervision-delay-add-follower`: + The delay in supervision, before an AddFollower job is executed (in seconds). + +- `--agency.supervision-delay-failed-follower`: + The delay in supervision, before a FailedFollower job is executed (in seconds). + +Introduced in: v3.9.7, v3.10.2 + +A `--agency.supervision-failed-leader-adds-follower` startup option has been +added with a default of `true` (behavior as before). 
If you set this option to +`false`, a `FailedLeader` job does not automatically configure a new shard +follower, thereby preventing unnecessary network traffic, CPU load, and I/O load +for the case that the server comes back quickly. If the server has permanently +failed, an `AddFollower` job is created anyway eventually, as governed by the +`--agency.supervision-delay-add-follower` option. + +### RocksDB Bloom filter option + +Introduced in: v3.10.3 + +A new `--rocksdb.bloom-filter-bits-per-key` startup option has been added to +configure the number of bits to use per key in a Bloom filter. + +The default value is `10`, which is downwards-compatible to the previously +hard-coded value. + +### Disable user-defined AQL functions + +Introduced in: v3.10.4 + +The new `--javascript.user-defined-functions` startup option lets you disable +user-defined AQL functions so that no user-defined JavaScript code of +[UDFs](../../aql/user-defined-functions.md) runs on the server. This can be useful to close off +a potential attack vector in case no user-defined AQL functions are used. +Also see [Server security options](../../operations/security/security-options.md). + +### Option to disable Foxx + +Introduced in: v3.10.5 + +A `--foxx.enable` startup option has been added to let you configure whether +access to user-defined Foxx services is possible for the instance. It defaults +to `true`. + +If you set the option to `false`, access to Foxx services is forbidden and is +responded with an HTTP `403 Forbidden` error. Access to the management APIs for +Foxx services are also disabled as if you set `--foxx.api false` manually. + +Access to ArangoDB's built-in web interface, which is also a Foxx service, is +still possible even with the option set to `false`. + +Disabling the access to Foxx can be useful to close off a potential attack +vector in case Foxx is not used. +Also see [Server security options](../../operations/security/security-options.md). + +### RocksDB auto-flushing + +Introduced in: v3.9.10, v3.10.5 + +A new feature for automatically flushing RocksDB Write-Ahead Log (WAL) files and +in-memory column family data has been added. + +An auto-flush occurs if the number of live WAL files exceeds a certain threshold. +This ensures that WAL files are moved to the archive when there are a lot of +live WAL files present, for example, after a restart. In this case, RocksDB does +not count any previously existing WAL files when calculating the size of WAL +files and comparing its `max_total_wal_size`. Auto-flushing fixes this problem, +but may prevent WAL files from being moved to the archive quickly. + +You can configure the feature via the following new startup options: +- `--rocksdb.auto-flush-min-live-wal-files`: + The minimum number of live WAL files that triggers an auto-flush. Defaults to `10`. +- `--rocksdb.auto-flush-check-interval`: + The interval (in seconds) in which auto-flushes are executed. Defaults to `3600`. + Note that an auto-flush is only executed if the number of live WAL files + exceeds the configured threshold and the last auto-flush is longer ago than + the configured auto-flush check interval. This avoids too frequent auto-flushes. + +### Configurable whitespace in metrics + +Introduced in: v3.10.6 + +The output format of the metrics API slightly changed in v3.10.0. It no longer +had a space between the label and the value for metrics with labels. 
Example: + +``` +arangodb_agency_cache_callback_number{role="SINGLE"}0 +``` + +The new `--server.ensure-whitespace-metrics-format` startup option lets you +control whether the metric label and value shall be separated by a space for +improved compatibility with some tools. This option is enabled by default. +From v3.10.6 onward, the default output format looks like this: + +``` +arangodb_agency_cache_callback_number{role="SINGLE"} 0 +``` + +### Configurable interval when counting open file descriptors + +Introduced in: v3.10.7 + +The `--server.count-descriptors-interval` startup option can be used to specify +the update interval in milliseconds when counting the number of open file +descriptors. + +The default value is `60000`, i.e. the update interval is once per minute. +To disable the counting of open file descriptors, you can set the value to `0`. +If counting is turned off, the `arangodb_file_descriptors_current` metric +reports a value of `0`. + +### Configurable limit of collections per query + +Introduced in: v3.10.7, v3.11.1 + +The `--query.max-collections-per-query` startup option allows you to adjust the +previously fixed limit for the maximum number of collections/shards per AQL query. +The default value is `2048`, which is equal to the fixed limit of +collections/shards in older versions. + +### Custom arguments to rclone + +Introduced in: v3.9.11, v3.10.7, v3.11.1 + +The `--rclone.argument` startup option can be used to prepend custom arguments +to rclone. For example, you can enable debug logging to a separate file on +startup as follows: + +``` +arangod --rclone.argument "--log-level=DEBUG" --rclone.argument "--log-file=rclone.log" +``` + +### LZ4 compression for values in the in-memory edge cache + +Introduced in: v3.11.2 + +LZ4 compression of edge index cache values allows to store more data in main +memory than without compression, so the available memory can be used more +efficiently. The compression is transparent and does not require any change to +queries or applications. +The compression can add CPU overhead for compressing values when storing them +in the cache, and for decompressing values when fetching them from the cache. + +The new startup option `--cache.min-value-size-for-edge-compression` can be +used to set a threshold value size for compression edge index cache payload +values. The default value is `1GB`, which effectively turns compression +off. Setting the option to a lower value (i.e. `100`) turns on the +compression for any payloads whose size exceeds this value. + +The new startup option `--cache.acceleration-factor-for-edge-compression` can +be used to fine-tune the compression. The default value is `1`. +Higher values typically mean less compression but faster speeds. + +The following new metrics can be used to determine the usefulness of +compression: + +- `rocksdb_cache_edge_inserts_effective_entries_size_total`: returns the total + number of bytes of all entries that were ever stored in the in-memory edge cache, + after compression was attempted/applied. This metric is populated regardless + of whether compression is used or not. +- `rocksdb_cache_edge_inserts_uncompressed_entries_size_total`: returns the total + number of bytes of all entries that were ever stored in the in-memory edge + cache, before compression was applied. This metric is populated regardless of + whether compression is used or not. +- `rocksdb_cache_edge_compression_ratio`: returns the effective + compression ratio for all edge cache entries ever stored in the cache. 
+ +Note that these metrics are increased upon every insertion into the edge +cache, but not decreased when data gets evicted from the cache. + +### Limit the number of databases in a deployment + +Introduced in: v3.10.10, v3.11.2 + +The `--database.max-databases` startup option allows you to limit the +number of databases that can exist in parallel in a deployment. You can use this +option to limit the resources used by database objects. If the option is used +and there are already as many databases as configured by this option, any +attempt to create an additional database fails with error +`32` (`ERROR_RESOURCE_LIMIT`). Additional databases can then only be created +if other databases are dropped first. The default value for this option is +unlimited, so an arbitrary amount of databases can be created. + +### Cluster-internal connectivity checks + +Introduced in: v3.11.5 + +This feature makes Coordinators and DB-Servers in a cluster periodically send +check requests to each other, in order to see if all nodes can connect to +each other. +If a cluster-internal connection to another Coordinator or DB-Server cannot +be established within 10 seconds, a warning is now logged. + +The new `--cluster.connectivity-check-interval` startup option can be used +to control the frequency of the connectivity check, in seconds. +If set to a value greater than zero, the initial connectivity check is +performed approximately 15 seconds after the instance start, and subsequent +connectivity checks are executed with the specified frequency. +If set to `0`, connectivity checks are disabled. + +You can also use the following metrics to monitor and detect temporary or +permanent connectivity issues: +- `arangodb_network_connectivity_failures_coordinators`: Number of failed + connectivity check requests sent by this instance to Coordinators. +- `arangodb_network_connectivity_failures_dbservers_total`: Number of failed + connectivity check requests sent to DB-Servers. + +### Configurable maximum for queued log entries + +Introduced in: v3.10.12, v3.11.5 + +The new `--log.max-queued-entries` startup option lets you configure how many +log entries are queued in a background thread. + +Log entries are pushed on a queue for asynchronous writing unless you enable the +`--log.force-direct` startup option. If you use a slow log output (e.g. syslog), +the queue might grow and eventually overflow. + +You can configure the upper bound of the queue with this option. If the queue is +full, log entries are written synchronously until the queue has space again. + +### Monitoring per collection/database/user + +Introduced in: v3.10.13, v3.11.7 + +The following metrics have been introduced to track per-shard requests on +DB-Servers: +- `arangodb_collection_leader_reads_total`: The number of read requests on + leaders, per shard, and optionally also split by user. +- `arangodb_collection_leader_writes_total`: The number of write requests on + leaders, per shard, and optionally also split by user. +- `arangodb_collection_requests_bytes_read_total`: The number of bytes read in + read requests on leaders. +- `arangodb_collection_requests_bytes_written_total`: The number of bytes written + in write requests on leaders and followers. + +To opt into these metrics, you can use the new `--server.export-shard-usage-metrics` +startup option. It can be set to one of the following values on DB-Servers: +- `disabled`: No shard usage metrics are recorded nor exported. This is the + default value. 
+- `enabled-per-shard`: This makes DB-Servers collect per-shard usage metrics. +- `enabled-per-shard-per-user`: This makes DB-Servers collect per-shard + and per-user metrics. This is more granular than `enabled-per-shard` but + can produce a lot of metrics. + +Whenever a shard is accessed in read or write mode by one of the following +operations, the metrics are populated dynamically, either with a per-user +label or not, depending on the above setting. +The metrics are retained in memory on DB-Servers. Removing databases, +collections, or users that are already included in the metrics won't remove +the metrics until the DB-Server is restarted. + +The following operations increase the metrics: +- AQL queries: an AQL query increases the read or write counters exactly + once for each involved shard. For shards that are accessed in read/write + mode, only the write counter is increased. +- Single-document insert, update, replace, and remove operations: for each + such operation, the write counter is increased once for the affected + shard. +- Multi-document insert, update, replace, and remove operations: for each + such operation, the write counter is increased once for each shard + that is affected by the operation. Note that this includes collection + truncate operations. +- Single and multi-document read operations: for each such operation, the + read counter is increased once for each shard that is affected by the + operation. + +The metrics are increased when any of the above operations start, and they +are not decreased should an operation abort or if an operation does not +lead to any actual reads or writes. + +As there can be many of these dynamic metrics based on the number of shards +and/or users in the deployment, these metrics are turned off by default. +When turned on, the metrics are exposed only via the new +`GET /_admin/usage-metrics` endpoint. They are not exposed via the existing +metrics `GET /_admin/metrics` endpoint. + +Note that internal operations, such as internal queries executed for statistics +gathering, internal garbage collection, and TTL index cleanup are not counted in +these metrics. Additionally, all requests that are using the superuser JWT for +authentication and that do not have a specific user set are not counted. + +Enabling these metrics can likely result in a small latency overhead of a few +percent for write operations. The exact overhead depends on +several factors, such as the type of operation (single or multi-document operation), +replication factor, network latency, etc. + +## Miscellaneous changes + +### Write-write conflict improvements + +It is now less likely that writes to the same document in quick succession +result in write-write conflicts for single document operations that use the +Document HTTP API. See +[Incompatible changes in ArangoDB 3.11](incompatible-changes-in-oem.md#write-write-conflict-improvements) +about the detailed behavior changes. + +### Trace logs for graph traversals and path searches + +Detailed information is now logged if you run AQL graph traversals +or (shortest) path searches with AQL and set the +log level to `TRACE` for the `graphs` log topic. This information is fairly +low-level but can help to understand correctness and performance issues with +traversal queries. There are also some new log messages for the `DEBUG` level. + +To enable tracing for traversals and path searches at startup, you can set +`--log.level graphs=trace`. 
+ +To enable or disable it at runtime, you can call the +[`PUT /_admin/log/level`](../../develop/http-api/monitoring/logs.md#set-the-server-log-levels) +endpoint of the HTTP API and set the log level using a request body like +`{"graphs":"TRACE"}`. + +### Persisted Pregel execution statistics + +Pregel algorithm executions now persist execution statistics to a system +collection. The statistics are kept until you remove them, whereas the +previously existing interfaces only store the information about Pregel jobs +temporarily in memory. + +To access and delete persisted execution statistics, you can use the newly added +`history()` and `removeHistory()` JavaScript API methods of the Pregel module: + +```js +var pregel = require("@arangodb/pregel"); +const execution = pregel.start("sssp", "demograph", { source: "vertices/V" }); +const historyStatus = pregel.history(execution); +pregel.removeHistory(); +``` + +You can also use the newly added HTTP endpoints with the +`/_api/control_pregel/history` route. + +You can still use the old interfaces (the `pregel.status()` method as well as +the `GET /_api/control_pregel` and `GET /_api/control_pregel/{id}` endpoints). + +### ArangoSearch metric + +The following ArangoSearch metric has been added in version 3.11: + +| Label | Description | +|:------|:------------| +| `arangodb_search_num_primary_docs` | Number of primary documents for current snapshot. | + +### Traffic accounting metrics + +Introduced in: v3.8.9, v3.9.6, v3.10.2 + +The following metrics for traffic accounting have been added: + +| Label | Description | +|:------|:------------| +| `arangodb_client_user_connection_statistics_bytes_received` | Bytes received for requests, only user traffic. | +| `arangodb_client_user_connection_statistics_bytes_sent` | Bytes sent for responses, only user traffic. +| `arangodb_http1_connections_total` | Total number of HTTP/1.1 connections accepted. | + +### Configurable `CACHE_OBLIVIOUS` option for jemalloc + +Introduced in: v3.9.7, v3.10.3 + +The jemalloc memory allocator supports an option to toggle cache-oblivious large +allocation alignment. It is enabled by default up to v3.10.3, but disabled from +v3.10.4 onwards. Disabling it helps to save 4096 bytes of memory for every +allocation which is at least 16384 bytes large. This is particularly beneficial +for the RocksDB buffer cache. + +You can now configure the option by setting a `CACHE_OBLIVIOUS` environment +variable to the string `true` or `false` before starting ArangoDB. + +See [ArangoDB Server environment variables](../../components/arangodb-server/environment-variables.md) +for details. + +### WAL file tracking metrics + +Introduced in: v3.9.10, v3.10.5 + +The following metrics for write-ahead log (WAL) file tracking have been added: + +| Label | Description | +|:------|:------------| +| `rocksdb_live_wal_files` | Number of live RocksDB WAL files. | +| `rocksdb_wal_released_tick_flush` | Lower bound sequence number from which WAL files need to be kept because of external flushing needs. | +| `rocksdb_wal_released_tick_replication` | Lower bound sequence number from which WAL files need to be kept because of replication. | +| `arangodb_flush_subscriptions` | Number of currently active flush subscriptions. 
| + +### Number of replication clients metric + +Introduced in: v3.10.5 + +The following metric for the number of replication clients for a server has +been added: + +| Label | Description | +|:------|:------------| +| `arangodb_replication_clients` | Number of currently connected/active replication clients. | + +### Reduced memory usage of in-memory edge indexes + +Introduced in: v3.10.5 + +The memory usage of in-memory edge index caches is reduced if most of the edges +in an index refer to a single or mostly the same collection. + +Previously, the full edge IDs, consisting of the referred-to collection +name and the referred-to key of the edge, were stored in full, i.e. the full +values of the edges' `_from` and `_to` attributes. +Now, the first edge inserted into an edge index' in-memory cache determines +the collection name for which all corresponding edges can be stored +prefix-compressed. + +For example, when inserting an edge pointing to `the-collection/abc` into the +empty cache, the collection name `the-collection` is noted for that cache +as a prefix. The edge is stored in-memory as only `/abc`. Further edges +that are inserted into the cache and that point to the same collection are +also stored prefix-compressed. + +The prefix compression is transparent and does not require configuration or +setup. Compression is done separately for each cache, i.e. a separate prefix +can be used for each individual edge index, and separately for the `_from` and +`_to` parts. Lookups from the in-memory edge cache do not return compressed +values but the full-length edge IDs. The compressed values are also used +in-memory only and are not persisted on disk. + +### Sending delay metrics for internal requests + +Introduced in: v3.9.11, v3.10.6 + +The following metrics for diagnosing delays in cluster-internal network requests +have been added: + +| Label | Description | +|:------|:------------| +| `arangodb_network_dequeue_duration` | Internal request duration for the dequeue in seconds. | +| `arangodb_network_response_duration` | Internal request duration from fully sent till response received in seconds. | +| `arangodb_network_send_duration` | Internal request send duration in seconds. | +| `arangodb_network_unfinished_sends_total` | Number of internal requests for which sending has not finished. | + +### Peak memory metric for in-memory caches + +Introduced in: v3.10.7 + +This new metric stores the peak value of the `rocksdb_cache_allocated` metric: + +| Label | Description | +|:------|:------------| +| `rocksdb_cache_peak_allocated` | Global peak memory allocation of ArangoDB in-memory caches. | + +### Number of SST files metric + +Introduced in: v3.10.7, v3.11.1 + +This new metric reports the number of RocksDB `.sst` files: + +| Label | Description | +|:------|:------------| +| `rocksdb_total_sst_files` | Total number of RocksDB sst files, aggregated over all levels. | + +### File descriptor metrics + +Introduced in: v3.10.7 + +The following system metrics have been added: + +| Label | Description | +|:------|:------------| +| `arangodb_file_descriptors_limit` | System limit for the number of open files for the arangod process. | +| `arangodb_file_descriptors_current` | Number of file descriptors currently opened by the arangod process. | + +### More instant Hot Backups + +Introduced in: v3.10.10, v3.11.3 + +Cluster deployments no longer wait for all in-progress transactions to get +committed when a user requests a Hot Backup. 
The waiting could cause deadlocks +and thus Hot Backups to fail, in particular in the Arango Managed Platform (AMP). Now, Hot Backups are +created immediately and commits have to wait until the backup process is done. + +### In-memory edge cache startup options and metrics + +Introduced in: v3.11.4 + +The following startup options have been added: + +- `--cache.max-spare-memory-usage`: the maximum memory usage for spare tables + in the in-memory cache. + +- `--cache.high-water-multiplier`: controls the cache's effective memory usage + limit. The user-defined memory limit (i.e. `--cache.size`) is multiplied with + this value to create the effective memory limit, from which on the cache tries + to free up memory by evicting the oldest entries. The default value is `0.56`, + matching the previously hardcoded 56% for the cache subsystem. + + You can increase the multiplier to make the cache subsystem use more memory, but + this may overcommit memory because the cache memory reclamation procedure is + asynchronous and can run in parallel to other tasks that insert new data. + In case a deployment's memory usage is already close to the maximum, increasing + the multiplier can lead to out-of-memory (OOM) kills. + +The following metrics have been added: + +| Label | Description | +|:------|:------------| +| `rocksdb_cache_edge_compressed_inserts_total` | Total number of compressed inserts into the in-memory edge cache. | +| `rocksdb_cache_edge_empty_inserts_total` | Total number of insertions into the in-memory edge cache for non-connected edges. | +| `rocksdb_cache_edge_inserts_total` | Total number of insertions into the in-memory edge cache. | + +### Observability of in-memory cache subsystem + +Introduced in: v3.10.11, v3.11.4 + +The following metrics have been added to improve the observability of in-memory +cache subsystem: +- `rocksdb_cache_free_memory_tasks_total`: Total number of free memory tasks + that were scheduled by the in-memory edge cache subsystem. This metric will + be increased whenever the cache subsystem schedules a task to free up memory + in one of the managed in-memory caches. It is expected to see this metric + rising when the cache subsystem hits its global memory budget. +- `rocksdb_cache_free_memory_tasks_duration_total`: Total amount of time spent + inside the free memory tasks of the in-memory cache subsystem. Free memory + tasks are scheduled by the cache subsystem to free up memory in existing cache + hash tables. +- `rocksdb_cache_migrate_tasks_total`: Total number of migrate tasks that were + scheduled by the in-memory edge cache subsystem. This metric will be increased + whenever the cache subsystem schedules a task to migrate an existing cache hash + table to a bigger or smaller size. +- `rocksdb_cache_migrate_tasks_duration_total`: Total amount of time spent inside + the migrate tasks of the in-memory cache subsystem. Migrate tasks are scheduled + by the cache subsystem to migrate existing cache hash tables to a bigger or + smaller table. + +### Detached scheduler threads + +Introduced in: v3.10.13, v3.11.5 + +A scheduler thread now has the capability to detach itself from the scheduler +if it observes the need to perform a potentially long running task, like waiting +for a lock. This allows a new scheduler thread to be started and prevents +scenarios where all threads are blocked waiting for a lock, which has previously +led to deadlock situations. + +Threads waiting for more than 1 second on a collection lock will detach +themselves. 
+ +The following startup option has been added: +- `--server.max-number-detached-threads`: The maximum number of detached scheduler + threads. + +The following metric has been added: +- `arangodb_scheduler_num_detached_threads`: The number of worker threads + currently started and detached from the scheduler. + +### Memory usage of connection and request statistics + +Introduced in: v3.10.12, v3.11.6 + +The following metrics have been added: + +| Label | Description | +|:------|:------------| +| `arangodb_connection_statistics_memory_usage` | Total memory usage of connection statistics. | +| `arangodb_request_statistics_memory_usage` | Total memory usage of request statistics. | + +If the `--server.statistics` startup option is set to `true`, then some +connection and request statistics are built up in memory for incoming request. +It is expected that the memory usage reported by these metrics remains +relatively constant over time. It may grow only when there are bursts of new +connections. Some memory is pre-allocated at startup for higher efficiency. If the +`--server.statistics` startup option is set to `false`, then no memory will be +allocated for connection and request statistics. + +## Client tools + +### arangodump + +#### Option to not dump Views + +_arangodump_ has a new `--dump-views` startup option to control whether +View definitions shall be included in the backup. The default value is `true`. + +#### Improved dump performance (experimental) + +Introduced in: v3.10.8, v3.11.2 + +_arangodump_ has experimental extended parallelization capabilities +to work not only at the collection level, but also at the shard level. +In combination with the newly added support for the VelocyPack format that +ArangoDB uses internally, database dumps can now be created and restored more +quickly and occupy less disk space. This major performance boost makes dumps and +restores up to several times faster, which is extremely useful when dealing +with large shards. + +- Whether the new parallel dump variant is used is controlled by the newly added + `--use-experimental-dump` startup option (introduced in v3.10.8 and v3.11.2). + The default value is `false`. + +- Optionally, you can make _arangodump_ write multiple output files per + collection/shard (introduced in v3.10.10 and v3.11.2). + The file splitting allows for better parallelization when + writing the results to disk, which in case of non-split files must be serialized. + You can enable it by setting the `--split-files` option to `true`. This option + is disabled by default because dumps created with this option enabled cannot + be restored into previous versions of ArangoDB. + +## Internal changes + +### Upgraded bundled library versions + +The bundled version of the OpenSSL library has been upgraded from 1.1.1 to 3.0.8. + +The bundled version of the zlib library has been upgraded to 1.2.13. + +The bundled version of the fmt library has been upgraded to 9.1.0. + +The bundled version of the immer library has been upgraded to 0.8.0. + +The bundled versions of the abseil-cpp, s2geometry, and wcwidth library have +been updated to more recent versions that don't have a version number. + +For ArangoDB 3.11, the bundled version of rclone is 1.62.2. Check if your +rclone configuration files require changes. + +From version 3.11.10 onward, ArangoDB uses the glibc C standard library +implementation with an LGPL-3.0 license instead of libmusl. Notably, it features +string functions that are better optimized for common CPUs. 
diff --git a/site/content/arangodb/oem/_index.md b/site/content/arangodb/oem/_index.md new file mode 100644 index 0000000000..504f67ecc9 --- /dev/null +++ b/site/content/arangodb/oem/_index.md @@ -0,0 +1,42 @@ +--- +title: Recommended Resources +menuTitle: 'OEM LTS' +weight: 97 +layout: default +--- +{{< cloudbanner >}} + +{{< cards >}} + +{{% card title="What is ArangoDB?" link="about/" %}} +Get to know graphs, ArangoDB's use cases and features. +{{% /card %}} + +{{% card title="Get started" link="get-started/" %}} +Learn about ArangoDB's core concepts, how to interact with the database system, +and get a server instance up and running. +{{% /card %}} + +{{% card title="Arango Managed Platform (AMP)" link="amp/" %}} +Try out Arango's fully-managed cloud offering for a faster time to value. +{{% /card %}} + +{{% card title="AQL" link="aql/" %}} +ArangoDB's Query Language AQL lets you use graphs, JSON documents, and search +via a single, composable query language. +{{% /card %}} + +{{% card title="Data Science" link="data-science/" %}} +Discover the graph analytics and machine learning features of ArangoDB. +{{% /card %}} + +{{% card title="Deploy" link="deploy/" %}} +Find the right deployment mode and set up your ArangoDB instance. +{{% /card %}} + +{{% card title="Develop" link="develop/" %}} +See the in-depth feature and API documentation to start developing applications +with ArangoDB as your backend. +{{% /card %}} + +{{< /cards >}} diff --git a/site/content/arangodb/oem/aql/_index.md b/site/content/arangodb/oem/aql/_index.md new file mode 100644 index 0000000000..688215f3dc --- /dev/null +++ b/site/content/arangodb/oem/aql/_index.md @@ -0,0 +1,36 @@ +--- +title: AQL Documentation +menuTitle: AQL +weight: 70 +description: >- + The ArangoDB Query Language (AQL) lets you store, retrieve, and modify data + in various ways in ArangoDB +--- +AQL is mainly a declarative language, meaning that a query expresses what result +should be achieved but not how it should be achieved. AQL aims to be +human-readable and therefore uses keywords from the English language. Another +design goal of AQL was client independence, meaning that the language and syntax +are the same for all clients, no matter what programming language the clients +may use. Further design goals of AQL were the support of complex query patterns +and the different data models ArangoDB offers. + +In its purpose, AQL is similar to the Structured Query Language (SQL). AQL supports +reading and modifying collection data, but it doesn't support data-definition +operations such as creating and dropping databases, collections and indexes. +It is a pure data manipulation language (DML), not a data definition language +(DDL) or a data control language (DCL). + +The syntax of AQL queries is different to SQL, even if some keywords overlap. +Nevertheless, AQL should be easy to understand for anyone with an SQL background. + +The general workflow when executing a query is as follows: + +1. A client application ships an AQL query to the ArangoDB server. The query text + contains everything ArangoDB needs to compute the result set. +2. ArangoDB parses the query, executes it, and compiles the results. If the + query is invalid or cannot be executed, the server returns an error that + the client can process and react to. If the query can be executed + successfully, the server returns the query results (if any) to the client. 
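+
+A minimal _arangosh_ sketch of this workflow (the collection name, attribute,
+and values are placeholders):
+
+```js
+// the client ships the query text and bind parameters to the server
+var result = db._query(
+  "FOR doc IN @@coll FILTER doc.status == @status RETURN doc._key",
+  { "@coll": "users", status: "active" }
+).toArray();
+
+// the server parses and executes the query and returns the results
+print(result);
+```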
+
+For example queries, see the [Data Queries](data-queries.md) and
+[Examples & Query Patterns](examples-and-query-patterns/_index.md) chapters.
diff --git a/site/content/arangodb/oem/aql/common-errors.md b/site/content/arangodb/oem/aql/common-errors.md
new file mode 100644
index 0000000000..352c3ae8e2
--- /dev/null
+++ b/site/content/arangodb/oem/aql/common-errors.md
@@ -0,0 +1,420 @@
+---
+title: Common Errors in AQL
+menuTitle: Common Errors
+weight: 55
+description: >-
+  Avoid injection vulnerabilities and pitfalls like incorrect operator usage
+  and performance issues when using ArangoDB's query language
+---
+## Trailing semicolons in query strings
+
+Many SQL databases allow sending multiple queries at once. In this case, multiple
+queries are separated using the semicolon character. Often it is also supported to
+execute a single query that has a semicolon at its end.
+
+AQL does not support this, and it is a parse error to use a semicolon at the end
+of an AQL query string.
+
+## String concatenation
+
+In AQL, strings must be concatenated using the [`CONCAT()`](functions/string.md#concat)
+function. Joining them together with the `+` operator is not supported. Especially
+as a JavaScript programmer, it is easy to walk into this trap:
+
+```aql
+RETURN "foo" + "bar" // [ 0 ]
+RETURN "foo" + 123 // [ 123 ]
+RETURN "123" + 200 // [ 323 ]
+```
+
+The arithmetic plus operator expects numbers as operands and tries to implicitly
+cast them to numbers if they are of a different type. `"foo"` and `"bar"` are cast
+to `0` and then added together (still zero). If an actual number is added, that
+number is returned (adding zero doesn't change the result). If the string is a
+valid string representation of a number, then it is cast to a number. Thus, adding
+`"123"` and `200` results in two numbers being added up to `323`.
+
+To concatenate elements (with implicit casting to string for non-string values), do:
+
+```aql
+RETURN CONCAT("foo", "bar") // [ "foobar" ]
+RETURN CONCAT("foo", 123) // [ "foo123" ]
+RETURN CONCAT("123", 200) // [ "123200" ]
+```
+
+## Parameter injection vulnerability
+
+Parameter injection means that potentially malicious content is inserted into a
+query, which may change its meaning. It is a security issue that may allow an
+attacker to execute arbitrary queries on the database data.
+
+It often occurs if applications trustfully insert user-provided input into a
+query string and do not filter it correctly or at all. It also often occurs
+when applications build queries naively, without using the security mechanisms
+provided by database software or querying mechanisms.
+
+AQL is not vulnerable to parameter injection in itself, but queries might be
+constructed on the client-side, on an application server or in a Foxx service.
+Assembling query strings with simple **string concatenation** looks trivial,
+but is potentially **unsafe**. You should use
+[bind parameters](fundamentals/bind-parameters.md) instead whenever possible,
+use query building functionality if provided by a driver (see
+[arangojs AQL Helpers](https://arangodb.github.io/arangojs/latest/functions/aql.aql.html)
+for example) or at least sanitize user input with great care.
+
+### Parameter injection examples
+
+Below you find a simple query using the [JavaScript API](../develop/javascript-api/_index.md)
+that is fed with some dynamic input value, pretending it is coming from a web form.
+This could be the case in a Foxx service.
The route happily picks up the input +value, and puts it into a query: + +```js +// evil! +var what = req.params("searchValue"); // user input value from web form +// ... +var query = "FOR doc IN collection FILTER doc.value == " + what + " RETURN doc"; +db._query(query, params).toArray(); +``` + +The above will probably work fine for numeric input values. + +What could an attacker do to this query? Here are a few suggestions to use for +the `searchValue` parameter: + +- for returning all documents in the collection:\ + `1 || true` +- for removing all documents:\ + `1 || true REMOVE doc IN collection //` +- for inserting new documents:\ + `1 || true INSERT { foo: "bar" } IN collection //` + +It should have become obvious that this is extremely unsafe and should be +avoided. A pattern often seen to counteract this is trying to quote and escape +potentially unsafe input values before putting them into query strings. +This may work in some situations, but it is easy to overlook something or get +it subtly wrong: + +```js +// We are sanitizing now, but it is still evil! +var value = req.params("searchValue").replace(/'/g, ''); +// ... +var query = "FOR doc IN collection FILTER doc.value == '" + value + "' RETURN doc"; +db._query(query, params).toArray(); +``` + +The above example uses single quotes for enclosing the potentially unsafe user +input, and also replaces all single quotes in the input value beforehand. +Not only may that change the user input (leading to subtle errors such as +_"why does my search for `O'Brien` not return any results?"_), but it is +also still unsafe. If the user input contains a backslash at the end +(e.g. `foo bar\`), that backslash will escape the closing single quote, +allowing the user input to break out of the string fence again. + +It gets worse if user input is inserted into the query at multiple places. +Let us assume we have a query with two dynamic values: + +```js +query = "FOR doc IN collection FILTER doc.value == '" + value + + "' && doc.type == '" + type + "' RETURN doc"; +``` + +If an attacker inserted `\` for parameter `value` and +` || true REMOVE doc IN collection //` for parameter `type`, then the effective +query would become: + +```aql +FOR doc IN collection + FILTER doc.value == '\' && doc.type == ' || true + REMOVE doc IN collection //' RETURN doc +``` + +… which is highly undesirable. The backslash escapes the closing single quote, +turning the `doc.type` condition into a string, which gets compared to +`doc.value`. Further more, an always true or-condition as well as a remove +operation are injected, changing the query purpose entirely. The original +return operation gets commented out and the query will truncate the collection +instead of returning a few documents. + +### Avoiding parameter injection + +Instead of mixing query string fragments with user inputs naively via string +concatenation, use either **bind parameters** or a **query builder**. Both can +help to avoid the problem of injection, because they allow separating the actual +query operations (like `FOR`, `INSERT`, `REMOVE`) from (user input) values. + +Below, the focus is on bind parameters. This is not to say that query builders +shouldn't be used. They were simply omitted here for the sake of simplicity. + +#### What bind parameters are + +Bind parameters in AQL queries are special tokens that act as placeholders for +actual values. 
Here's an example: + +```aql +FOR doc IN collection + FILTER doc.value == @what + RETURN doc +``` + +In the above query, `@what` is a bind parameter. In order to execute this query, +a value for bind parameter `@what` must be specified. Otherwise query execution will +fail with error 1551 (*no value specified for declared bind parameter*). If a value +for `@what` gets specified, the query can be executed. However, the query string +and the bind parameter values (i.e. the contents of the `@what` bind parameter) will +be handled separately. What's in the bind parameter will always be treated as a value, +and it can't get out of its sandbox and change the semantic meaning of a query. + +#### How bind parameters are used + +To execute a query with bind parameters, the query string (containing the bind +parameters) and the bind parameter values are specified separately (note that when +the bind parameter value is assigned, the prefix `@` needs to be omitted): + +```js +// query string with bind parameter +var query = "FOR doc IN collection FILTER doc.value == @what RETURN doc"; + +// actual value for bind parameter +var params = { what: 42 }; + +// run query, specifying query string and bind parameter separately +db._query(query, params).toArray(); +``` + +If a malicious user would set `@what` to a value of `1 || true`, this wouldn't do +any harm. AQL would treat the contents of `@what` as a single string token, and +the meaning of the query would remain unchanged. The actually executed query would be: + +```aql +FOR doc IN collection + FILTER doc.value == "1 || true" + RETURN doc +``` + +Thanks to bind parameters it is also impossible to turn a selection (i.e. read-only) +query into a data deletion query. + +#### Using JavaScript variables as bind parameters + +There is also a template string generator function `aql` that can be used to safely +(and conveniently) built AQL queries using JavaScript variables and expressions. It +can be invoked as follows: + +```js +const aql = require('@arangodb').aql; // not needed in arangosh + +var value = "some input value"; +var query = aql`FOR doc IN collection + FILTER doc.value == ${value} + RETURN doc`; +var result = db._query(query).toArray(); +``` + +Note that an ES6 template string is used for populating the `query` variable. +The string is assembled using the `aql` generator function which is bundled +with ArangoDB. The template string can contain references to JavaScript +variables or expressions via `${...}`. In the above example, the query +references a variable named `value`. The `aql` function generates an object +with two separate attributes: the query string, containing references to +bind parameters, and the actual bind parameter values. + +Bind parameter names are automatically generated by the `aql` function: + +```js +var value = "some input value"; +aql`FOR doc IN collection FILTER doc.value == ${value} RETURN doc`; + +{ + "query" : "FOR doc IN collection FILTER doc.value == @value0 RETURN doc", + "bindVars" : { + "value0" : "some input value" + } +} +``` + +#### Using bind parameters in dynamic queries + +Bind parameters are helpful, so it makes sense to use them for handling the +dynamic values. You can even use them for queries that itself are highly +dynamic, for example with conditional `FILTER` and `LIMIT` parts. +Here's how to do this: + +```js +// Note: this example has a slight issue... 
hang on reading +var query = "FOR doc IN collection"; +var params = { }; + +if (useFilter) { + query += " FILTER doc.value == @what"; + params.what = req.params("searchValue"); +} + +if (useLimit) { + // not quite right, see below + query += " LIMIT @offset, @count"; + params.offset = req.params("offset"); + params.count = req.params("count"); +} + +query += " RETURN doc"; +db._query(query, params).toArray(); +``` + +Note that in this example we're back to string concatenation, but without the +problem of the query being vulnerable to arbitrary modifications. + +#### Input value validation and sanitation + +Still you should prefer to be paranoid, and try to detect invalid input values +as early as possible, at least before executing a query with them. This is +because some input parameters may affect the runtime behavior of queries +negatively or, when modified, may lead to queries throwing runtime errors +instead of returning valid results. This isn't something an attacker +should deserve. + +`LIMIT` is a good example for this: if used with a single argument, the +argument should be numeric. When `LIMIT` is given a string value, executing +the query will fail. You may want to detect this early and don't return an +HTTP 500 (as this would signal attackers that they were successful breaking +your application). + +Another problem with `LIMIT` is that high `LIMIT` values are likely more +expensive than low ones, and you may want to disallow using `LIMIT` values +exceeding a certain threshold. + +Here is what you could do in such cases: + +```js +var query = "FOR doc IN collection LIMIT @count RETURN doc"; + +// some default value for limit +var params = { count: 100 }; + +if (useLimit) { + var count = req.params("count"); + + // abort if value does not look like an integer + if (! preg_match(/^d+$/, count)) { + throw "invalid count value!"; + } + + // actually turn it into an integer + params.count = parseInt(count, 10); // turn into numeric value +} + +if (params.count < 1 || params.count > 1000) { + // value is outside of accepted thresholds + throw "invalid count value!"; +} + +db._query(query, params).toArray(); +``` + +This is a bit more complex, but that is a price you are likely willing to pay +for a bit of extra safety. In reality you may want to use a framework for +validation (such as [joi](https://www.npmjs.com/package/joi) +which comes bundled with ArangoDB) instead of writing your own checks all over +the place. + +#### Bind parameter types + +There are two types of bind parameters in AQL: + +- Bind parameters for **values**:\ + Those are prefixed with a single `@` in AQL queries, and are specified + without the prefix when they get their value assigned. These bind parameters + can contain any valid JSON value. + + Examples: `@what`, `@searchValue` + +- Bind parameters for **collections**:\ + These are prefixed with `@@` in AQL queries, and are replaced with the name + of a collection. When the bind parameter value is assigned, the parameter + itself must be specified with a single `@` prefix. Only string values are + allowed for this type of bind parameters. + + Examples: `@@collection`, `@@edgeColl` + +The latter type of bind parameter is probably not used as often, and it should +not be used together with user input. Otherwise users may freely determine on +which collection your AQL queries will operate on (this might be a valid +use case, but normally it is extremely undesired). 
+ +## Unexpected long running queries + +Slow queries can have various reasons and be legitimate for queries with a high +computational complexity or if they touch a lot of data. Use the *Explain* +feature to inspect execution plans and verify that appropriate indexes are +utilized. Also check for mistakes such as references to the wrong variables. + +A literal collection name, which is not part of constructs like `FOR`, +`UPDATE ... IN` etc., stands for an array of all documents of that collection +and can cause an entire collection to be materialized before further +processing. It should thus be avoided. + +Check the execution plan for `/* all collection documents */` and verify that +it is intended. You should also see a warning if you execute such a query: + +> collection 'coll' used as expression operand + +For example, instead of: + +```aql +RETURN coll[* LIMIT 1] +``` + +... with the execution plan ... + +```aql +Execution plan: + Id NodeType Est. Comment + 1 SingletonNode 1 * ROOT + 2 CalculationNode 1 - LET #2 = coll /* all collection documents */[* LIMIT 0, 1] /* v8 expression */ + 3 ReturnNode 1 - RETURN #2 +``` + +... you can use the following equivalent query: + +```aql +FOR doc IN coll + LIMIT 1 + RETURN doc +``` + +... with the (better) execution plan: + +```aql +Execution plan: + Id NodeType Est. Comment + 1 SingletonNode 1 * ROOT + 2 EnumerateCollectionNode 44 - FOR doc IN Characters /* full collection scan */ + 3 LimitNode 1 - LIMIT 0, 1 + 4 ReturnNode 1 - RETURN doc +``` + +Similarly, make sure you have not confused any variable names with collection +names by accident: + +```aql +LET names = ["John", "Mary", ...] +// supposed to refer to variable "names", not collection "Names" +FOR name IN Names + ... +``` + +You can set the startup option `--query.allow-collections-in-expressions` to +*false* to disallow collection names in arbitrary places in AQL expressions +to prevent such mistakes. Also see +[ArangoDB Server Query Options](../components/arangodb-server/options.md#--queryallow-collections-in-expressions) + +{{% comment %}} +Rename to Error Sources? + +Quote marks around bind parameter placeholders +https://github.com/arangodb/arangodb/issues/1634#issuecomment-167808660 + +FILTER HAS(doc, "attr") instead of FILTER doc.attr / FILTER doc.attr != null + +collection ... not found error, e.g. access of variable after COLLECT (no longer existing) +{{% /comment %}} diff --git a/site/content/arangodb/oem/aql/data-queries.md b/site/content/arangodb/oem/aql/data-queries.md new file mode 100644 index 0000000000..1b07b4214a --- /dev/null +++ b/site/content/arangodb/oem/aql/data-queries.md @@ -0,0 +1,554 @@ +--- +title: AQL Data Queries +menuTitle: Data Queries +weight: 20 +description: >- + With AQL queries, you can read and write data in the form of documents +--- +There are two fundamental types of AQL queries: +- queries which access data (read documents) +- queries which modify data (create, update, replace, delete documents) + +## Data Access Queries + +Retrieving data from the database with AQL does always include a **RETURN** +operation. It can be used to return a static value, such as a string: + +```aql +RETURN "Hello ArangoDB!" 
+``` + +The query result is always an array of elements, even if a single element was +returned and contains a single element in that case: `["Hello ArangoDB!"]` + +The function `DOCUMENT()` can be called to retrieve a single document via +its document identifier, for instance: + +```aql +RETURN DOCUMENT("users/phil") +``` + +`RETURN` is usually accompanied by a **FOR** loop to iterate over the +documents of a collection. The following query executes the loop body for all +documents of a collection called `users`. Each document is returned unchanged +in this example: + +```aql +FOR doc IN users + RETURN doc +``` + +Instead of returning the raw `doc`, one can easily create a projection: + +```aql +FOR doc IN users + RETURN { user: doc, newAttribute: true } +``` + +For every user document, an object with two attributes is returned. The value +of the attribute `user` is set to the content of the user document, and +`newAttribute` is a static attribute with the boolean value `true`. + +Operations like **FILTER**, **SORT** and **LIMIT** can be added to the loop body +to narrow and order the result. Instead of above shown call to `DOCUMENT()`, +one can also retrieve the document that describes user `phil` like so: + +```aql +FOR doc IN users + FILTER doc._key == "phil" + RETURN doc +``` + +The document key is used in this example, but any other attribute could equally +be used for filtering. Since the document key is guaranteed to be unique, no +more than a single document can match this filter. For other attributes this +may not be the case. To return a subset of active users (determined by an +attribute called `status`), sorted by name in ascending order, you can do: + +```aql +FOR doc IN users + FILTER doc.status == "active" + SORT doc.name + LIMIT 10 +``` + +Note that operations do not have to occur in a fixed order and that their order +can influence the result significantly. Limiting the number of documents +before a filter is usually not what you want, because it easily misses a lot +of documents that would fulfill the filter criterion, but are ignored because +of a premature `LIMIT` clause. Because of the aforementioned reasons, `LIMIT` +is usually put at the very end, after `FILTER`, `SORT` and other operations. + +See the [High Level Operations](high-level-operations/_index.md) chapter for more details. + +## Data Modification Queries + +AQL supports the following data modification operations: + +- **INSERT**: insert new documents into a collection +- **UPDATE**: partially update existing documents in a collection +- **REPLACE**: completely replace existing documents in a collection +- **REMOVE**: remove existing documents from a collection +- **UPSERT**: conditionally insert or update documents in a collection + +You can use them to modify the data of one or multiple documents with a single +query. This is superior to fetching and updating the documents individually with +multiple queries. However, if only a single document needs to be modified, +ArangoDB's specialized data modification operations for single documents might +execute faster. + +Below you find some simple example queries that use these operations. +The operations are detailed in the chapter [High Level Operations](high-level-operations/_index.md). + +### Modifying a single document + +Let's start with the basics: `INSERT`, `UPDATE` and `REMOVE` operations on single documents. 
+Here is an example that inserts a document into a collection called `users` with +the [`INSERT` operation](high-level-operations/insert.md): + +```aql +INSERT { + firstName: "Anna", + name: "Pavlova", + profession: "artist" +} INTO users +``` + +The collection needs to exist before executing the query. AQL queries cannot +create collections. + +If you run the above query, the result is an empty array because we did +not specify what to return using a `RETURN` keyword. It is optional in +modification queries, but mandatory in data access queries. Despite the empty +result, the above query still creates a new user document. + +You may provide a key for the new document; if not provided, ArangoDB creates one for you. + +```aql +INSERT { + _key: "GilbertoGil", + firstName: "Gilberto", + name: "Gil", + city: "Fortalezza" +} INTO users +``` + +As ArangoDB is schema-free, attributes of the documents may vary: + +```aql +INSERT { + _key: "PhilCarpenter", + firstName: "Phil", + name: "Carpenter", + middleName: "G.", + status: "inactive" +} INTO users +``` + +```aql +INSERT { + _key: "NatachaDeclerck", + firstName: "Natacha", + name: "Declerck", + location: "Antwerp" +} INTO users +``` + +The [`UPDATE` operation](high-level-operations/update.md) lets you add or change +attributes of existing documents. The following query modifies a previously +created user, changing the `status` attribute and adding a `location` attribute: + +```aql +UPDATE "PhilCarpenter" WITH { + status: "active", + location: "Beijing" +} IN users +``` + +The [`REPLACE` operation](high-level-operations/replace.md) is an alternative to the +`UPDATE` operation that lets you replace all attributes of a document +(except for attributes that cannot be changed, like `_key`): + +```aql +REPLACE { + _key: "NatachaDeclerck", + firstName: "Natacha", + name: "Leclerc", + status: "active", + level: "premium" +} IN users +``` + +You can delete a document with the [`REMOVE` operation](high-level-operations/remove.md), +only requiring the document key to identify it: + +```aql +REMOVE "GilbertoGil" IN users +``` + +### Modifying multiple documents + +Data modification operations are normally combined with `FOR` loops to +iterate over a given list of documents. They can optionally be combined with +`FILTER` statements and the like. + +To create multiple new documents, use the `INSERT` operation together with `FOR`. +You can also use `INSERT` to generate copies of existing documents from other +collections, or to create synthetic documents (e.g. for testing purposes). +The following query creates 1000 test users with some attributes and stores +them in the `users` collection: + +```aql +FOR i IN 1..1000 + INSERT { + id: 100000 + i, + age: 18 + FLOOR(RAND() * 25), + name: CONCAT('test', TO_STRING(i)), + status: i % 2 == 0 ? "active" : "not active", + active: false, + gender: i % 3 == 0 ? "male" : i % 3 == 1 ? "female" : "diverse" + } IN users +``` + +Let's modify existing documents that match some condition: + +```aql +FOR u IN users + FILTER u.status == "not active" + UPDATE u WITH { status: "inactive" } IN users +``` + +You can also update existing attributes based on their previous value: + +```aql +FOR u IN users + FILTER u.active == true + UPDATE u WITH { numberOfLogins: u.numberOfLogins + 1 } IN users +``` + +The above query only works if there is already a `numberOfLogins` attribute +present in the document. 
If it is unclear whether there is a `numberOfLogins` +attribute in the document, the increase must be made conditional: + +```aql +FOR u IN users + FILTER u.active == true + UPDATE u WITH { + numberOfLogins: HAS(u, "numberOfLogins") ? u.numberOfLogins + 1 : 1 + } IN users +``` + +Updates of multiple attributes can be combined in a single query: + +```aql +FOR u IN users + FILTER u.active == true + UPDATE u WITH { + lastLogin: DATE_NOW(), + numberOfLogins: HAS(u, "numberOfLogins") ? u.numberOfLogins + 1 : 1 + } IN users +``` + +Note than an update query might fail during execution, for example, because a +document to be updated does not exist. In this case, the query aborts at +the first error. In single server mode, all modifications done by the query +are rolled back as if they never happened. + +You can copy documents from one collection to another by reading from one +collection but write to another. +Let's copy the contents of the `users` collection into the `backup` collection: + +```aql +FOR u IN users + INSERT u IN backup +``` + +Note that both collections must already exist when the query is executed. +The query might fail if the `backup` collection already contains documents, +as executing the insert might attempt to insert the same document (identified +by the `_key` attribute) again. This triggers a unique key constraint violation +and aborts the query. In single server mode, all changes made by the query +are also rolled back. +To make such a copy operation work in all cases, the target collection can +be emptied beforehand, using a `REMOVE` query or by truncating it by other means. + +To not just partially update, but completely replace existing documents, use +the `REPLACE` operation. +The following query replaces all documents in the `backup` collection with +the documents found in the `users` collection. Documents common to both +collections are replaced. All other documents remain unchanged. +Documents are compared using their `_key` attributes: + +```aql +FOR u IN users + REPLACE u IN backup +``` + +The above query fails if there are documents in the `users` collection that are +not in the `backup` collection yet. In this case, the query would attempt to replace +documents that do not exist. If such case is detected while executing the query, +the query is aborted. In single server mode, all changes made by the query are +rolled back. + +To make the query succeed regardless of the errors, use the `ignoreErrors` +query option: + +```aql +FOR u IN users + REPLACE u IN backup OPTIONS { ignoreErrors: true } +``` + +This continues the query execution if errors occur during a `REPLACE`, `UPDATE`, +`INSERT`, or `REMOVE` operation. + +Finally, let's find some documents in collection `users` and remove them +from collection `backup`. The link between the documents in both collections is +established via the documents' keys: + +```aql +FOR u IN users + FILTER u.status == "deleted" + REMOVE u IN backup +``` + +The following example removes all documents from both `users` and `backup`: + +```aql +LET r1 = (FOR u IN users REMOVE u IN users) +LET r2 = (FOR u IN backup REMOVE u IN backup) +RETURN true +``` + +### Altering substructures + +To modify lists in documents, for example, to update specific attributes of +objects in an array, you can compute a new array and then update the document +attribute in question. This may involve the use of subqueries and temporary +variables. 
+ +Create a collection named `complexCollection` and run the following query: + +```aql +FOR doc IN [ + { + "topLevelAttribute": "a", + "subList": [ + { + "attributeToAlter": "value to change", + "filterByMe": true + }, + { + "attributeToAlter": "another value to change", + "filterByMe": true + }, + { + "attributeToAlter": "keep this value", + "filterByMe": false + } + ] + }, + { + "topLevelAttribute": "b", + "subList": [ + { + "attributeToAlter": "keep this value", + "filterByMe": false + } + ] + } +] INSERT doc INTO complexCollection +``` + +The following query updates the `subList` top-level attribute of documents. +The `attributeToAlter` values in the nested object are changed if the adjacent +`filterByMe` attribute is `true`: + +```aql +FOR doc in complexCollection + LET alteredList = ( + FOR element IN doc.subList + RETURN element.filterByMe + ? MERGE(element, { attributeToAlter: "new value" }) + : element + ) + UPDATE doc WITH { subList: alteredList } IN complexCollection + RETURN NEW +``` + +```json +[ + { + "_key": "2607", + "_id": "complexCollection/2607", + "_rev": "_fWb_iOO---", + "topLevelAttribute": "a", + "subList": [ + { + "attributeToAlter": "new value", + "filterByMe": true + }, + { + "attributeToAlter": "new value", + "filterByMe": true + }, + { + "attributeToAlter": "keep this value", + "filterByMe": false + } + ] + }, + { + "_key": "2608", + "_id": "complexCollection/2608", + "_rev": "_fWb_iOO--_", + "topLevelAttribute": "b", + "subList": [ + { + "attributeToAlter": "keep this value", + "filterByMe": false + } + ] + } +] +``` + +To improve the query's performance, you can only update documents if there is +a change to the `subList` to be saved. Instead of comparing the current and the +altered list directly, you may compare their hash values using the +[`HASH()` function](functions/miscellaneous.md#hash), which is faster for +larger objects and arrays. You can also replace the subquery with an +[inline expression](operators.md#inline-expressions): + +```aql +FOR doc in complexCollection + LET alteredList = doc.subList[* + RETURN CURRENT.filterByMe + ? MERGE(CURRENT, { attributeToAlter: "new value" }) + : CURRENT + ] + FILTER HASH(doc.subList) != HASH(alteredList) + UPDATE doc WITH { subList: alteredList } IN complexCollection + RETURN NEW +``` + +### Returning documents + +Data modification queries can optionally return documents. In order to reference +the inserted, removed or modified documents in a `RETURN` statement, data modification +statements introduce the `OLD` and/or `NEW` pseudo-values: + +```aql +FOR i IN 1..100 + INSERT { value: i } IN test + RETURN NEW +``` + +```aql +FOR u IN users + FILTER u.status == "deleted" + REMOVE u IN users + RETURN OLD +``` + +```aql +FOR u IN users + FILTER u.status == "not active" + UPDATE u WITH { status: "inactive" } IN users + RETURN NEW +``` + +`NEW` refers to the inserted or modified document revision, and `OLD` refers +to the document revision before update or removal. `INSERT` statements can +only refer to the `NEW` pseudo-value, and `REMOVE` operations only to `OLD`. +`UPDATE`, `REPLACE` and `UPSERT` can refer to either. + +In all cases, the full documents are returned with all their attributes, +including the potentially auto-generated attributes, such as `_id`, `_key`, and `_rev`, +and the attributes not specified in the update expression of a partial update. + +#### Projections of OLD and NEW + +It is possible to return a projection of the documents with `OLD` or `NEW` instead of +returning the entire documents. 
This can be used to reduce the amount of data returned +by queries. + +For example, the following query returns only the keys of the inserted documents: + +```aql +FOR i IN 1..100 + INSERT { value: i } IN test + RETURN NEW._key +``` + +#### Using OLD and NEW in the same query + +For `UPDATE`, `REPLACE`, and `UPSERT` operations, both `OLD` and `NEW` can be used +to return the previous revision of a document together with the updated revision: + +```aql +FOR u IN users + FILTER u.status == "not active" + UPDATE u WITH { status: "inactive" } IN users + RETURN { old: OLD, new: NEW } +``` + +#### Calculations with OLD or NEW + +It is also possible to run additional calculations with `LET` statements between the +data modification part and the final `RETURN` of an AQL query. For example, the following +query performs an upsert operation and returns whether an existing document was +updated, or a new document was inserted. It does so by checking the `OLD` variable +after the `UPSERT` and using a `LET` statement to store a temporary string for +the operation type: + +```aql +UPSERT { name: "test" } + INSERT { name: "test" } + UPDATE { } IN users +LET opType = IS_NULL(OLD) ? "insert" : "update" +RETURN { _key: NEW._key, type: opType } +``` + +### Restrictions + +The name of the modified collection (`users` and `backup` in the above cases) +must be known to the AQL executor at query-compile time and cannot change at +runtime. Using a bind parameter to specify the +[collection name](../concepts/data-structure/collections.md#collection-names) is allowed. + +It is not possible to use multiple data modification operations for the same +collection in the same query, or follow up a data modification operation for a +specific collection with a read operation for the same collection. Neither is +it possible to follow up any data modification operation with a traversal query +(which may read from arbitrary collections not necessarily known at the start of +the traversal). + +That means you may not place several `REMOVE` or `UPDATE` statements for the same +collection into the same query. It is however possible to modify different collections +by using multiple data modification operations for different collections in the +same query. +In case you have a query with several places that need to remove documents from the +same collection, it is recommended to collect these documents or their keys in an array +and have the documents from that array removed using a single `REMOVE` operation. + +Data modification operations can optionally be followed by `LET` operations to +perform further calculations and a `RETURN` operation to return data. + +### Transactional Execution + +On a single server, data modification operations are executed transactionally. +If a data modification operation fails, any changes made by it are rolled +back automatically as if they never happened. + +A query may execute intermediate transaction commits in case the running +transaction (AQL query) hits the specified size thresholds. In this case, the +query's operations carried out so far are committed and not rolled back in case +of a later abort/rollback. This behavior can be controlled by adjusting the +intermediate commit settings for the RocksDB engine. See +[Known limitations for AQL queries](fundamentals/limitations.md#storage-engine-properties). + +In a cluster, AQL data modification queries are not executed transactionally. 
+Additionally, AQL queries with `UPDATE`, `REPLACE`, `UPSERT`, or `REMOVE` +operations require the `_key` attribute to be specified for all documents that +should be modified or removed, even if a shard key attribute other than `_key` +is chosen for the collection. diff --git a/site/content/arangodb/oem/aql/examples-and-query-patterns/_index.md b/site/content/arangodb/oem/aql/examples-and-query-patterns/_index.md new file mode 100644 index 0000000000..bd78af7eee --- /dev/null +++ b/site/content/arangodb/oem/aql/examples-and-query-patterns/_index.md @@ -0,0 +1,115 @@ +--- +title: AQL Query Patterns and Examples +menuTitle: Examples & Query Patterns +weight: 40 +description: >- + Create test data, count documents, use joins, group attributes, traverse + graphs, and other examples +--- +These pages contain some common query patterns with examples. For better +understandability the query results are also included directly below each query. + +Normally, you would want to run queries on data stored in collections. +This section will provide several examples for that. + +Some of the following example queries are executed on a collection _users_ +with the data provided here below. + +## Things to consider when running queries on collections + +Note that all documents created in any collections will automatically get the +following server-generated attributes: + +- `_id`: A unique id, consisting of [collection name](../../concepts/data-structure/collections.md#collection-names) + and a server-side sequence value +- `_key`: The server sequence value +- `_rev`: The document's revision id + +Whenever you run queries on the documents in collections, don't be surprised if +these additional attributes are returned as well. + +Please also note that with real-world data, you might want to create additional +indexes on the data (left out here for brevity). Adding indexes on attributes that are +used in `FILTER` statements may considerably speed up queries. Furthermore, instead of +using attributes such as `id`, `from` and `to`, you might want to use the built-in +`_id`, `_from` and `_to` attributes. Finally, [edge collections](../../concepts/data-models.md#graph-model) +provide a nice way of establishing references/links between documents. +These features have been left out here for brevity as well. 
+ +## Example data + +Some of the following example queries are executed on a collection *users* +with the following initial data: + +```json +[ + { "id": 100, "name": "John", "age": 37, "active": true, "gender": "m" }, + { "id": 101, "name": "Fred", "age": 36, "active": true, "gender": "m" }, + { "id": 102, "name": "Jacob", "age": 35, "active": false, "gender": "m" }, + { "id": 103, "name": "Ethan", "age": 34, "active": false, "gender": "m" }, + { "id": 104, "name": "Michael", "age": 33, "active": true, "gender": "m" }, + { "id": 105, "name": "Alexander", "age": 32, "active": true, "gender": "m" }, + { "id": 106, "name": "Daniel", "age": 31, "active": true, "gender": "m" }, + { "id": 107, "name": "Anthony", "age": 30, "active": true, "gender": "m" }, + { "id": 108, "name": "Jim", "age": 29, "active": true, "gender": "m" }, + { "id": 109, "name": "Diego", "age": 28, "active": true, "gender": "m" }, + { "id": 200, "name": "Sophia", "age": 37, "active": true, "gender": "f" }, + { "id": 201, "name": "Emma", "age": 36, "active": true, "gender": "f" }, + { "id": 202, "name": "Olivia", "age": 35, "active": false, "gender": "f" }, + { "id": 203, "name": "Madison", "age": 34, "active": true, "gender": "x" }, + { "id": 204, "name": "Chloe", "age": 33, "active": true, "gender": "f" }, + { "id": 205, "name": "Eva", "age": 32, "active": false, "gender": "f" }, + { "id": 206, "name": "Abigail", "age": 31, "active": true, "gender": "f" }, + { "id": 207, "name": "Isabella", "age": 30, "active": true, "gender": "f" }, + { "id": 208, "name": "Mary", "age": 29, "active": true, "gender": "f" }, + { "id": 209, "name": "Mariah", "age": 28, "active": true, "gender": "f" } +] +``` + +For some of the examples, we'll also use a collection *relations* to store +relationships between users. 
The example data for *relations* are as follows: + +```json +[ + { "from": 209, "to": 205, "type": "friend" }, + { "from": 206, "to": 108, "type": "friend" }, + { "from": 202, "to": 204, "type": "friend" }, + { "from": 200, "to": 100, "type": "friend" }, + { "from": 205, "to": 101, "type": "friend" }, + { "from": 209, "to": 203, "type": "friend" }, + { "from": 200, "to": 203, "type": "friend" }, + { "from": 100, "to": 208, "type": "friend" }, + { "from": 101, "to": 209, "type": "friend" }, + { "from": 206, "to": 102, "type": "friend" }, + { "from": 104, "to": 100, "type": "friend" }, + { "from": 104, "to": 108, "type": "friend" }, + { "from": 108, "to": 209, "type": "friend" }, + { "from": 206, "to": 106, "type": "friend" }, + { "from": 204, "to": 105, "type": "friend" }, + { "from": 208, "to": 207, "type": "friend" }, + { "from": 102, "to": 108, "type": "friend" }, + { "from": 207, "to": 203, "type": "friend" }, + { "from": 203, "to": 106, "type": "friend" }, + { "from": 202, "to": 108, "type": "friend" }, + { "from": 201, "to": 203, "type": "friend" }, + { "from": 105, "to": 100, "type": "friend" }, + { "from": 100, "to": 109, "type": "friend" }, + { "from": 207, "to": 109, "type": "friend" }, + { "from": 103, "to": 203, "type": "friend" }, + { "from": 208, "to": 104, "type": "friend" }, + { "from": 105, "to": 104, "type": "friend" }, + { "from": 103, "to": 208, "type": "friend" }, + { "from": 203, "to": 107, "type": "boyfriend" }, + { "from": 107, "to": 203, "type": "girlfriend" }, + { "from": 208, "to": 109, "type": "boyfriend" }, + { "from": 109, "to": 208, "type": "girlfriend" }, + { "from": 106, "to": 205, "type": "girlfriend" }, + { "from": 205, "to": 106, "type": "boyfriend" }, + { "from": 103, "to": 209, "type": "girlfriend" }, + { "from": 209, "to": 103, "type": "boyfriend" }, + { "from": 201, "to": 102, "type": "boyfriend" }, + { "from": 102, "to": 201, "type": "girlfriend" }, + { "from": 206, "to": 100, "type": "boyfriend" }, + { "from": 100, "to": 206, "type": "girlfriend" } +] +``` diff --git a/site/content/arangodb/oem/aql/examples-and-query-patterns/actors-and-movies-dataset-queries.md b/site/content/arangodb/oem/aql/examples-and-query-patterns/actors-and-movies-dataset-queries.md new file mode 100644 index 0000000000..ef1c1f17d5 --- /dev/null +++ b/site/content/arangodb/oem/aql/examples-and-query-patterns/actors-and-movies-dataset-queries.md @@ -0,0 +1,859 @@ +--- +title: AQL Example Queries on an Actors and Movies Dataset +menuTitle: Actors & Movies Dataset Queries +weight: 35 +description: >- + Example queries showing different AQL query features and combinations of them +--- +Given a graph `[actors] – actsIn → [movies]` with two vertex collections +**actors** and **movies** and an edge collection **actsIn** with edges pointing +from actor to movie, plenty of interesting queries are possible: + +- All actors who acted in "movie1" OR "movie2" +- All actors who acted in both "movie1" AND "movie2" +- All common movies between "actor1" and "actor2" +- All actors who acted in 3 or more movies +- All movies where exactly 6 actors acted in +- The number of actors by movie +- The number of movies by actor +- The number of movies acted in between two years by actor +- The years and number of movies by actor with actor name + +## Dataset + +We will be using _arangosh_ to create and query the data. All AQL queries are +strings and can simply be copied over to the web interface or your favorite +driver as well. 
+ +```js +var actors = db._create("actors"); +var movies = db._create("movies"); +var actsIn = db._createEdgeCollection("actsIn"); + +var TheMatrix = movies.save({ _key: "TheMatrix", title: "The Matrix", released: 1999, tagline: "Welcome to the Real World" })._id; +var Keanu = actors.save({ _key: "Keanu", name: "Keanu Reeves", born: 1964 })._id; +var Carrie = actors.save({ _key: "Carrie", name: "Carrie-Anne Moss", born: 1967 })._id; +var Laurence = actors.save({ _key: "Laurence", name: "Laurence Fishburne", born: 1961 })._id; +var Hugo = actors.save({ _key: "Hugo", name: "Hugo Weaving", born: 1960 })._id; +var Emil = actors.save({ _key: "Emil", name: "Emil Eifrem", born: 1978 }); + +actsIn.save(Keanu, TheMatrix, { roles: ["Neo"], year: 1999 }); +actsIn.save(Carrie, TheMatrix, { roles: ["Trinity"], year: 1999 }); +actsIn.save(Laurence, TheMatrix, { roles: ["Morpheus"], year: 1999 }); +actsIn.save(Hugo, TheMatrix, { roles: ["Agent Smith"], year: 1999 }); +actsIn.save(Emil, TheMatrix, { roles: ["Emil"], year: 1999 }); + +var TheMatrixReloaded = movies.save({ _key: "TheMatrixReloaded", title: "The Matrix Reloaded", released: 2003, tagline: "Free your mind" }); +actsIn.save(Keanu, TheMatrixReloaded, { roles: ["Neo"], year: 2003 }); +actsIn.save(Carrie, TheMatrixReloaded, { roles: ["Trinity"], year: 2003 }); +actsIn.save(Laurence, TheMatrixReloaded, { roles: ["Morpheus"], year: 2003 }); +actsIn.save(Hugo, TheMatrixReloaded, { roles: ["Agent Smith"], year: 2003 }); + +var TheMatrixRevolutions = movies.save({ _key: "TheMatrixRevolutions", title: "The Matrix Revolutions", released: 2003, tagline: "Everything that has a beginning has an end" }); +actsIn.save(Keanu, TheMatrixRevolutions, { roles: ["Neo"], year: 2003 }); +actsIn.save(Carrie, TheMatrixRevolutions, { roles: ["Trinity"], year: 2003 }); +actsIn.save(Laurence, TheMatrixRevolutions, { roles: ["Morpheus"], year: 2003 }); +actsIn.save(Hugo, TheMatrixRevolutions, { roles: ["Agent Smith"], year: 2003 }); + +var TheDevilsAdvocate = movies.save({ _key: "TheDevilsAdvocate", title: "The Devil's Advocate", released: 1997, tagline: "Evil has its winning ways" })._id; +var Charlize = actors.save({ _key: "Charlize", name: "Charlize Theron", born: 1975 })._id; +var Al = actors.save({ _key: "Al", name: "Al Pacino", born: 1940 })._id; +actsIn.save(Keanu, TheDevilsAdvocate, { roles: ["Kevin Lomax"], year: 1997 }); +actsIn.save(Charlize, TheDevilsAdvocate, { roles: ["Mary Ann Lomax"], year: 1997 }); +actsIn.save(Al, TheDevilsAdvocate, { roles: ["John Milton"], year: 1997 }); + +var AFewGoodMen = movies.save({ _key: "AFewGoodMen", title: "A Few Good Men", released: 1992, tagline: "In the heart of the nation's capital, in a courthouse of the U.S. government, one man will stop at nothing to keep his honor, and one will stop at nothing to find the truth." 
})._id; +var TomC = actors.save({ _key: "TomC", name: "Tom Cruise", born: 1962 })._id; +var JackN = actors.save({ _key: "JackN", name: "Jack Nicholson", born: 1937 })._id; +var DemiM = actors.save({ _key: "DemiM", name: "Demi Moore", born: 1962 })._id; +var KevinB = actors.save({ _key: "KevinB", name: "Kevin Bacon", born: 1958 })._id; +var KieferS = actors.save({ _key: "KieferS", name: "Kiefer Sutherland", born: 1966 })._id; +var NoahW = actors.save({ _key: "NoahW", name: "Noah Wyle", born: 1971 })._id; +var CubaG = actors.save({ _key: "CubaG", name: "Cuba Gooding Jr.", born: 1968 })._id; +var KevinP = actors.save({ _key: "KevinP", name: "Kevin Pollak", born: 1957 })._id; +var JTW = actors.save({ _key: "JTW", name: "J.T. Walsh", born: 1943 })._id; +var JamesM = actors.save({ _key: "JamesM", name: "James Marshall", born: 1967 })._id; +var ChristopherG = actors.save({ _key: "ChristopherG", name: "Christopher Guest", born: 1948 })._id; +actsIn.save(TomC, AFewGoodMen, { roles: ["Lt. Daniel Kaffee"], year: 1992 }); +actsIn.save(JackN, AFewGoodMen, { roles: ["Col. Nathan R. Jessup"], year: 1992 }); +actsIn.save(DemiM, AFewGoodMen, { roles: ["Lt. Cdr. JoAnne Galloway"], year: 1992 }); +actsIn.save(KevinB, AFewGoodMen, { roles: ["Capt. Jack Ross"], year: 1992 }); +actsIn.save(KieferS, AFewGoodMen, { roles: ["Lt. Jonathan Kendrick"], year: 1992 }); +actsIn.save(NoahW, AFewGoodMen, { roles: ["Cpl. Jeffrey Barnes"], year: 1992 }); +actsIn.save(CubaG, AFewGoodMen, { roles: ["Cpl. Carl Hammaker"], year: 1992 }); +actsIn.save(KevinP, AFewGoodMen, { roles: ["Lt. Sam Weinberg"], year: 1992 }); +actsIn.save(JTW, AFewGoodMen, { roles: ["Lt. Col. Matthew Andrew Markinson"], year: 1992 }); +actsIn.save(JamesM, AFewGoodMen, { roles: ["Pfc. Louden Downey"], year: 1992 }); +actsIn.save(ChristopherG, AFewGoodMen, { roles: ["Dr. Stone"], year: 1992 }); + +var TopGun = movies.save({ _key: "TopGun", title: "Top Gun", released: 1986, tagline: "I feel the need, the need for speed." })._id; +var KellyM = actors.save({ _key: "KellyM", name: "Kelly McGillis", born: 1957 })._id; +var ValK = actors.save({ _key: "ValK", name: "Val Kilmer", born: 1959 })._id; +var AnthonyE = actors.save({ _key: "AnthonyE", name: "Anthony Edwards", born: 1962 })._id; +var TomS = actors.save({ _key: "TomS", name: "Tom Skerritt", born: 1933 })._id; +var MegR = actors.save({ _key: "MegR", name: "Meg Ryan", born: 1961 })._id; +actsIn.save(TomC, TopGun, { roles: ["Maverick"], year: 1986 }); +actsIn.save(KellyM, TopGun, { roles: ["Charlie"], year: 1986 }); +actsIn.save(ValK, TopGun, { roles: ["Iceman"], year: 1986 }); +actsIn.save(AnthonyE, TopGun, { roles: ["Goose"], year: 1986 }); +actsIn.save(TomS, TopGun, { roles: ["Viper"], year: 1986 }); +actsIn.save(MegR, TopGun, { roles: ["Carole"], year: 1986 }); + +var JerryMaguire = movies.save({ _key: "JerryMaguire", title: "Jerry Maguire", released: 2000, tagline: "The rest of his life begins now." 
})._id; +var ReneeZ = actors.save({ _key: "ReneeZ", name: "Renee Zellweger", born: 1969 })._id; +var KellyP = actors.save({ _key: "KellyP", name: "Kelly Preston", born: 1962 })._id; +var JerryO = actors.save({ _key: "JerryO", name: "Jerry O'Connell", born: 1974 })._id; +var JayM = actors.save({ _key: "JayM", name: "Jay Mohr", born: 1970 })._id; +var BonnieH = actors.save({ _key: "BonnieH", name: "Bonnie Hunt", born: 1961 })._id; +var ReginaK = actors.save({ _key: "ReginaK", name: "Regina King", born: 1971 })._id; +var JonathanL = actors.save({ _key: "JonathanL", name: "Jonathan Lipnicki", born: 1996 })._id; +actsIn.save(TomC, JerryMaguire, { roles: ["Jerry Maguire"], year: 2000 }); +actsIn.save(CubaG, JerryMaguire, { roles: ["Rod Tidwell"], year: 2000 }); +actsIn.save(ReneeZ, JerryMaguire, { roles: ["Dorothy Boyd"], year: 2000 }); +actsIn.save(KellyP, JerryMaguire, { roles: ["Avery Bishop"], year: 2000 }); +actsIn.save(JerryO, JerryMaguire, { roles: ["Frank Cushman"], year: 2000 }); +actsIn.save(JayM, JerryMaguire, { roles: ["Bob Sugar"], year: 2000 }); +actsIn.save(BonnieH, JerryMaguire, { roles: ["Laurel Boyd"], year: 2000 }); +actsIn.save(ReginaK, JerryMaguire, { roles: ["Marcee Tidwell"], year: 2000 }); +actsIn.save(JonathanL, JerryMaguire, { roles: ["Ray Boyd"], year: 2000 }); + +var StandByMe = movies.save({ _key: "StandByMe", title: "Stand By Me", released: 1986, tagline: "For some, it's the last real taste of innocence, and the first real taste of life. But for everyone, it's the time that memories are made of." })._id; +var RiverP = actors.save({ _key: "RiverP", name: "River Phoenix", born: 1970 })._id; +var CoreyF = actors.save({ _key: "CoreyF", name: "Corey Feldman", born: 1971 })._id; +var WilW = actors.save({ _key: "WilW", name: "Wil Wheaton", born: 1972 })._id; +var JohnC = actors.save({ _key: "JohnC", name: "John Cusack", born: 1966 })._id; +var MarshallB = actors.save({ _key: "MarshallB", name: "Marshall Bell", born: 1942 })._id; +actsIn.save(WilW, StandByMe, { roles: ["Gordie Lachance"], year: 1986 }); +actsIn.save(RiverP, StandByMe, { roles: ["Chris Chambers"], year: 1986 }); +actsIn.save(JerryO, StandByMe, { roles: ["Vern Tessio"], year: 1986 }); +actsIn.save(CoreyF, StandByMe, { roles: ["Teddy Duchamp"], year: 1986 }); +actsIn.save(JohnC, StandByMe, { roles: ["Denny Lachance"], year: 1986 }); +actsIn.save(KieferS, StandByMe, { roles: ["Ace Merrill"], year: 1986 }); +actsIn.save(MarshallB, StandByMe, { roles: ["Mr. Lachance"], year: 1986 }); + +var AsGoodAsItGets = movies.save({ _key: "AsGoodAsItGets", title: "As Good as It Gets", released: 1997, tagline: "A comedy from the heart that goes for the throat." })._id; +var HelenH = actors.save({ _key: "HelenH", name: "Helen Hunt", born: 1963 })._id; +var GregK = actors.save({ _key: "GregK", name: "Greg Kinnear", born: 1963 })._id; +actsIn.save(JackN, AsGoodAsItGets, { roles: ["Melvin Udall"], year: 1997 }); +actsIn.save(HelenH, AsGoodAsItGets, { roles: ["Carol Connelly"], year: 1997 }); +actsIn.save(GregK, AsGoodAsItGets, { roles: ["Simon Bishop"], year: 1997 }); +actsIn.save(CubaG, AsGoodAsItGets, { roles: ["Frank Sachs"], year: 1997 }); + +var WhatDreamsMayCome = movies.save({ _key: "WhatDreamsMayCome", title: "What Dreams May Come", released: 1998, tagline: "After life there is more. The end is just the beginning." 
})._id; +var AnnabellaS = actors.save({ _key: "AnnabellaS", name: "Annabella Sciorra", born: 1960 })._id; +var MaxS = actors.save({ _key: "MaxS", name: "Max von Sydow", born: 1929 })._id; +var WernerH = actors.save({ _key: "WernerH", name: "Werner Herzog", born: 1942 })._id; +var Robin = actors.save({ _key: "Robin", name: "Robin Williams", born: 1951 })._id; +actsIn.save(Robin, WhatDreamsMayCome, { roles: ["Chris Nielsen"], year: 1998 }); +actsIn.save(CubaG, WhatDreamsMayCome, { roles: ["Albert Lewis"], year: 1998 }); +actsIn.save(AnnabellaS, WhatDreamsMayCome, { roles: ["Annie Collins-Nielsen"], year: 1998 }); +actsIn.save(MaxS, WhatDreamsMayCome, { roles: ["The Tracker"], year: 1998 }); +actsIn.save(WernerH, WhatDreamsMayCome, { roles: ["The Face"], year: 1998 }); + +var SnowFallingonCedars = movies.save({ _key: "SnowFallingonCedars", title: "Snow Falling on Cedars", released: 1999, tagline: "First loves last. Forever." })._id; +var EthanH = actors.save({ _key: "EthanH", name: "Ethan Hawke", born: 1970 })._id; +var RickY = actors.save({ _key: "RickY", name: "Rick Yune", born: 1971 })._id; +var JamesC = actors.save({ _key: "JamesC", name: "James Cromwell", born: 1940 })._id; +actsIn.save(EthanH, SnowFallingonCedars, { roles: ["Ishmael Chambers"], year: 1999 }); +actsIn.save(RickY, SnowFallingonCedars, { roles: ["Kazuo Miyamoto"], year: 1999 }); +actsIn.save(MaxS, SnowFallingonCedars, { roles: ["Nels Gudmundsson"], year: 1999 }); +actsIn.save(JamesC, SnowFallingonCedars, { roles: ["Judge Fielding"], year: 1999 }); + +var YouveGotMail = movies.save({ _key: "YouveGotMail", title: "You've Got Mail", released: 1998, tagline: "At odds in life... in love on-line." })._id; +var ParkerP = actors.save({ _key: "ParkerP", name: "Parker Posey", born: 1968 })._id; +var DaveC = actors.save({ _key: "DaveC", name: "Dave Chappelle", born: 1973 })._id; +var SteveZ = actors.save({ _key: "SteveZ", name: "Steve Zahn", born: 1967 })._id; +var TomH = actors.save({ _key: "TomH", name: "Tom Hanks", born: 1956 })._id; +actsIn.save(TomH, YouveGotMail, { roles: ["Joe Fox"], year: 1998 }); +actsIn.save(MegR, YouveGotMail, { roles: ["Kathleen Kelly"], year: 1998 }); +actsIn.save(GregK, YouveGotMail, { roles: ["Frank Navasky"], year: 1998 }); +actsIn.save(ParkerP, YouveGotMail, { roles: ["Patricia Eden"], year: 1998 }); +actsIn.save(DaveC, YouveGotMail, { roles: ["Kevin Jackson"], year: 1998 }); +actsIn.save(SteveZ, YouveGotMail, { roles: ["George Pappas"], year: 1998 }); + +var SleeplessInSeattle = movies.save({ _key: "SleeplessInSeattle", title: "Sleepless in Seattle", released: 1993, tagline: "What if someone you never met, someone you never saw, someone you never knew was the only someone for you?" 
})._id; +var RitaW = actors.save({ _key: "RitaW", name: "Rita Wilson", born: 1956 })._id; +var BillPull = actors.save({ _key: "BillPull", name: "Bill Pullman", born: 1953 })._id; +var VictorG = actors.save({ _key: "VictorG", name: "Victor Garber", born: 1949 })._id; +var RosieO = actors.save({ _key: "RosieO", name: "Rosie O'Donnell", born: 1962 })._id; +actsIn.save(TomH, SleeplessInSeattle, { roles: ["Sam Baldwin"], year: 1993 }); +actsIn.save(MegR, SleeplessInSeattle, { roles: ["Annie Reed"], year: 1993 }); +actsIn.save(RitaW, SleeplessInSeattle, { roles: ["Suzy"], year: 1993 }); +actsIn.save(BillPull, SleeplessInSeattle, { roles: ["Walter"], year: 1993 }); +actsIn.save(VictorG, SleeplessInSeattle, { roles: ["Greg"], year: 1993 }); +actsIn.save(RosieO, SleeplessInSeattle, { roles: ["Becky"], year: 1993 }); + +var JoeVersustheVolcano = movies.save({ _key: "JoeVersustheVolcano", title: "Joe Versus the Volcano", released: 1990, tagline: "A story of love, lava and burning desire." })._id; +var Nathan = actors.save({ _key: "Nathan", name: "Nathan Lane", born: 1956 })._id; +actsIn.save(TomH, JoeVersustheVolcano, { roles: ["Joe Banks"], year: 1990 }); +actsIn.save(MegR, JoeVersustheVolcano, { roles: ["DeDe", "Angelica Graynamore", "Patricia Graynamore"], year: 1990 }); +actsIn.save(Nathan, JoeVersustheVolcano, { roles: ["Baw"], year: 1990 }); + +var WhenHarryMetSally = movies.save({ _key: "WhenHarryMetSally", title: "When Harry Met Sally", released: 1998, tagline: "At odds in life... in love on-line." })._id; +var BillyC = actors.save({ _key: "BillyC", name: "Billy Crystal", born: 1948 })._id; +var CarrieF = actors.save({ _key: "CarrieF", name: "Carrie Fisher", born: 1956 })._id; +var BrunoK = actors.save({ _key: "BrunoK", name: "Bruno Kirby", born: 1949 })._id; +actsIn.save(BillyC, WhenHarryMetSally, { roles: ["Harry Burns"], year: 1998 }); +actsIn.save(MegR, WhenHarryMetSally, { roles: ["Sally Albright"], year: 1998 }); +actsIn.save(CarrieF, WhenHarryMetSally, { roles: ["Marie"], year: 1998 }); +actsIn.save(BrunoK, WhenHarryMetSally, { roles: ["Jess"], year: 1998 }); +``` + +## Example queries + +### All actors who acted in "movie1" OR "movie2" + +Say we want to find all actors who acted in "TheMatrix" OR "TheDevilsAdvocate". +First lets try to get all actors for one movie: + +```js +db._query(` + FOR x IN ANY 'movies/TheMatrix' actsIn + OPTIONS { order: 'bfs', uniqueVertices: 'global' } + RETURN x._id +`).toArray(); +``` + +Result: + +```json +[ + [ + "actors/Keanu", + "actors/Hugo", + "actors/Emil", + "actors/Carrie", + "actors/Laurence" + ] +] +``` + +Now we continue to form a `UNION_DISTINCT` of two neighbor queries which will +be the solution: + +```js +db._query(` + FOR x IN UNION_DISTINCT( + (FOR y IN ANY 'movies/TheMatrix' actsIn + OPTIONS { order: 'bfs', uniqueVertices: 'global' } + RETURN y._id), + (FOR y IN ANY 'movies/TheDevilsAdvocate' actsIn + OPTIONS { order: 'bfs', uniqueVertices: 'global' } + RETURN y._id) + ) RETURN x +`).toArray(); +``` + +```json +[ + [ + "actors/Emil", + "actors/Hugo", + "actors/Carrie", + "actors/Laurence", + "actors/Keanu", + "actors/Al", + "actors/Charlize" + ] +] +``` + +### All actors who acted in both "movie1" AND "movie2" + +This is almost identical to the question above. 
+But this time we are not interested in a `UNION` but in an `INTERSECTION`: + +```js +db._query(` + FOR x IN INTERSECTION( + (FOR y IN ANY 'movies/TheMatrix' actsIn + OPTIONS { order: 'bfs', uniqueVertices: 'global' } + RETURN y._id), + (FOR y IN ANY 'movies/TheDevilsAdvocate' actsIn + OPTIONS { order: 'bfs', uniqueVertices: 'global' } + RETURN y._id) + ) RETURN x +`).toArray(); +``` + +```json +[ + [ + "actors/Keanu" + ] +] +``` + +### All common movies between "actor1" and "actor2" + +This is actually identical to the question about common actors in movie1 and +movie2. We just have to change the starting vertices. As an example let us find +all movies where Hugo Weaving and Keanu Reeves are co-starring: + +```js +db._query(` + FOR x IN INTERSECTION( + (FOR y IN ANY 'actors/Hugo' actsIn + OPTIONS { order: 'bfs', uniqueVertices: 'global' } + RETURN y._id), + (FOR y IN ANY 'actors/Keanu' actsIn + OPTIONS { order: 'bfs', uniqueVertices: 'global' } + RETURN y._id) + ) RETURN x +`).toArray(); +``` + +```json +[ + [ + "movies/TheMatrixRevolutions", + "movies/TheMatrixReloaded", + "movies/TheMatrix" + ] +] +``` + +### All actors who acted in 3 or more movies + +Will make use of the edge index and the `COLLECT` statement of AQL for +grouping. The basic idea is to group all edges by their start vertex +(which in this dataset is always the actor). Then we remove all actors with +less than 3 movies from the result. Below query also returns the computed +number of movies an actor has acted in: + +```js +db._query(` + FOR x IN actsIn + COLLECT actor = x._from WITH COUNT INTO counter + FILTER counter >= 3 + RETURN { actor: actor, movies: counter } +`).toArray(); +``` + +```json +[ + { + "actor" : "actors/Carrie", + "movies" : 3 + }, + { + "actor" : "actors/CubaG", + "movies" : 4 + }, + { + "actor" : "actors/Hugo", + "movies" : 3 + }, + { + "actor" : "actors/Keanu", + "movies" : 4 + }, + { + "actor" : "actors/Laurence", + "movies" : 3 + }, + { + "actor" : "actors/MegR", + "movies" : 5 + }, + { + "actor" : "actors/TomC", + "movies" : 3 + }, + { + "actor" : "actors/TomH", + "movies" : 3 + } +] +``` + +### All movies where exactly 6 actors acted in + +The same idea as in the query before, but with equality filter, however now we +need the movie instead of the actor, so we return the `_to` attribute: + +```js +db._query(` + FOR x IN actsIn + COLLECT movie = x._to WITH COUNT INTO counter + FILTER counter == 6 + RETURN movie +`).toArray(); +``` + +```json +[ + "movies/SleeplessInSeattle", + "movies/TopGun", + "movies/YouveGotMail" +] +``` + +### The number of actors by movie + +We remember in our dataset `_to` on the edge corresponds to the movie, so we +count how often the same `_to` appears. This is the number of actors. 
The query +is almost identical to the ones before but without the `FILTER` after `COLLECT`: + +```js +db._query(` + FOR x IN actsIn + COLLECT movie = x._to WITH COUNT INTO counter + RETURN { movie: movie, actors: counter } +`).toArray(); +``` + +```json +[ + { + "movie" : "movies/AFewGoodMen", + "actors" : 11 + }, + { + "movie" : "movies/AsGoodAsItGets", + "actors" : 4 + }, + { + "movie" : "movies/JerryMaguire", + "actors" : 9 + }, + { + "movie" : "movies/JoeVersustheVolcano", + "actors" : 3 + }, + { + "movie" : "movies/SleeplessInSeattle", + "actors" : 6 + }, + { + "movie" : "movies/SnowFallingonCedars", + "actors" : 4 + }, + { + "movie" : "movies/StandByMe", + "actors" : 7 + }, + { + "movie" : "movies/TheDevilsAdvocate", + "actors" : 3 + }, + { + "movie" : "movies/TheMatrix", + "actors" : 5 + }, + { + "movie" : "movies/TheMatrixReloaded", + "actors" : 4 + }, + { + "movie" : "movies/TheMatrixRevolutions", + "actors" : 4 + }, + { + "movie" : "movies/TopGun", + "actors" : 6 + }, + { + "movie" : "movies/WhatDreamsMayCome", + "actors" : 5 + }, + { + "movie" : "movies/WhenHarryMetSally", + "actors" : 4 + }, + { + "movie" : "movies/YouveGotMail", + "actors" : 6 + } +] +``` + +### The number of movies by actor + +The `_to` attribute on the edge corresponds to the actor, so we group by it and +count with `COLLECT`. As a bonus, we can add sorting to return the actors with +the most movies first: + +```js +db._query(` + FOR x IN actsIn + COLLECT actor = x._from WITH COUNT INTO counter + SORT counter DESC + RETURN { actor: actor, movies: counter } +`).toArray(); +``` + +```json +[ + { + "actor" : "actors/MegR", + "movies" : 5 + }, + { + "actor" : "actors/Keanu", + "movies" : 4 + }, + { + "actor" : "actors/CubaG", + "movies" : 4 + }, + { + "actor" : "actors/Carrie", + "movies" : 3 + }, + { + "actor" : "actors/Laurence", + "movies" : 3 + }, + { + "actor" : "actors/Hugo", + "movies" : 3 + }, + { + "actor" : "actors/TomC", + "movies" : 3 + }, + { + "actor" : "actors/TomH", + "movies" : 3 + }, + { + "actor" : "actors/JerryO", + "movies" : 2 + }, + { + "actor" : "actors/GregK", + "movies" : 2 + }, + { + "actor" : "actors/MaxS", + "movies" : 2 + }, + { + "actor" : "actors/JackN", + "movies" : 2 + }, + { + "actor" : "actors/KieferS", + "movies" : 2 + }, + { + "actor" : "actors/JamesM", + "movies" : 1 + }, + { + "actor" : "actors/JayM", + "movies" : 1 + }, + { + "actor" : "actors/ReneeZ", + "movies" : 1 + }, + { + "actor" : "actors/JamesC", + "movies" : 1 + }, + { + "actor" : "actors/TomS", + "movies" : 1 + }, + { + "actor" : "actors/AnthonyE", + "movies" : 1 + }, + { + "actor" : "actors/ValK", + "movies" : 1 + }, + { + "actor" : "actors/KellyM", + "movies" : 1 + }, + { + "actor" : "actors/ChristopherG", + "movies" : 1 + }, + { + "actor" : "actors/Al", + "movies" : 1 + }, + { + "actor" : "actors/JTW", + "movies" : 1 + }, + { + "actor" : "actors/KevinP", + "movies" : 1 + }, + { + "actor" : "actors/Emil", + "movies" : 1 + }, + { + "actor" : "actors/NoahW", + "movies" : 1 + }, + { + "actor" : "actors/Charlize", + "movies" : 1 + }, + { + "actor" : "actors/KevinB", + "movies" : 1 + }, + { + "actor" : "actors/DemiM", + "movies" : 1 + }, + { + "actor" : "actors/WernerH", + "movies" : 1 + }, + { + "actor" : "actors/CarrieF", + "movies" : 1 + }, + { + "actor" : "actors/BillyC", + "movies" : 1 + }, + { + "actor" : "actors/Nathan", + "movies" : 1 + }, + { + "actor" : "actors/RosieO", + "movies" : 1 + }, + { + "actor" : "actors/VictorG", + "movies" : 1 + }, + { + "actor" : "actors/BillPull", + "movies" : 1 + }, + { + 
"actor" : "actors/RitaW", + "movies" : 1 + }, + { + "actor" : "actors/SteveZ", + "movies" : 1 + }, + { + "actor" : "actors/DaveC", + "movies" : 1 + }, + { + "actor" : "actors/ParkerP", + "movies" : 1 + }, + { + "actor" : "actors/RickY", + "movies" : 1 + }, + { + "actor" : "actors/EthanH", + "movies" : 1 + }, + { + "actor" : "actors/KellyP", + "movies" : 1 + }, + { + "actor" : "actors/AnnabellaS", + "movies" : 1 + }, + { + "actor" : "actors/Robin", + "movies" : 1 + }, + { + "actor" : "actors/HelenH", + "movies" : 1 + }, + { + "actor" : "actors/MarshallB", + "movies" : 1 + }, + { + "actor" : "actors/JohnC", + "movies" : 1 + }, + { + "actor" : "actors/CoreyF", + "movies" : 1 + }, + { + "actor" : "actors/RiverP", + "movies" : 1 + }, + { + "actor" : "actors/WilW", + "movies" : 1 + }, + { + "actor" : "actors/JonathanL", + "movies" : 1 + }, + { + "actor" : "actors/ReginaK", + "movies" : 1 + }, + { + "actor" : "actors/BonnieH", + "movies" : 1 + }, + { + "actor" : "actors/BrunoK", + "movies" : 1 + } +] +``` + +### The number of movies acted in between two years by actor + +This query is where a multi-model database actually shines. +First of all we want to use it in production, so we set a persistent index on year. +This allows as to execute fast range queries like between 1990 and 1995. + +```js +db.actsIn.ensureIndex({ type: "persistent", fields: ["year"] }); +``` + +Now we slightly modify our movies by actor query. + +```js +db._query(` + FOR x IN actsIn + FILTER x.year >= 1990 && x.year <= 1995 + COLLECT actor = x._from WITH COUNT INTO counter + RETURN { actor: actor, movies: counter } +`).toArray(); +``` + +```json +[ + { + "actor" : "actors/BillPull", + "movies" : 1 + }, + { + "actor" : "actors/ChristopherG", + "movies" : 1 + }, + { + "actor" : "actors/CubaG", + "movies" : 1 + }, + { + "actor" : "actors/DemiM", + "movies" : 1 + }, + { + "actor" : "actors/JackN", + "movies" : 1 + }, + { + "actor" : "actors/JamesM", + "movies" : 1 + }, + { + "actor" : "actors/JTW", + "movies" : 1 + }, + { + "actor" : "actors/KevinB", + "movies" : 1 + }, + { + "actor" : "actors/KevinP", + "movies" : 1 + }, + { + "actor" : "actors/KieferS", + "movies" : 1 + }, + { + "actor" : "actors/MegR", + "movies" : 2 + }, + { + "actor" : "actors/Nathan", + "movies" : 1 + }, + { + "actor" : "actors/NoahW", + "movies" : 1 + }, + { + "actor" : "actors/RitaW", + "movies" : 1 + }, + { + "actor" : "actors/RosieO", + "movies" : 1 + }, + { + "actor" : "actors/TomC", + "movies" : 1 + }, + { + "actor" : "actors/TomH", + "movies" : 2 + }, + { + "actor" : "actors/VictorG", + "movies" : 1 + } +] +``` + +### The years and number of movies by actor with actor name + +If we want to return a list of years and not just the amount of movies an actor +acted in, then we can't use `COLLECT WITH COUNT INTO` because we can only access +`actor` and `counter` after grouping. Instead, we can use `COLLECT … INTO` to +keep track of the movie years per actor. The amount of years equals the number +of movies. + +The example query is limited to two actors for simplicity. 
As an added extra, +it looks up the actor `name` using the `DOCUMENT()` function: + +```js +db._query(` + FOR x IN actsIn + FILTER x._from IN [ "actors/TomH", "actors/Keanu" ] + COLLECT actor = x._from INTO years = x.year + RETURN { + name: DOCUMENT(actor).name, + movies: COUNT(years), + years + }` +).toArray(); +``` + +```json +[ + { + "name" : "Keanu Reeves", + "movies" : 4, + "years" : [ + 1999, + 2003, + 2003, + 1997 + ] + }, + { + "name" : "Tom Hanks", + "movies" : 3, + "years" : [ + 1998, + 1993, + 1990 + ] + } +] +``` diff --git a/site/content/arangodb/oem/aql/examples-and-query-patterns/counting.md b/site/content/arangodb/oem/aql/examples-and-query-patterns/counting.md new file mode 100644 index 0000000000..11079180c2 --- /dev/null +++ b/site/content/arangodb/oem/aql/examples-and-query-patterns/counting.md @@ -0,0 +1,28 @@ +--- +title: Counting in AQL +menuTitle: Counting +weight: 10 +description: >- + You can count the documents of a collection in different ways +--- +## Amount of documents in a collection + +To return the count of documents that currently exist in a collection, +you can call the [`LENGTH()` function](../functions/array.md#length): + +```aql +RETURN LENGTH(collection) +``` + +This type of call is optimized since 2.8 (no unnecessary intermediate result +is built up in memory) and it is therefore the preferred way to determine the count. +Internally, [`COLLECTION_COUNT()`](../functions/miscellaneous.md#collection_count) is called. + +In earlier versions with `COLLECT ... WITH COUNT INTO` available (since 2.4), +you may use the following code instead of `LENGTH()` for better performance: + +```aql +FOR doc IN collection + COLLECT WITH COUNT INTO length + RETURN length +``` diff --git a/site/content/arangodb/oem/aql/examples-and-query-patterns/create-test-data.md b/site/content/arangodb/oem/aql/examples-and-query-patterns/create-test-data.md new file mode 100644 index 0000000000..90a27a2bd8 --- /dev/null +++ b/site/content/arangodb/oem/aql/examples-and-query-patterns/create-test-data.md @@ -0,0 +1,95 @@ +--- +title: Create Test Data with AQL +menuTitle: Create Test Data +weight: 5 +description: >- + How to fill a collection with dummy documents +--- +We assume that there is already a collection to the hold documents called +`myCollection` in below example queries. + +One of the easiest ways to fill a collection with test data is to use an AQL +query that iterates over a range. + +Run the following AQL query e.g. from the _AQL Editor_ in the web interface +to insert 1,000 documents into the collection: + +```aql +FOR i IN 1..1000 + INSERT { name: CONCAT("test", i) } IN myCollection +``` + +The number of documents to create can be modified easily be adjusting the range +boundary values. + +If you want to inspect the result immediately, add `RETURN NEW` at the end of +the query. + +To create more complex test data, adjust the AQL query. Let us say we also want +a `status` attribute, and fill it with integer values between `1` to `5` +(inclusive), with equal distribution. A good way to achieve this is to use +the modulo operator (`%`): + +```aql +FOR i IN 1..1000 + INSERT { + name: CONCAT("test", i), + status: 1 + (i % 5) + } IN myCollection +``` + +To create pseudo-random values, use the `RAND()` function. It creates +pseudo-random numbers between `0` and `1`. Use some factor to scale the random +numbers, and `FLOOR()` to convert the scaled number back to an integer. 
+ +For example, the following query populates the `value` attribute with numbers +between 100 and 150 (inclusive): + +```aql +FOR i IN 1..1000 + INSERT { + name: CONCAT("test", i), + value: 100 + FLOOR(RAND() * (150 - 100 + 1)) + } IN myCollection +``` + +After the test data has been created, it is often helpful to verify it. The +`RAND()` function is also a good candidate for retrieving a random sample of +the documents in the collection. This query will retrieve 10 random documents: + +```aql +FOR doc IN myCollection + SORT RAND() + LIMIT 10 + RETURN doc +``` + +The `COLLECT` clause is an easy mechanism to run an aggregate analysis on some +attribute. Let us say we wanted to verify the data distribution inside the +`status` attribute. In this case we could run: + +```aql +FOR doc IN myCollection + COLLECT value = doc.value WITH COUNT INTO count + RETURN { + value: value, + count: count + } +``` + +The above query will provide the number of documents per distinct `value`. + +We can make the JSON result a bit more compact by using the value as attribute +key, the count as attribute value and merge everything into a single result +object. Note that attribute keys can only be strings, but for our purposes here +it is acceptable. + +```aql +RETURN MERGE( + FOR doc IN myCollection + COLLECT value = doc.value WITH COUNT INTO count + RETURN { + [value]: count + } +) +``` diff --git a/site/content/arangodb/oem/aql/examples-and-query-patterns/diffing-two-documents.md b/site/content/arangodb/oem/aql/examples-and-query-patterns/diffing-two-documents.md new file mode 100644 index 0000000000..14dbc7d3d8 --- /dev/null +++ b/site/content/arangodb/oem/aql/examples-and-query-patterns/diffing-two-documents.md @@ -0,0 +1,128 @@ +--- +title: Diffing Two Documents in AQL +menuTitle: Diffing Two Documents +weight: 55 +description: >- + How to determine the differences in attributes of two documents +--- +There is no built-in AQL function to compare the attributes of two documents, +but it is easily possible to build a query that does: + +```aql +// input document 1 +LET doc1 = { + "foo": "bar", + "a": 1, + "b": 2 +} + +// input document 2 +LET doc2 = { + "foo": "baz", + "a": 2, + "c": 3 +} + +// collect attributes present in doc1, but missing in doc2 +LET missing = ( + FOR key IN ATTRIBUTES(doc1) + FILTER ! HAS(doc2, key) + RETURN { + [ key ]: doc1[key] + } +) + +// collect attributes present in both docs, but that have different values +LET changed = ( + FOR key IN ATTRIBUTES(doc1) + FILTER HAS(doc2, key) && doc1[key] != doc2[key] + RETURN { + [ key ] : { + old: doc1[key], + new: doc2[key] + } + } +) + +// collect attributes present in doc2, but missing in doc1 +LET added = ( + FOR key IN ATTRIBUTES(doc2) + FILTER ! HAS(doc1, key) + RETURN { + [ key ]: doc2[key] + } +) + +// return final result +RETURN { + "missing": missing, + "changed": changed, + "added": added +} +``` + +The query may look a bit lengthy, but much of that is due to formatting. +A more terse version can be found below. + +The above query will return a document with three attributes: + +- `missing`: + Contains all attributes only present in first document + (i.e. missing in second document) + +- `changed`: + Contains all attributes present in both documents that have different values + +- `added`: + Contains all attributes only present in second document + (i.e. 
missing in first document) + +For the two example documents it will return: + +```json +[ + { + "missing" : [ + { + "b" : 2 + } + ], + "changed" : [ + { + "foo" : { + "old" : "bar", + "new" : "baz" + } + }, + { + "a" : { + "old" : 1, + "new" : 2 + } + } + ], + "added" : [ + { + "c" : 3 + } + ] + } +] +``` + +You may adjust the query to produce a different output format. + +Following is a version of the same query that can be invoked from JavaScript +easily. It passes the two documents as bind parameters and calls `db._query`. +The query is now an one-liner (less readable but easier to copy & paste): + +```js +bindVariables = { + doc1 : { "foo" : "bar", "a" : 1, "b" : 2 }, + doc2 : { "foo" : "baz", "a" : 2, "c" : 3 } +}; + +query = "LET doc1 = @doc1, doc2 = @doc2, missing = (FOR key IN ATTRIBUTES(doc1) FILTER ! HAS(doc2, key) RETURN { [ key ]: doc1[key] }), changed = (FOR key IN ATTRIBUTES(doc1) FILTER HAS(doc2, key) && doc1[key] != doc2[key] RETURN { [ key ] : { old: doc1[key], new: doc2[key] } }), added = (FOR key IN ATTRIBUTES(doc2) FILTER ! HAS(doc1, key) RETURN { [ key ] : doc2[key] }) RETURN { missing : missing, changed : changed, added : added }"; + +result = db._query(query, bindVariables).toArray(); +``` diff --git a/site/content/arangodb/oem/aql/examples-and-query-patterns/dynamic-attribute-names.md b/site/content/arangodb/oem/aql/examples-and-query-patterns/dynamic-attribute-names.md new file mode 100644 index 0000000000..59efe9f163 --- /dev/null +++ b/site/content/arangodb/oem/aql/examples-and-query-patterns/dynamic-attribute-names.md @@ -0,0 +1,202 @@ +--- +title: Dynamic Attribute Names in AQL +menuTitle: Dynamic Attribute Names +weight: 15 +description: >- + You can use expressions as attribute names or use subqueries and `ZIP()` to + create objects with varying attribute names +--- +You might want an AQL query to return results with attribute names assembled +by a function, or with a variable number of attributes. + +This will not work by specifying the result using a regular object literal, +as object literals require the names and numbers of attributes to be fixed at +query compile time. + +There are two solutions to getting dynamic attribute names to work: +- Using expressions as attribute names (fixed amount of attributes) +- Using subqueries and the `ZIP()` function (variable amount of attributes) + +## Using expressions as attribute names + +This solution works in cases where the number of dynamic attributes to return +is known in advance, and only the attribute names need to be calculated using +an expression. + +Using expressions as attribute names instead of fixed attribute names in object +literals requires enclosing the expression in extra `[` and `]` to disambiguate +them from regular, unquoted attribute names. + +Let us create a result that returns the original document data contained in +a dynamically named attribute. We will be using the expression `doc.type` +for the attribute name. We will also return some other attributes from the +original documents, but prefix them with the documents' `_key` attribute values. +For this we also need attribute name expressions. + +Here is a query showing how to do this. 
The attribute name expressions all +required to be enclosed in `[` and `]` in order to make this work: + +```aql +LET documents = [ + { "_key" : "3231748397810", "gender" : "f", "status" : "active", "type" : "user" }, + { "_key" : "3231754427122", "gender" : "m", "status" : "inactive", "type" : "unknown" } +] + +FOR doc IN documents + RETURN { + [ doc.type ] : { + [ CONCAT(doc._key, "_gender") ] : doc.gender, + [ CONCAT(doc._key, "_status") ] : doc.status + } + } +``` + +This will return: + +```json +[ + { + "user": { + "3231748397810_gender": "f", + "3231748397810_status": "active" + } + }, + { + "unknown": { + "3231754427122_gender": "m", + "3231754427122_status": "inactive" + } + } +] +``` + +Note: +Attribute name expressions and regular, unquoted attribute names can be mixed. + +## Subquery solution + +A generalized solution is to let a subquery or another function produce the +dynamic attribute names, and finally pass them through the `ZIP()` function to +create an object from them. + +Let us assume we want to process the following input documents: + +```json +{ "name": "test", "gender": "f", "status": "active", "type": "user" } +{ "name": "dummy", "gender": "m", "status": "inactive", "type": "unknown", "magicFlag": 23 } +``` + +Let us also assume our goal for each of these documents is to return only the +attribute names that contain the letter `a`, together with their respective +values. + +To extract the attribute names and values from the original documents, we can +use a subquery as follows: + +```aql +LET documents = [ + { "name": "test"," gender": "f", "status": "active", "type": "user" }, + { "name": "dummy", "gender": "m", "status": "inactive", "type": "unknown", "magicFlag": 23 } +] + +FOR doc IN documents + RETURN ( + FOR name IN ATTRIBUTES(doc) + FILTER LIKE(name, '%a%') + RETURN { + name: name, + value: doc[name] + } + ) +``` + +The subquery will only let attribute names pass that contain the letter `a`. +The results of the subquery are then made available to the main query and will +be returned. But the attribute names in the result are still `name` and `value`, +so we're not there yet. + +So let us also employ AQL's [`ZIP()`](../functions/document-object.md#zip) function, +which can create an object from two arrays: + +- the first parameter to `ZIP()` is an array with the attribute names +- the second parameter to `ZIP()` is an array with the attribute values + +Instead of directly returning the subquery result, we first capture it in a +variable, and pass the variable's `name` and `value` components into `ZIP()` +like this: + +```aql +LET documents = [ + { "name" : "test"," gender" : "f", "status" : "active", "type" : "user" }, + { "name" : "dummy", "gender" : "m", "status" : "inactive", "type" : "unknown", "magicFlag" : 23 } +] + +FOR doc IN documents + LET attributes = ( + FOR name IN ATTRIBUTES(doc) + FILTER LIKE(name, '%a%') + RETURN { + name: name, + value: doc[name] + } + ) + RETURN ZIP(attributes[*].name, attributes[*].value) +``` + +Note that we have to use the expansion operator (`[*]`) on `attributes` because +`attributes` itself is an array, and we want either the `name` attribute or the +`value` attribute of each of its members. + +To prove this is working, here is the above query's result: + +```json +[ + { + "name": "test", + "status": "active" + }, + { + "name": "dummy", + "status": "inactive", + "magicFlag": 23 + } +] +``` + +As can be seen, the two results have a different amount of result attributes. 
+We can also make the result a bit more dynamic by prefixing each attribute +with the value of the `name` attribute: + +```aql +LET documents = [ + { "name": "test"," gender": "f", "status": "active", "type": "user" }, + { "name": "dummy", "gender": "m", "status": "inactive", "type": "unknown", "magicFlag": 23 } +] + +FOR doc IN documents + LET attributes = ( + FOR name IN ATTRIBUTES(doc) + FILTER LIKE(name, '%a%') + RETURN { + name: CONCAT(doc.name, '-', name), + value: doc[name] + } + ) + RETURN ZIP(attributes[*].name, attributes[*].value) +``` + +That will give us document-specific attribute names like this: + +```json +[ + { + "test-name": "test", + "test-status": "active" + }, + { + "dummy-name": "dummy", + "dummy-status": "inactive", + "dummy-magicFlag": 23 + } +] +``` diff --git a/site/content/arangodb/oem/aql/examples-and-query-patterns/grouping.md b/site/content/arangodb/oem/aql/examples-and-query-patterns/grouping.md new file mode 100644 index 0000000000..cb54fefcf2 --- /dev/null +++ b/site/content/arangodb/oem/aql/examples-and-query-patterns/grouping.md @@ -0,0 +1,359 @@ +--- +title: Grouping and aggregating data in AQL +menuTitle: Grouping +weight: 30 +description: >- + You can group data by arbitrary criteria with AQL's `COLLECT` operation, + with optional aggregation during grouping or using post-aggregation +--- +To group results by arbitrary criteria, AQL provides the `COLLECT` keyword. +`COLLECT` will perform a grouping, but no aggregation. Aggregation can still be +added in the query if required. + +## Ensuring uniqueness + +`COLLECT` can be used to make a result set unique. The following query will return each distinct +`age` attribute value only once: + +```aql +FOR u IN users + COLLECT age = u.age + RETURN age +``` + +This is grouping without tracking the group values, but just the group criterion (*age*) value. + +Grouping can also be done on multiple levels using `COLLECT`: + +```aql +FOR u IN users + COLLECT status = u.status, age = u.age + RETURN { status, age } +``` + +Alternatively `RETURN DISTINCT` can be used to make a result set unique. +`RETURN DISTINCT` supports a single criterion only: + +```aql +FOR u IN users + RETURN DISTINCT u.age +``` + +`RETURN DISTINCT` does not change the order of results. For above query that +means the order is undefined because no particular order is guaranteed when +iterating over a collection without explicit `SORT` operation. + +## Fetching group values + +To group users by age, and return the names of the users with the highest ages, +we'll issue a query like this: + +```aql +FOR u IN users + FILTER u.active == true + COLLECT age = u.age INTO usersByAge + SORT age DESC LIMIT 0, 5 + RETURN { + age, + users: usersByAge[*].u.name + } +``` + +```json +[ + { "age": 37, "users": [ "John", "Sophia" ] }, + { "age": 36, "users": [ "Fred", "Emma" ] }, + { "age": 34, "users": [ "Madison" ] }, + { "age": 33, "users": [ "Chloe", "Michael" ] }, + { "age": 32, "users": [ "Alexander" ] } +] +``` + +The query will put all users together by their *age* attribute. There will be one +result document per distinct *age* value (let aside the `LIMIT`). For each group, +we have access to the matching document via the *usersByAge* variable introduced in +the `COLLECT` statement. 
+ +## Variable Expansion + +The *usersByAge* variable contains the full documents found, and as we're only +interested in user names, we'll use the expansion operator `[*]` to extract just the +*name* attribute of all user documents in each group: + +```aql +usersByAge[*].u.name +``` + +The `[*]` expansion operator is just a handy short-cut. We could also write +a subquery: + +```aql +( FOR temp IN usersByAge RETURN temp.u.name ) +``` + +## Grouping by multiple criteria + +To group by multiple criteria, we'll use multiple arguments in the `COLLECT` clause. +For example, to group users by *ageGroup* (a derived value we need to calculate first) +and then by *gender*, we'll do: + +```aql +FOR u IN users + FILTER u.active == true + COLLECT ageGroup = FLOOR(u.age / 5) * 5, + gender = u.gender INTO group + SORT ageGroup DESC + RETURN { + ageGroup, + gender + } +``` + +```json +[ + { "ageGroup": 35, "gender": "f" }, + { "ageGroup": 35, "gender": "m" }, + { "ageGroup": 30, "gender": "f" }, + { "ageGroup": 30, "gender": "m" }, + { "ageGroup": 25, "gender": "f" }, + { "ageGroup": 25, "gender": "m" } +] +``` + +## Counting group values + +If the goal is to count the number of values in each group, AQL provides the special +*COLLECT WITH COUNT INTO* syntax. This is a simple variant for grouping with an additional +group length calculation: + +```aql +FOR u IN users + FILTER u.active == true + COLLECT ageGroup = FLOOR(u.age / 5) * 5, + gender = u.gender WITH COUNT INTO numUsers + SORT ageGroup DESC + RETURN { + ageGroup, + gender, + numUsers + } +``` + +```json +[ + { "ageGroup": 35, "gender": "f", "numUsers": 2 }, + { "ageGroup": 35, "gender": "m", "numUsers": 2 }, + { "ageGroup": 30, "gender": "f", "numUsers": 4 }, + { "ageGroup": 30, "gender": "m", "numUsers": 4 }, + { "ageGroup": 25, "gender": "f", "numUsers": 2 }, + { "ageGroup": 25, "gender": "m", "numUsers": 2 } +] +``` + +## Aggregation + +Adding further aggregation is also simple in AQL by using an `AGGREGATE` clause +in the `COLLECT`: + +```aql +FOR u IN users + FILTER u.active == true + COLLECT ageGroup = FLOOR(u.age / 5) * 5, + gender = u.gender + AGGREGATE numUsers = LENGTH(1), + minAge = MIN(u.age), + maxAge = MAX(u.age) + SORT ageGroup DESC + RETURN { + ageGroup, + gender, + numUsers, + minAge, + maxAge + } +``` + +```json +[ + { + "ageGroup": 35, + "gender": "f", + "numUsers": 2, + "minAge": 36, + "maxAge": 39, + }, + { + "ageGroup": 35, + "gender": "m", + "numUsers": 2, + "minAge": 35, + "maxAge": 39, + }, + ... +] +``` + +We have used the aggregate functions *LENGTH* here (it returns the length of an array). +This is the equivalent to SQL's `SELECT g, COUNT(*) FROM ... GROUP BY g`. In addition to +`LENGTH`, AQL also provides `MAX`, `MIN`, `SUM` and `AVERAGE`, `VARIANCE_POPULATION`, +`VARIANCE_SAMPLE`, `STDDEV_POPULATION`, `STDDEV_SAMPLE`, `UNIQUE`, `SORTED_UNIQUE` and +`COUNT_UNIQUE` as basic aggregation functions. + +In AQL all aggregation functions can be run on arrays only. If an aggregation function +is run on anything that is not an array, a warning will be produced and the result will +be `null`. + +Using an `AGGREGATE` clause will ensure the aggregation is run while the groups are built +in the collect operation. This is normally more efficient than collecting all group values +for all groups and then doing a post-aggregation. + +## Post-aggregation + +Aggregation can also be performed after a `COLLECT` operation using other AQL constructs, +though performance-wise this is often inferior to using `COLLECT` with `AGGREGATE`. 
+ +The same query as before can be turned into a post-aggregation query as shown below. Note +that this query will build and pass on all group values for all groups inside the variable +*g*, and perform the aggregation at the latest possible stage: + +```aql +FOR u IN users + FILTER u.active == true + COLLECT ageGroup = FLOOR(u.age / 5) * 5, + gender = u.gender INTO g + SORT ageGroup DESC + RETURN { + ageGroup, + gender, + numUsers: LENGTH(g[*]), + minAge: MIN(g[*].u.age), + maxAge: MAX(g[*].u.age) + } +``` + +```json +[ + { + "ageGroup": 35, + "gender": "f", + "numUsers": 2, + "minAge": 36, + "maxAge": 39, + }, + { + "ageGroup": 35, + "gender": "m", + "numUsers": 2, + "minAge": 35, + "maxAge": 39, + }, + ... +] +``` + +This is in contrast to the previous query that used an `AGGREGATE` clause to perform +the aggregation during the collect operation, at the earliest possible stage. + +## Post-filtering aggregated data + +To filter the results of a grouping or aggregation operation (i.e. something +similar to *HAVING* in SQL), simply add another `FILTER` clause after the `COLLECT` +statement. + +For example, to get the 3 *ageGroup*s with the most users in them: + +```aql +FOR u IN users + FILTER u.active == true + COLLECT ageGroup = FLOOR(u.age / 5) * 5 INTO group + LET numUsers = LENGTH(group) + FILTER numUsers > 2 /* group must contain at least 3 users in order to qualify */ + SORT numUsers DESC + LIMIT 0, 3 + RETURN { + "ageGroup": ageGroup, + "numUsers": numUsers, + "users": group[*].u.name + } +``` + +```json +[ + { + "ageGroup": 30, + "numUsers": 8, + "users": [ + "Abigail", + "Madison", + "Anthony", + "Alexander", + "Isabella", + "Chloe", + "Daniel", + "Michael" + ] + }, + { + "ageGroup": 25, + "numUsers": 4, + "users": [ + "Mary", + "Mariah", + "Jim", + "Diego" + ] + }, + { + "ageGroup": 35, + "numUsers": 4, + "users": [ + "Fred", + "John", + "Emma", + "Sophia" + ] + } +] +``` + +To increase readability, the repeated expression *LENGTH(group)* was put into a variable +*numUsers*. The `FILTER` on *numUsers* is the equivalent an SQL *HAVING* clause. + +## Aggregating data in local time + +If you store datetimes in UTC in your collections and need to group data for +each day in your local timezone, you can use `DATE_UTCTOLOCAL()` and +`DATE_TRUNC()` to adjust for that. + +Note: In the timezone `Europe/Berlin` daylight saving activated on 2020-03-29, +thus 2020-01-31T**23**:00:00Z is 2020-02-01 midnight in Germany and +2020-03-31T**22**:00:00Z is 2020-04-01 midnight in Germany. 
+
+```aql
+---
+name: aqlDateGroupingLocalTime_1
+description: ''
+bindVars:
+  {
+    "activities": [
+      {"startDate": "2020-01-31T23:00:00Z", "endDate": "2020-02-01T03:00:00Z", "duration": 4, "rate": 250},
+      {"startDate": "2020-02-01T09:00:00Z", "endDate": "2020-02-01T17:00:00Z", "duration": 8, "rate": 250},
+      {"startDate": "2020-03-31T21:00:00Z", "endDate": "2020-03-31T22:00:00Z", "duration": 1, "rate": 250},
+      {"startDate": "2020-03-31T22:00:00Z", "endDate": "2020-04-01T03:00:00Z", "duration": 5, "rate": 250},
+      {"startDate": "2020-04-01T13:00:00Z", "endDate": "2020-04-01T16:00:00Z", "duration": 3, "rate": 250}
+    ]
+  }
+---
+FOR a IN @activities
+COLLECT
+  day = DATE_TRUNC(DATE_UTCTOLOCAL(a.startDate, 'Europe/Berlin'), 'day')
+AGGREGATE
+  hours = SUM(a.duration),
+  revenue = SUM(a.duration * a.rate)
+SORT day ASC
+RETURN {
+  day,
+  hours,
+  revenue
+}
+```
diff --git a/site/content/arangodb/oem/aql/examples-and-query-patterns/joins.md b/site/content/arangodb/oem/aql/examples-and-query-patterns/joins.md
new file mode 100644
index 0000000000..5c7e3d3c48
--- /dev/null
+++ b/site/content/arangodb/oem/aql/examples-and-query-patterns/joins.md
@@ -0,0 +1,892 @@
+---
+title: Using Joins in AQL
+menuTitle: Joins
+weight: 25
+description: >-
+  Query examples for joining documents with one-to-many and many-to-many relationships
+---
+The two common scenarios when you want to join documents of collections are:
+
+- **One-to-Many**:
+  You may have a `users` collection and a `cities` collection. A user lives in
+  a city and you need the city information during a query about the user.
+
+- **Many-To-Many**:
+  You may have an `authors` collection and a `books` collection. An author can write many
+  books and a book can have many authors. You want to return a list of books
+  with their authors. Therefore you need to join the authors and books.
+
+Unlike many NoSQL databases, ArangoDB does support joins in AQL queries. This
+is similar to the way traditional relational databases handle this. However,
+because documents allow for more flexibility, joins are also more flexible.
+The following sections provide solutions for common questions.
+
+So far, we have only dealt with one collection (`users`) at a time. We also have a
+collection `relations` that stores relationships between users. We now use
+this extra collection to create a result from two collections.
+
+First of all, we query a few users together with their friends' IDs. For that,
+we use all `relations` that have a value of `friend` in their `type` attribute.
+Relationships are established by using the `friendOf` and `thisUser` attributes in the
+`relations` collection, which point to the `userId` values in the `users` collection.
+
+## One-To-Many
+
+You have a collection called `users`. Users live in a city and a city is identified
+by its primary key. In principle, you can embed the city document into the
+users document and be happy with it.
+
+```json
+{
+  "_id" : "users/2151975421",
+  "_key" : "2151975421",
+  "_rev" : "2151975421",
+  "name" : {
+    "first" : "John",
+    "last" : "Doe"
+  },
+  "city" : {
+    "name" : "Metropolis"
+  }
+}
+```
+
+This works well for many use cases. Now assume that you have additional
+information about the city, like the number of people living in it. It would be
+impractical to change each and every user document if this number changes.
+Therefore, it is a good idea to hold the city information in a separate collection. 
+ +```js +arangosh> db.cities.document("cities/2241300989"); +``` + +```json +{ + "population" : 1000, + "name" : "Metropolis", + "_id" : "cities/2241300989", + "_rev" : "2241300989", + "_key" : "2241300989" +} +``` + +Instead of embedding the city directly in the user document, you can use +the key of the city. + +```js +arangosh> db.users.document("users/2290649597"); +``` + +```json +{ + "name" : { + "first" : "John", + "last" : "Doe" + }, + "city" : "cities/2241300989", + "_id" : "users/2290649597", + "_rev" : "2290649597", + "_key" : "2290649597" +} +``` + +We can now join these two collections very easily. + +```js +arangosh> db._query( +........>"FOR u IN users " + +........>" FOR c IN cities " + +........>" FILTER u.city == c._id RETURN { user: u, city: c }" +........>).toArray() +``` + +```json +[ + { + "user" : { + "name" : { + "first" : "John", + "last" : "Doe" + }, + "city" : "cities/2241300989", + "_id" : "users/2290649597", + "_rev" : "2290649597", + "_key" : "2290649597" + }, + "city" : { + "population" : 1000, + "name" : "Metropolis", + "_id" : "cities/2241300989", + "_rev" : "2241300989", + "_key" : "2241300989" + } + } +] +``` + +Unlike in SQL, there is no special `JOIN` keyword. The optimizer ensures that the +primary index is used in the above query. + +However, very often it is much more convenient for the client of the query if a +single document would be returned, where the city information is embedded in the +user document - as in the simple example above. With AQL, you do not need +to forgo this simplification. + +```js +arangosh> db._query( +........>"FOR u IN users " + +........>" FOR c IN cities " + +........>" FILTER u.city == c._id RETURN merge(u, {city: c})" +........>).toArray() +``` + +```json +[ + { + "_id" : "users/2290649597", + "_key" : "2290649597", + "_rev" : "2290649597", + "name" : { + "first" : "John", + "last" : "Doe" + }, + "city" : { + "_id" : "cities/2241300989", + "_key" : "2241300989", + "_rev" : "2241300989", + "population" : 1000, + "name" : "Metropolis" + } + } +] +``` + +You can have both: the convenient representation of the result for your +client and the flexibility of joins for your data model. + +## Many-To-Many + +In the relational world, you need a third table to model the many-to-many +relation. In ArangoDB, you have a choice depending on the information you are +going to store and the type of questions you are going to ask. + +Assume that authors are stored in one collection and books in a second. If all +you need is "who are the authors of a book", then you can easily model this as +a list attribute in users. + +If you want to store more information, for example, which author wrote which +page in a conference proceeding, or if you also want to know "which books were +written by which author", you can use edge collections. This is very similar to +the "join table" from the relational world. + +### Embedded Lists + +If you only want to store the authors of a book, you can embed them as list in +the book document. There is no need for a separate collection. 
+ +```js +arangosh> db.authors.toArray() +``` + +```json +[ + { + "_id" : "authors/2661190141", + "_key" : "2661190141", + "_rev" : "2661190141", + "name" : { + "first" : "Maxima", + "last" : "Musterfrau" + } + }, + { + "_id" : "authors/2658437629", + "_key" : "2658437629", + "_rev" : "2658437629", + "name" : { + "first" : "John", + "last" : "Doe" + } + } +] +``` + +You can query books: + +```js +arangosh> db._query("FOR b IN books RETURN b").toArray(); +``` + +```json +[ + { + "_id" : "books/2681506301", + "_key" : "2681506301", + "_rev" : "2681506301", + "title" : "The beauty of JOINS", + "authors" : [ + "authors/2661190141", + "authors/2658437629" + ] + } +] +``` + +And you can join the authors in a very similar manner given in the one-to-many section: + +```js +arangosh> db._query( +........>"FOR b IN books " + +........>" LET a = (FOR x IN b.authors " + +........>" FOR a IN authors FILTER x == a._id RETURN a) " + +........>" RETURN { book: b, authors: a }" +........>).toArray(); +``` + +```json +[ + { + "book" : { + "title" : "The beauty of JOINS", + "authors" : [ + "authors/2661190141", + "authors/2658437629" + ], + "_id" : "books/2681506301", + "_rev" : "2681506301", + "_key" : "2681506301" + }, + "authors" : [ + { + "name" : { + "first" : "Maxima", + "last" : "Musterfrau" + }, + "_id" : "authors/2661190141", + "_rev" : "2661190141", + "_key" : "2661190141" + }, + { + "name" : { + "first" : "John", + "last" : "Doe" + }, + "_id" : "authors/2658437629", + "_rev" : "2658437629", + "_key" : "2658437629" + } + ] + } +] +``` + +Or you can embed the authors directly: + +```js +arangosh> db._query( +........>"FOR b IN books LET a = (" + +........>" FOR x IN b.authors " + +........>" FOR a IN authors FILTER x == a._id RETURN a)" + +........>" RETURN merge(b, { authors: a })" +........>).toArray(); +``` + +```json +[ + { + "_id" : "books/2681506301", + "_key" : "2681506301", + "_rev" : "2681506301", + "title" : "The beauty of JOINS", + "authors" : [ + { + "_id" : "authors/2661190141", + "_key" : "2661190141", + "_rev" : "2661190141", + "name" : { + "first" : "Maxima", + "last" : "Musterfrau" + } + }, + { + "_id" : "authors/2658437629", + "_key" : "2658437629", + "_rev" : "2658437629", + "name" : { + "first" : "John", + "last" : "Doe" + } + } + ] + } +] +``` + +### Using Edge Collections + +If you also want to query which books are written by a given author, embedding authors +in the book document is possible, but it is more efficient to use a edge collections for +speed. + +Or you are publishing a proceeding, then you want to store the pages the author has written +as well. This information can be stored in the edge document. 
+ +First off, create the users: + +```js +arangosh> db._create("authors"); +``` + +``` +[ArangoCollection 2926807549, "authors" (type document, status loaded)] +``` + +```js +arangosh> db.authors.save({ name: { first: "John", last: "Doe" } }) +``` + +```json +{ + "error" : false, + "_id" : "authors/2935261693", + "_rev" : "2935261693", + "_key" : "2935261693" +} +``` + +```js +arangosh> db.authors.save({ name: { first: "Maxima", last: "Musterfrau" } }) +``` + +```json +{ + "error" : false, + "_id" : "authors/2938210813", + "_rev" : "2938210813", + "_key" : "2938210813" +} +``` + +Now, create the books without any author information: + +```js +arangosh> db._create("books"); +``` + +``` +[ArangoCollection 2928380413, "books" (type document, status loaded)] +``` + +```js +arangosh> db.books.save({ title: "The beauty of JOINS" }); +``` + +```json +{ + "error" : false, + "_id" : "books/2980088317", + "_rev" : "2980088317", + "_key" : "2980088317" +} +``` + +An edge collection is now used to link authors and books: + +```js +arangosh> db._createEdgeCollection("written"); +``` + +``` +[ArangoCollection 2931132925, "written" (type edge, status loaded)] +``` + +```js +arangosh> db.written.save("authors/2935261693", +........>"books/2980088317", +........>{ pages: "1-10" }) +``` + +```json +{ + "error" : false, + "_id" : "written/3006237181", + "_rev" : "3006237181", + "_key" : "3006237181" +} +``` + +```js +arangosh> db.written.save("authors/2938210813", +........>"books/2980088317", +........>{ pages: "11-20" }) +``` + +```json +{ + "error" : false, + "_id" : "written/3012856317", + "_rev" : "3012856317", + "_key" : "3012856317" +} +``` + +In order to get all books with their authors, you can use a +[graph traversal](../graph-queries/traversals.md#working-with-collection-sets): + +```js +arangosh> db._query( +...> "FOR b IN books " + +...> "LET authorsByBook = ( " + +...> " FOR author, writtenBy IN INBOUND b written " + +...> " RETURN { " + +...> " vertex: author, " + +...> " edge: writtenBy " + +...> " } " + +...> ") " + +...> "RETURN { " + +...> " book: b, " + +...> " authors: authorsByBook " + +...> "} " +...> ).toArray(); +``` + +```json +[ + { + "book" : { + "_key" : "2980088317", + "_id" : "books/2980088317", + "_rev" : "2980088317", + "title" : "The beauty of JOINS" + }, + "authors" : [ + { + "vertex" : { + "_key" : "2935261693", + "_id" : "authors/2935261693", + "_rev" : "2935261693", + "name" : { + "first" : "John", + "last" : "Doe" + } + }, + "edge" : { + "_key" : "2935261693", + "_id" : "written/2935261693", + "_from" : "authors/2935261693", + "_to" : "books/2980088317", + "_rev" : "3006237181", + "pages" : "1-10" + } + }, + { + "vertex" : { + "_key" : "2938210813", + "_id" : "authors/2938210813", + "_rev" : "2938210813", + "name" : { + "first" : "Maxima", + "last" : "Musterfrau" + } + }, + "edge" : { + "_key" : "6833274", + "_id" : "written/6833274", + "_from" : "authors/2938210813", + "_to" : "books/2980088317", + "_rev" : "3012856317", + "pages" : "11-20" + } + } + ] + } +] +``` + +Or if you want only the information stored in the vertices, you can use this query: + +```js +arangosh> db._query( +...> "FOR b IN books " + +...> "LET authorsByBook = ( " + +...> " FOR author IN INBOUND b written " + +...> " OPTIONS { " + +...> " order: 'bfs', " + +...> " uniqueVertices: 'global' " + +...> " } " + +...> " RETURN author " + +...> ") " + +...> "RETURN { " + +...> " book: b, " + +...> " authors: authorsByBook " + +...> "} " +...> ).toArray(); +``` + +```json +[ + { + "book" : { + "_key" : 
"2980088317", + "_id" : "books/2980088317", + "_rev" : "2980088317", + "title" : "The beauty of JOINS" + }, + "authors" : [ + { + "_key" : "2938210813", + "_id" : "authors/2938210813", + "_rev" : "2938210813", + "name" : { + "first" : "Maxima", + "last" : "Musterfrau" + } + }, + { + "_key" : "2935261693", + "_id" : "authors/2935261693", + "_rev" : "2935261693", + "name" : { + "first" : "John", + "last" : "Doe" + } + } + ] + } +] +``` + +Or again embed the authors directly into the book document: + +```js +arangosh> db._query( +...> "FOR b IN books " + +...> "LET authors = ( " + +...> " FOR author IN INBOUND b written " + +...> " OPTIONS { " + +...> " order: 'bfs', " + +...> " uniqueVertices: 'global' " + +...> " } " + +...> " RETURN author " + +...> ") " + +...> "RETURN MERGE(b, {authors: authors}) " +...> ).toArray(); +``` + +```json +[ + { + "_id" : "books/2980088317", + "_key" : "2980088317", + "_rev" : "2980088317", + "title" : "The beauty of JOINS", + "authors" : [ + { + "_key" : "2938210813", + "_id" : "authors/2938210813", + "_rev" : "2938210813", + "name" : { + "first" : "Maxima", + "last" : "Musterfrau" + } + }, + { + "_key" : "2935261693", + "_id" : "authors/2935261693", + "_rev" : "2935261693", + "name" : { + "first" : "John", + "last" : "Doe" + } + } + ] + } +] +``` + +If you need the authors and their books, simply reverse the direction: + +```js +> db._query( +...> "FOR a IN authors " + +...> "LET booksByAuthor = ( " + +...> " FOR b IN OUTBOUND a written " + +...> " OPTIONS { " + +...> " order: 'bfs', " + +...> " uniqueVertices: 'global' " + +...> " } " + +...> " RETURN b" + +...> ") " + +...> "RETURN MERGE(a, {books: booksByAuthor}) " +...> ).toArray(); +``` + +```json +[ + { + "_id" : "authors/2935261693", + "_key" : "2935261693", + "_rev" : "2935261693", + "name" : { + "first" : "John", + "last" : "Doe" + }, + "books" : [ + { + "_key" : "2980088317", + "_id" : "books/2980088317", + "_rev" : "2980088317", + "title" : "The beauty of JOINS" + } + ] + }, + { + "_id" : "authors/2938210813", + "_key" : "2938210813", + "_rev" : "2938210813", + "name" : { + "first" : "Maxima", + "last" : "Musterfrau" + }, + "books" : [ + { + "_key" : "2980088317", + "_id" : "books/2980088317", + "_rev" : "2980088317", + "title" : "The beauty of JOINS" + } + ] + } +] +``` + +## More examples + +### Join tuples + +We will start with a SQL-ish result set and return each tuple (user name, friends userId) +separately. The AQL query to generate such result is: + +```aql +--- +name: joinTuples +description: '' +dataset: joinSampleDataset +bindVars: + { + "friend": "friend" + } +--- +FOR u IN users + FILTER u.active == true + LIMIT 0, 4 + FOR f IN relations + FILTER f.type == @friend && f.friendOf == u.userId + RETURN { + "user" : u.name, + "friendId" : f.thisUser + } +``` + +We iterate over the collection users. Only the 'active' users will be examined. +For each of these users we will search for up to 4 friends. We locate friends +by comparing the `userId` of our current user with the `friendOf` attribute of the +`relations` document. For each of those relations found we return the users name +and the userId of the friend. + +### Horizontal lists + +Note that in the above result, a user can be returned multiple times. This is the +SQL way of returning data. If this is not desired, the friends' ids of each user +can be returned in a horizontal list. This will return each user at most once. 
+
+The AQL query for doing so is:
+
+```aql
+FOR u IN users
+  FILTER u.active == true LIMIT 0, 4
+  RETURN {
+    "user" : u.name,
+    "friendIds" : (
+      FOR f IN relations
+        FILTER f.friendOf == u.userId && f.type == "friend"
+        RETURN f.thisUser
+    )
+  }
+```
+
+```json
+[
+  {
+    "user" : "Abigail",
+    "friendIds" : [
+      108,
+      102,
+      106
+    ]
+  },
+  {
+    "user" : "Fred",
+    "friendIds" : [
+      209
+    ]
+  },
+  {
+    "user" : "Mary",
+    "friendIds" : [
+      207,
+      104
+    ]
+  },
+  {
+    "user" : "Mariah",
+    "friendIds" : [
+      203,
+      205
+    ]
+  }
+]
+```
+
+In this query we are still iterating over the users in the `users` collection
+and for each matching user we are executing a subquery to create the matching
+list of related users.
+
+### Self joins
+
+To not only return friend ids but also the names of friends, we could "join" the
+`users` collection once more (something like a "self join"):
+
+```aql
+FOR u IN users
+  FILTER u.active == true
+  LIMIT 0, 4
+  RETURN {
+    "user" : u.name,
+    "friendIds" : (
+      FOR f IN relations
+        FILTER f.friendOf == u.userId && f.type == "friend"
+        FOR u2 IN users
+          FILTER f.thisUser == u2.userId
+          RETURN u2.name
+    )
+  }
+```
+
+```json
+[
+  {
+    "user" : "Abigail",
+    "friendIds" : [
+      "Jim",
+      "Jacob",
+      "Daniel"
+    ]
+  },
+  {
+    "user" : "Fred",
+    "friendIds" : [
+      "Mariah"
+    ]
+  },
+  {
+    "user" : "Mary",
+    "friendIds" : [
+      "Isabella",
+      "Michael"
+    ]
+  },
+  {
+    "user" : "Mariah",
+    "friendIds" : [
+      "Madison",
+      "Eva"
+    ]
+  }
+]
+```
+
+This query will then again in turn fetch the clear text name of the
+friend from the users collection. So here we iterate the users collection,
+and for each hit the relations collection, and for each hit once more the
+users collection.
+
+### Outer joins
+
+Let's find the lonely people in our database - those without friends.
+
+```aql
+FOR user IN users
+  LET friendList = (
+    FOR f IN relations
+      FILTER f.friendOf == user.userId
+      RETURN 1
+  )
+  FILTER LENGTH(friendList) == 0
+  RETURN { "user" : user.name }
+```
+
+```json
+[
+  {
+    "user" : "Abigail"
+  },
+  {
+    "user" : "Fred"
+  }
+]
+```
+
+So, for each user we pick the list of their friends and count them. The ones where
+count equals zero are the lonely people. Using `RETURN 1` in the subquery
+saves even more precious CPU cycles and gives the optimizer more alternatives.
+
+### Index usage
+
+For joins in particular, you should make sure indexes can be utilized to
+[speed up your queries](../execution-and-performance/explaining-queries.md).
+
+Note that sparse indexes don't qualify for joins. Often, you also want to join
+documents not containing the property you join with. However, sparse indexes
+don't contain references to documents that don't contain the indexed
+attributes - thus they would be missing from the join operation. For this reason,
+you should provide non-sparse indexes.
+
+### Pitfalls
+
+Since we're free of schemata, there is by default no way to tell the format of the
+documents. So, if your documents don't contain an attribute, it defaults to
+null. We can however check our data for accuracy like this:
+
+```aql
+RETURN LENGTH(FOR u IN users FILTER u.userId == null RETURN 1)
+```
+
+```json
+[
+  10000
+]
+```
+
+```aql
+RETURN LENGTH(FOR f IN relations FILTER f.friendOf == null RETURN 1)
+```
+
+```json
+[
+  10000
+]
+```
+
+So if the above queries return 10k matches each, the result of the Join tuples
+query will become 100,000,000 items larger and use much memory plus computation
+time. 
So it is generally a good idea to revalidate that the criteria for your +join conditions exist. + +Using indexes on the properties can speed up the operation significantly. +You can use the explain helper to revalidate your query actually uses them. + +If you work with joins on edge collections you would typically aggregate over +the internal fields `_id`, `_from` and `_to` (where `_id` equals `userId`, +`_from` `friendOf` and `_to` would be `thisUser` in our examples). ArangoDB +implicitly creates indexes on them. diff --git a/site/content/arangodb/oem/aql/examples-and-query-patterns/projections-and-filters.md b/site/content/arangodb/oem/aql/examples-and-query-patterns/projections-and-filters.md new file mode 100644 index 0000000000..18284c1362 --- /dev/null +++ b/site/content/arangodb/oem/aql/examples-and-query-patterns/projections-and-filters.md @@ -0,0 +1,136 @@ +--- +title: Projections and Filters in AQL +menuTitle: Projections and filters +weight: 20 +description: >- + Examples of returning documents unaltered and subsets of their attributes, + as well as filtering +--- +## Returning unaltered documents + +To return three complete documents from collection *users*, the following query can be used: + +```aql +FOR u IN users + LIMIT 0, 3 + RETURN u +``` + +```json +[ + { + "_id" : "users/229886047207520", + "_rev" : "229886047207520", + "_key" : "229886047207520", + "active" : true, + "id" : 206, + "age" : 31, + "gender" : "f", + "name" : "Abigail" + }, + { + "_id" : "users/229886045175904", + "_rev" : "229886045175904", + "_key" : "229886045175904", + "active" : true, + "id" : 101, + "age" : 36, + "name" : "Fred", + "gender" : "m" + }, + { + "_id" : "users/229886047469664", + "_rev" : "229886047469664", + "_key" : "229886047469664", + "active" : true, + "id" : 208, + "age" : 29, + "name" : "Mary", + "gender" : "f" + } +] +``` + +Note that there is a `LIMIT` clause but no `SORT` clause. In this case it is not guaranteed +which of the user documents are returned. Effectively the document return order is unspecified +if no `SORT` clause is used, and you should not rely on the order in such queries. + +## Projections + +To return a projection from the collection *users* use a modified `RETURN` instruction: + +```aql +FOR u IN users + LIMIT 0, 3 + RETURN { + "user" : { + "isActive" : u.active ? "yes" : "no", + "name" : u.name + } + } +``` + +```json +[ + { + "user" : { + "isActive" : "yes", + "name" : "John" + } + }, + { + "user" : { + "isActive" : "yes", + "name" : "Anthony" + } + }, + { + "user" : { + "isActive" : "yes", + "name" : "Fred" + } + } +] +``` + +## Filters + +To return a filtered projection from collection *users*, you can use the +`FILTER` keyword. 
Additionally, a `SORT` clause is used to have the result +returned in a specific order: + +```aql +FOR u IN users + FILTER u.active == true && u.age >= 30 + SORT u.age DESC + LIMIT 0, 5 + RETURN { + "age" : u.age, + "name" : u.name + } +``` + +```json +[ + { + "age" : 37, + "name" : "Sophia" + }, + { + "age" : 37, + "name" : "John" + }, + { + "age" : 36, + "name" : "Emma" + }, + { + "age" : 36, + "name" : "Fred" + }, + { + "age" : 34, + "name" : "Madison" + } +] +``` diff --git a/site/content/arangodb/oem/aql/examples-and-query-patterns/queries-without-collections.md b/site/content/arangodb/oem/aql/examples-and-query-patterns/queries-without-collections.md new file mode 100644 index 0000000000..3e1dcd8225 --- /dev/null +++ b/site/content/arangodb/oem/aql/examples-and-query-patterns/queries-without-collections.md @@ -0,0 +1,50 @@ +--- +title: AQL queries without collections +menuTitle: Queries without collections +weight: 50 +description: >- + You can use AQL with only expressions and no document access for some + calculation and testing purposes +--- +AQL queries typically access one or more collections to read from documents +or to modify them. Queries don't necessarily have to involve collections +however. Below are a few examples of that. + +Following is a query that returns a string value. The result string is contained in an array +because the result of every valid query is an array: + +```aql +--- +name: aqlWithoutCollections_1 +description: '' +--- +RETURN "this will be returned" +``` + +You may use variables, call functions and return arbitrarily structured results: + +```aql +--- +name: aqlWithoutCollections_2 +description: '' +--- +LET array = [1, 2, 3, 4] +RETURN { array, sum: SUM(array) } +``` + +Language constructs such as the FOR loop can be used too. Below query +creates the Cartesian product of two arrays and concatenates the value pairs: + +```aql +--- +name: aqlWithoutCollections_3 +description: '' +--- +FOR year IN [ 2011, 2012, 2013 ] + FOR quarter IN [ 1, 2, 3, 4 ] + RETURN { + year, + quarter, + formatted: CONCAT(quarter, " / ", year) + } +``` diff --git a/site/content/arangodb/oem/aql/examples-and-query-patterns/remove-vertex.md b/site/content/arangodb/oem/aql/examples-and-query-patterns/remove-vertex.md new file mode 100644 index 0000000000..ba8fc85e2f --- /dev/null +++ b/site/content/arangodb/oem/aql/examples-and-query-patterns/remove-vertex.md @@ -0,0 +1,83 @@ +--- +title: Remove vertices with AQL +menuTitle: Remove vertex +weight: 45 +description: >- + Removing connected edges along with vertex documents directly in AQL is + possible in a limited way +aliases: + - remove-nodes +--- +Deleting vertices with associated edges is currently not handled via AQL while +the [graph management interface](../../graphs/general-graphs/management.md#remove-a-vertex) +and the +[REST API for the graph module](../../develop/http-api/graphs/named-graphs.md#remove-a-vertex) +offer a vertex deletion functionality. +However, as shown in this example based on the +[Knows Graph](../../graphs/example-graphs.md#knows-graph), a query for this +use case can be created. + +![Example Graph](../../../../images/knows_graph.png) + +When deleting vertex **eve** from the graph, we also want the edges +`eve -> alice` and `eve -> bob` to be removed. +The involved graph and its only edge collection has to be known. In this case it +is the graph **knows_graph** and the edge collection **knows**. 
+ +This query will delete **eve** with its adjacent edges: + +```aql +--- +name: GRAPHTRAV_removeVertex1 +description: '' +dataset: knows_graph +--- +LET edgeKeys = (FOR v, e IN 1..1 ANY 'persons/eve' GRAPH 'knows_graph' RETURN e._key) +LET r = (FOR key IN edgeKeys REMOVE key IN knows) +REMOVE 'eve' IN persons +``` + +This query executed several actions: +- use a graph traversal of depth 1 to get the `_key` of **eve's** adjacent edges +- remove all of these edges from the `knows` collection +- remove vertex **eve** from the `persons` collection + +The following query shows a different design to achieve the same result: + +```aql +--- +name: GRAPHTRAV_removeVertex2 +description: '' +dataset: knows_graph +--- +LET edgeKeys = (FOR v, e IN 1..1 ANY 'persons/eve' GRAPH 'knows_graph' + REMOVE e._key IN knows) +REMOVE 'eve' IN persons +``` + +**Note**: The query has to be adjusted to match a graph with multiple vertex/edge collections. + +For example, the [City Graph](../../graphs/example-graphs.md#city-graph) +contains several vertex collections - `germanCity` and `frenchCity` and several +edge collections - `french / german / international Highway`. + +![Example Graph2](../../../../images/cities_graph.png) + +To delete city **Berlin** all edge collections `french / german / international Highway` +have to be considered. The **REMOVE** operation has to be applied on all edge +collections with `OPTIONS { ignoreErrors: true }`. Not using this option will stop the query +whenever a non existing key should be removed in a collection. + +```aql +--- +name: GRAPHTRAV_removeVertex3 +description: '' +dataset: routeplanner +--- +LET edgeKeys = (FOR v, e IN 1..1 ANY 'germanCity/Berlin' GRAPH 'routeplanner' RETURN e._key) +LET r = (FOR key IN edgeKeys REMOVE key IN internationalHighway + OPTIONS { ignoreErrors: true } REMOVE key IN germanHighway + OPTIONS { ignoreErrors: true } REMOVE key IN frenchHighway + OPTIONS { ignoreErrors: true }) +REMOVE 'Berlin' IN germanCity +``` diff --git a/site/content/arangodb/oem/aql/examples-and-query-patterns/traversals.md b/site/content/arangodb/oem/aql/examples-and-query-patterns/traversals.md new file mode 100644 index 0000000000..ed6745de4a --- /dev/null +++ b/site/content/arangodb/oem/aql/examples-and-query-patterns/traversals.md @@ -0,0 +1,118 @@ +--- +title: Combining AQL Graph Traversals +menuTitle: Traversals +weight: 40 +description: >- + You can combine graph queries with other AQL features like geo-spatial search +--- +## Finding the start vertex via a geo query + +Our first example will locate the start vertex for a graph traversal via [a geo index](../../indexes-and-search/indexing/working-with-indexes/geo-spatial-indexes.md). +We use the [City Graph](../../graphs/example-graphs.md#city-graph) and its geo indexes: + +![Cities Example Graph](../../../../images/cities_graph.png) + +```js +--- +name: COMBINING_GRAPH_01_create_graph +description: '' +--- +var examples = require("@arangodb/graph-examples/example-graph"); +examples.loadGraph("routeplanner"); +~examples.dropGraph("routeplanner"); +``` + +We search all german cities in a range of 400 km around the ex-capital **Bonn**: **Hamburg** and **Cologne**. +We won't find **Paris** since its in the `frenchCity` collection. 
+ +```aql +--- +name: COMBINING_GRAPH_02_show_geo +description: '' +dataset: routeplanner +bindVars: + { + "bonn": [7.0998, 50.7340], + "radius": 400000 + } +--- +FOR startCity IN germanCity + FILTER GEO_DISTANCE(@bonn, startCity.geometry) < @radius + RETURN startCity._key +``` + +Let's revalidate that the geo indexes are actually used: + +```aql +--- +name: COMBINING_GRAPH_03_explain_geo +description: '' +dataset: routeplanner +explain: true +bindVars: + { + "bonn": [7.0998, 50.7340], + "radius": 400000 + } +--- +FOR startCity IN germanCity + FILTER GEO_DISTANCE(@bonn, startCity.geometry) < @radius + RETURN startCity._key +``` + +And now combine this with a graph traversal: + +```aql +--- +name: COMBINING_GRAPH_04_combine +description: '' +dataset: routeplanner +bindVars: + { + "bonn": [7.0998, 50.7340], + "radius": 400000 + } +--- +FOR startCity IN germanCity + FILTER GEO_DISTANCE(@bonn, startCity.geometry) < @radius + FOR v, e, p IN 1..1 OUTBOUND startCity + GRAPH 'routeplanner' + RETURN {startcity: startCity._key, traversedCity: v._key} +``` + +The geo index query returns us `startCity` (**Cologne** and **Hamburg**) which we then use as starting point for our graph traversal. +For simplicity we only return their direct neighbours. We format the return result so we can see from which `startCity` the traversal came. + +Alternatively we could use a `LET` statement with a subquery to group the traversals by their `startCity` efficiently: + +```aql +--- +name: COMBINING_GRAPH_05_combine_let +description: '' +dataset: routeplanner +bindVars: + { + "bonn": [7.0998, 50.7340], + "radius": 400000 + } +--- +FOR startCity IN germanCity + FILTER GEO_DISTANCE(@bonn, startCity.geometry) < @radius + LET oneCity = ( + FOR v, e, p IN 1..1 OUTBOUND startCity + GRAPH 'routeplanner' RETURN v._key + ) + RETURN {startCity: startCity._key, connectedCities: oneCity} +``` + +Finally, we clean up again: + +```js +--- +name: COMBINING_GRAPH_06_cleanup +description: '' +--- +~var examples = require("@arangodb/graph-examples/example-graph"); +~var g = examples.loadGraph("routeplanner"); +examples.dropGraph("routeplanner"); +``` diff --git a/site/content/arangodb/oem/aql/examples-and-query-patterns/upsert-repsert-guide.md b/site/content/arangodb/oem/aql/examples-and-query-patterns/upsert-repsert-guide.md new file mode 100644 index 0000000000..7068acebbe --- /dev/null +++ b/site/content/arangodb/oem/aql/examples-and-query-patterns/upsert-repsert-guide.md @@ -0,0 +1,335 @@ +--- +title: Conditionally Inserting and Modifying Documents +menuTitle: Upsert / Repsert Guide +weight: 60 +description: >- + AQL offers an `UPSERT` operation and an `INSERT` operation with different + overwrite modes, and you can alternatively use the Document API, each having + different features and performance characteristics +--- +A common requirement when ingesting data is to ensure that certain documents +exist in a collection. Oftentimes when running a command it is unclear whether +the target documents are already present in the collection or need to be +inserted first. + +Unconditional `INSERT` operations will not work here, because they may run +into errors if the target documents already exist. This will trigger a +"unique constraint violation" error. Unconditional `UPDATE` or `REPLACE` +operations will also fail, because they require that the target documents are +already present. If this is not the case, the operations would run into +"document not found" errors. 
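+
+As a minimal illustration of the first failure mode (using the `pages` collection
+and the `index.html` key that also appear in the examples below), the following
+plain `INSERT` succeeds on the first run, but fails with a "unique constraint
+violation" error when it is executed again with the same key:
+
+```aql
+INSERT { _key: "index.html", status: "created" }
+IN pages
+```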
+
+So what needs to be run instead are conditional inserts/updates/replaces, also
+called _upserts_ or _repserts_. The behavior of such operations is:
+
+- Check if a document exists, based on some criteria
+- If it does not exist, create the document
+- If it exists, update or replace it with a new version
+
+ArangoDB provides the following options in AQL to achieve this:
+
+- `UPSERT` AQL operation
+- `INSERT` AQL operation with `overwriteMode`
+- Insert operation not using AQL, but the Document REST API
+
+These alternatives have different capabilities and performance characteristics.
+
+## `UPSERT` AQL Operation
+
+Let us start with the [`UPSERT` AQL operation](../high-level-operations/upsert.md),
+which is very generic and flexible.
+
+The purpose of the `UPSERT` AQL operation is to ensure that a specific document
+exists after the operation has finished.
+
+`UPSERT` will look for a specific document, based on user-configurable
+attributes/values, and create the document if it does not yet exist.
+If `UPSERT` finds such a document, it can partially adjust it (`UPDATE`) or fully
+replace it (`REPLACE`).
+
+To recap, the syntaxes of AQL `UPSERT` are, depending on whether you want to
+update or replace a document:
+
+```aql
+UPSERT <search-expression>
+INSERT <insert-expression>
+UPDATE <update-expression>
+IN <collection> OPTIONS <options>
+```
+
+or
+
+```aql
+UPSERT <search-expression>
+INSERT <insert-expression>
+REPLACE <replace-expression>
+IN <collection> OPTIONS <options>
+```
+
+The `OPTIONS` part is optional.
+
+An example `UPSERT` operation looks like this:
+
+```aql
+UPSERT { page: "index.html" }
+INSERT { page: "index.html", status: "inserted" }
+UPDATE { status: "updated" }
+IN pages
+```
+
+This will look for a document in the `pages` collection with the `page`
+attribute having a value of `index.html`. If such a document cannot be found, the
+`INSERT` part will be executed, which will create a document with the `page` and
+`status` attributes. If the operation finds an existing document with `page`
+being `index.html`, it will execute the `UPDATE` part, which will set the
+document's `status` attribute to `updated`.
+
+### Tracking Modification Dates
+
+The `UPSERT` AQL operation is sometimes used in combination with
+date/time-keeping. For example, the following query keeps track of when a
+document was first created, and when it was last updated:
+
+```aql
+UPSERT { page: "index.html" }
+INSERT { page: "index.html", created: DATE_NOW() }
+UPDATE { updated: DATE_NOW() }
+IN pages
+```
+
+### `OLD` variable
+
+The `UPSERT` AQL operation also provides a pseudo-variable named `OLD` to refer
+to the existing document and its values in the `UPDATE`/`REPLACE` part.
+Following is an example that increments a counter on a document whenever the
+`UPSERT` operation is executed:
+
+```aql
+UPSERT { page: "index.html" }
+INSERT { page: "index.html", hits: 1 }
+UPDATE { hits: OLD.hits + 1 }
+IN pages
+```
+
+### `UPSERT` Caveats
+
+`UPSERT` is a very flexible operation, so some things should be kept in mind to
+use it effectively and efficiently.
+
+#### Repeat the Search Attributes
+
+First of all, the `INSERT` part of an `UPSERT` operation should contain all
+attributes that are used in the search expression. Consider the following
+counter-example:
+
+```aql
+UPSERT { page: "index.html" }
+INSERT { status: "inserted" } /* page attribute missing here! */
+UPDATE { status: "updated" }
+IN pages
+```
+
+Forgetting to specify the search attributes in the `INSERT` part introduces a
+problem: The first time the `UPSERT` is executed and does not find a document
+with `page` being `index.html`, it will branch into the `INSERT` part as
+expected. 
However, the `INSERT` part will create a document with only the +`status` attribute set. The `page` attribute is missing here, so when the +`INSERT` completes, there is still no document with `page` being `index.html`. +That means whenever this `UPSERT` statement executes, it will branch into the +`INSERT` part, and the `UPDATE` part will never be reached. This is likely +unintentional. + +The problem can easily be avoided by adding the search attributes to the +`INSERT` part: + +```aql +UPSERT { page: "index.html" } +INSERT { page: "index.html", status: "inserted" } +UPDATE { status: "updated" } +IN pages +``` + +Note that it is not necessary to repeat the search attributes in the `UPDATE` +part, because `UPDATE` is a partial update. It will only set the attributes that +are specified in the `UPDATE` part, and leave all other existing attributes +alone. However, it is necessary to repeat the search attributes in the `REPLACE` +part, because `REPLACE` will completely overwrite the existing document with +what is specified in the `REPLACE` part. + +That means when using the `REPLACE` operation, the query should look like: + +```aql +UPSERT { page: "index.html" } +INSERT { page: "index.html", status: "inserted" } +REPLACE { page: "index.html", status: "updated" } +IN pages +``` + +#### Use Indexes for Search Attributes + +A downside of `UPSERT`'s flexibility is that it can be used on arbitrary +collection attributes, even if those are not indexed. + +When the `UPSERT` looks for an existing document, it _will_ use an index if an +index exists, but will also continue if no index exists. In the latter case, +the `UPSERT` will execute a full collection scan, which can be expensive for +large collections. So it is advised to create an index on the search +attribute(s) used in an `UPSERT`. + +#### `UPSERT` is Non-Atomic + +The overall `UPSERT` operation does not execute atomically for a single document. +It is basically a document lookup followed by either a document insert, update +or replace operation. + +That means if multiple `UPSERT` operations run concurrently with the same search +values, they may all determine that the target document does not exist - and +then all decide to create such document. That will mean one will end up with +multiple instances of the target document afterwards. + +To avoid such concurrency issues, a unique index can be created on the search +attribute(s). Such index will prevent concurrent `UPSERT` operations from +creating identical documents. Instead, only one of the concurrent `UPSERT`s will +succeed, whereas the others will fail with a "unique constraint violated" error. +In that case the client application can either retry the operation (which then +should go into the `UPDATE`/`REPLACE` branch), or ignore the error if the goal +was only to ensure the target document exists. + +Using a unique index on the search attribute(s) will thus improve lookup +performance and avoid duplicates. + +#### Using Shard Key(s) for Lookups + +In a cluster setup, the search expression should contain the shard key(s), as +this allows the lookup to be sent to a single shard only. This will be more +efficient than having to execute the lookup on all the shards of the collection. + +Another benefit of using the shard key(s) in the search expression is that +unique indexes are only supported if they contain the shard key(s). 
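+
+As a sketch of the index-related caveats above (assuming the `pages` collection
+from the earlier examples, on a single server or with `page` as the shard key),
+a unique persistent index on the search attribute both avoids full collection
+scans during the lookup and prevents concurrent `UPSERT` operations from
+creating duplicate documents:
+
+```js
+db.pages.ensureIndex({ type: "persistent", fields: ["page"], unique: true });
+```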
+
+## `INSERT` AQL Operation with `overwriteMode`
+
+While the `UPSERT` AQL operation is very powerful and flexible, it is often not
+the ideal choice for high-volume ingestion.
+
+A much more efficient alternative to the `UPSERT` AQL operation is the
+[`INSERT` AQL operation](../high-level-operations/insert.md) with the `overwriteMode`
+attribute set. This operation is not a drop-in replacement for `UPSERT`, but
+rather a fast alternative in case the document key (`_key` attribute) is known
+when the operation is executed, and none of the old values need to be referenced.
+
+The general syntax of the `INSERT` AQL operation is:
+
+```aql
+INSERT <document>
+IN <collection> OPTIONS <options>
+```
+
+As we will deal with the `overwriteMode` option here, we are focusing on
+`INSERT` operations with this option set, for example:
+
+```aql
+INSERT { _key: "index.html", status: "created" }
+IN pages OPTIONS { overwriteMode: "ignore" }
+```
+
+Regardless of the selected `overwriteMode`, the `INSERT` operation will insert
+the document if no document exists in the collection with the specified `_key`.
+In this aspect it behaves as a regular `INSERT` operation.
+
+However, if a document with the specified `_key` already exists in the
+collection, the `INSERT` behavior will be as follows, depending on the selected
+`overwriteMode`:
+
+- `conflict` (default): if a document with the specified `_key` exists, return
+  a "unique constraint violation" error.
+- `ignore`: if a document with the specified `_key` exists, do nothing.
+  Especially do not report a "unique constraint violation" error.
+- `update`: if a document with the specified `_key` exists, (partially) update
+  the document with the attributes specified.
+- `replace`: if a document with the specified `_key` exists, fully replace the
+  document with the attributes specified.
+
+If no `overwriteMode` is specified, the behavior of an `INSERT` operation is as
+if the `overwriteMode` was set to `conflict`.
+
+The benefit of using `INSERT` with `overwriteMode` set to `ignore`, `update` or
+`replace` is that the `INSERT` operation is going to be very fast, especially in
+comparison with the `UPSERT` operation. In addition, `INSERT` will do a lookup
+using the `_key` attribute, which is always indexed. So it will always use the
+primary index and never do full collection scans. It also does not require
+setting up additional indexes, because the primary index is automatically
+present for all collections.
+
+There are also a few caveats when working with `INSERT` AQL operations:
+
+- They can only be used when the value of the `_key` attribute is known at the
+  time of insert. That means the client application must be able to provide the
+  document keys in a deterministic way.
+
+- The values that can be used for the `_key` attribute have some character and
+  length restrictions, but alphanumeric keys work well.
+
+- In a cluster setup, the underlying collection must be sharded by `_key`. This
+  is the default shard key, however.
+
+- There is no access to the data of an existing document for arbitrary
+  calculations when going into the `update` or `replace` mode.
+
+Please note that even though the `INSERT` AQL operation cannot refer to existing
+documents to calculate values for updating/replacing, it can still return the
+previous version of the document in case the document is already present.
+This can be achieved by appending a `RETURN OLD` to the `INSERT` operation,
+e.g.
+ +```aql +INSERT { _key: "index.html", status: "created" } +IN pages OPTIONS { overwriteMode: "replace" } +RETURN OLD +``` + +It is also possible to return the new version of the document (the inserted +document if no previous document existed, or the updated/replaced version in +case a document already existed) by using `RETURN NEW`: + +```aql +INSERT { _key: "index.html", status: "created" } +IN pages OPTIONS { overwriteMode: "replace" } +RETURN NEW +``` + +## Insert Operation not Using AQL + +There is the option to execute an insert operation with `overwriteMode` outside +of AQL. The [`POST /_api/document/{collection}`](../../develop/http-api/documents.md#create-multiple-documents) +endpoint is a dedicated REST API for insert operations, which can handle one +document, or multiple documents at once. + +Conceptually this API behaves like the `INSERT` AQL operation, but it can be +called with a batch of documents at once. This is the most efficient solution, +and should be preferred if possible. + +Most ArangoDB drivers also provide a means to insert multiple documents at once, +which will internally call this same REST API. + +The REST API provides the `returnOld` and `returnNew` options to make it return +the previous versions of documents or the insert/updated/replaced documents, in +the same way as the `INSERT` AQL operation can do. + +AQL `INSERT` queries with the `optimize-cluster-multiple-document-operations` +optimization applied perform similarly well in cluster deployments, but it +cannot be applied in all cases (see the list of +[optimizer rules](../execution-and-performance/query-optimization.md#optimize-cluster-multiple-document-operations) +for details). + +## Summary + +The `UPSERT` AQL operation is the most flexible way to conditionally insert or +update/replace documents in ArangoDB, but it is also the least efficient variant. + +The `INSERT` AQL operation with the `overwriteMode` set will outperform +`UPSERT`, but it can only be used for some use cases. + +Using the dedicated REST API for document inserts will be even more efficient, +and is thus the preferred option for bulk document inserts, but AQL `INSERT` +queries can be almost as fast. diff --git a/site/content/arangodb/oem/aql/execution-and-performance/_index.md b/site/content/arangodb/oem/aql/execution-and-performance/_index.md new file mode 100644 index 0000000000..305ecfedb8 --- /dev/null +++ b/site/content/arangodb/oem/aql/execution-and-performance/_index.md @@ -0,0 +1,7 @@ +--- +title: AQL Execution and Performance +menuTitle: Execution and Performance +weight: 50 +description: >- + This chapter describes AQL features related to query execution and query performance +--- diff --git a/site/content/arangodb/oem/aql/execution-and-performance/caching-query-results.md b/site/content/arangodb/oem/aql/execution-and-performance/caching-query-results.md new file mode 100644 index 0000000000..8e76741ee5 --- /dev/null +++ b/site/content/arangodb/oem/aql/execution-and-performance/caching-query-results.md @@ -0,0 +1,228 @@ +--- +title: The AQL query results cache +menuTitle: Caching query results +weight: 30 +description: >- + AQL provides an optional query results cache in single server deployments +--- +The purpose of the query results cache is to avoid repeated calculation of the same +query results. It is useful if data-reading queries repeat a lot and there are +not many write queries. 
+ +The query results cache is transparent so users do not need to manually invalidate +results in it if underlying collection data are modified. + +{{< info >}} +The AQL query results cache is only available for single servers, i.e. servers that +are not part of a cluster setup. +{{< /info >}} + +## Modes + +The cache can be operated in the following modes: + +- `off`: The cache is disabled. No query results are stored. +- `on`: The cache stores the results of all AQL queries unless the `cache` + query option is set to `false`. +- `demand`: The cache stores the results of AQL queries that have the + `cache` query option set to `true` but ignores all others. + +The mode can be set at server startup as well as at runtime, see +[Global configuration](#global-configuration). + +## Query eligibility + +The query results cache considers two queries identical if they have exactly the +same query string and the same bind variables. Any deviation in terms of whitespace, +capitalization etc. is considered a difference. The query string is hashed +and used as the cache lookup key. If a query uses bind parameters, these are also +hashed and used as part of the cache lookup key. + +Even if the query strings of two queries are identical, the query results cache +treats them as different queries if they have different bind parameter +values. Other components that become part of a query's cache key are the +`count`, `fullCount`, and `optimizer` attributes. + +If the cache is enabled, it is checked whether it has a result ready for a +particular query at the very start of processing the query request. If this is +the case, the query result is served directly from the cache, which is normally +very efficient. If the query cannot be found in the cache, it is executed +as usual. + +If the query is eligible for caching and the cache is enabled, the query +result is stored in the query results cache so it can be used for subsequent +executions of the same query. + +A query is eligible for caching only if all of the following conditions are met: + +- The server the query executes on is a single server (i.e. not part of a cluster). +- The query is a read-only query and does not modify data in any collection. +- No warnings were produced while executing the query. +- The query is deterministic and only uses deterministic functions whose results + are marked as cacheable. +- The size of the query result does not exceed the cache's configured maximal + size for individual cache results or cumulated results. +- The query is not executed using a streaming cursor (`"stream": true` query option). + +The usage of non-deterministic functions leads to a query not being cacheable. +This is intentional to avoid caching of function results which should rather +be calculated on each invocation of the query (e.g. `RAND()` or `DATE_NOW()`). + +The query results cache considers all user-defined AQL functions to be non-deterministic +as it has no insight into these functions. + +## Cache invalidation + +The cached results are fully or partially invalidated automatically if +queries modify the data of collections that were used during the computation of +the cached query results. This is to protect users from getting stale results +from the query results cache. + +This also means that if the cache is turned on, then there is an additional +cache invalidation check for each data-modification operation (e.g. insert, update, +remove, truncate operations as well as AQL data-modification queries). 
+ +**Example** + +If the result of the following query is present in the query results cache, +then either modifying data in the `users` or `organizations` collection +removes the already computed result from the cache: + +```aql +FOR user IN users + FOR organization IN organizations + FILTER user.organization == organization._key + RETURN { user: user, organization: organization } +``` + +Modifying data in other unrelated collections does not lead to this +query result being removed from the cache. + +## Performance considerations + +The query results cache is organized as a hash table, so looking up whether a query result +is present in the cache is fast. Still, the query string and the bind +parameter used in the query need to be hashed. This is a slight overhead that +is not present if the cache is disabled or a query is marked as not cacheable. + +Additionally, storing query results in the cache and fetching results from the +cache requires locking via a read/write lock. While many thread can read in parallel from +the cache, there can only be a single modifying thread at any given time. Modifications +of the query cache contents are required when a query result is stored in the cache +or during cache invalidation after data-modification operations. Cache invalidation +requires time proportional to the number of cached items that need to be invalidated. + +There may be workloads in which enabling the query results cache leads to a performance +degradation. It is not recommended to turn the query results cache on in workloads that only +modify data, or that modify data more often than reading it. Enabling the cache +also provides no benefit if queries are very diverse and do not repeat often. +In read-only or read-mostly workloads, the cache is beneficial if the same +queries are repeated lots of times. + +In general, the query results cache provides the biggest improvements for queries with +small result sets that take long to calculate. If query results are very big and +most of the query time is spent on copying the result from the cache to the client, +then the cache does not provide much benefit. + +## Global configuration + +The query results cache can be configured at server start with the +[`--query.cache-mode`](../../components/arangodb-server/options.md#--querycache-mode) +startup option. + +The cache mode can also be changed at runtime using the JavaScript API as follows: + +```js +require("@arangodb/aql/cache").properties({ mode: "on" }); +``` + +The maximum number of cached results in the cache for each database can be configured +at server start using the following configuration parameters: + +- `--query.cache-entries`: The maximum number of results in the query results cache per database +- `--query.cache-entries-max-size`: The maximum cumulated size of results in the query results cache per database +- `--query.cache-entry-max-size`: The maximum size of an individual result entry in query results cache +- `--query.cache-include-system-collections`: Whether to include system collection queries in the query results cache + +These parameters can be used to put an upper bound on the number and size of query +results in each database's query cache and thus restrict the cache's memory consumption. 
+ +These value can also be adjusted at runtime as follows: + +```js +require("@arangodb/aql/cache").properties({ + maxResults: 200, + maxResultsSize: 8 * 1024 * 1024, + maxEntrySize: 1024 * 1024, + includeSystem: false +}); +``` + +The above settings limit the number of cached results in the query results cache to 200 +results per database, and to 8 MiB cumulated query result size per database. The maximum +size of each query cache entry is restricted to 1 MiB. Queries that involve system +collections are excluded from caching. + +You can also change the configuration at runtime with the +[HTTP API](../../develop/http-api/queries/aql-query-results-cache.md). + +## Per-query configuration + +When a query is sent to the server for execution and the cache is set to `on` or `demand`, +the query executor checks the query's `cache` option. If the query cache mode is +`on`, then not setting this query option or setting it to anything but `false` makes the +query executor consult the query results cache. If the query cache mode is `demand`, then setting +the `cache` option to `true` makes the executor look for the query in the query results cache. +When the query cache mode is `off`, the executor does not look for the query in the cache. + +The `cache` attribute can be set as follows via the `db._createStatement()` function: + +```js +var stmt = db._createStatement({ + query: "FOR doc IN users LIMIT 5 RETURN doc", + options: { + cache: true + } +}); + +stmt.execute(); +``` + +When using the `db._query()` function, the `cache` attribute can be set as follows: + +```js +db._query("FOR doc IN users LIMIT 5 RETURN doc", {}, { cache: true }); +``` + +You can also set the `cache` query option in the +[HTTP API](../../develop/http-api/queries/aql-queries.md#create-a-cursor). + +Each query result returned contain a `cached` attribute. It is set to `true` +if the result was retrieved from the query results cache, and `false` otherwise. Clients can use +this attribute to check if a specific query was served from the cache or not. + +## Query results cache inspection + +The contents of the query results cache can be checked at runtime using the cache's +`toArray()` function: + +```js +require("@arangodb/aql/cache").toArray(); +``` + +This returns a list of all query results stored in the current database's query +results cache. + +The query results cache for the current database can be cleared at runtime using the +cache's `clear` function: + +```js +require("@arangodb/aql/cache").clear(); +``` + +## Restrictions + +Query results that are returned from the query results cache may contain execution statistics +stemming from the initial, uncached query execution. This means for a cached query results, +the `extra.stats` attribute may contain stale data, especially in terms of the `executionTime` +and `profile` attribute values. 
diff --git a/site/content/arangodb/oem/aql/execution-and-performance/explaining-queries.md b/site/content/arangodb/oem/aql/execution-and-performance/explaining-queries.md new file mode 100644 index 0000000000..09a4176705 --- /dev/null +++ b/site/content/arangodb/oem/aql/execution-and-performance/explaining-queries.md @@ -0,0 +1,278 @@ +--- +title: Explain AQL Queries +menuTitle: Explaining queries +weight: 15 +description: >- + You can explain and profile AQL queries to inspect the execution plans and to + understand the performance characteristics, as well as create debug packages + for reporting issues +# Undocumented on purpose: +# require("@arangodb/aql/explainer").explainRegisters(data, options, shouldPrint); +# require("@arangodb/aql/explainer").debug(query, bindVars, options); +--- +If it is unclear how a given query will perform, clients can retrieve a query's execution plan +from the AQL query optimizer without actually executing the query. Getting the query execution +plan from the optimizer is called *explaining*. + +An explain throws an error if the given query is syntactically invalid. Otherwise, it +returns the execution plan and some information about what optimizations could be applied to +the query. The query is not executed. + +You can explain a query using the [HTTP REST API](../../develop/http-api/queries/aql-queries.md#explain-an-aql-query) +or via _arangosh_. + +## Inspecting query plans + +The `explain()` method of an `ArangoStatement` (`db._createStatement(...).explain()`) +creates very verbose output. To get a human-readable output of the query plan, +you can use `db._explain()`. You can use it as follows (without disabling syntax +highlighting with `{ colors: false }`): + +```js +--- +name: 01_workWithAQL_databaseExplain +description: '' +--- +db._explain("LET s = SLEEP(0.25) LET t = SLEEP(0.5) RETURN 1", {}, {colors: false}); +``` + +The plan contains all execution nodes that are used during a query. These nodes represent different +stages in a query. Each stage gets the input from the stage directly above (its dependencies). +The plan shows you the estimated number of items (results) for each query stage (under **Est.**). Each +query stage roughly equates to a line in your original query, which you can see under **Comment**. + +## Profiling queries + +Sometimes when you have a complex query it can be unclear on what time is spent +during the execution, even for intermediate ArangoDB users. + +By profiling a query it gets executed with special instrumentation code enabled. +It gives you all the usual information like when explaining a query, but +additionally you get the query profile, [runtime statistics](query-statistics.md) +and per-node statistics. + +To use this in an interactive fashion in the shell, you can call +`db._profileQuery()`, or use the web interface. You can use `db._profileQuery()` +as follows (without disabling syntax highlighting with `{ colors: false }`): + +```js +--- +name: 01_workWithAQL_databaseProfileQuery +description: '' +--- +db._profileQuery("LET s = SLEEP(0.25) LET t = SLEEP(0.5) RETURN 1", {}, {colors: false}); +``` + +For more information, see [Profiling Queries](query-profiling.md). + +## Execution plans in detail + +By default, the query optimizer returns what it considers to be the *optimal plan*. The +optimal plan is returned in the `plan` attribute of the result. If `explain` is +called with the `allPlans` option set to `true`, all plans are returned in the `plans` +attribute. 
+ +The result object also contains a `warnings` attribute, which is an array of +warnings that occurred during optimization or execution plan creation. + +Each plan in the result is an object with the following attributes: +- `nodes`: the array of execution nodes of the plan. See the list of + [execution nodes](query-optimization.md#execution-nodes) +- `estimatedCost`: the total estimated cost for the plan. If there are multiple + plans, the optimizer chooses the plan with the lowest total cost. +- `collections`: an array of collections used in the query +- `rules`: an array of rules the optimizer applied. See the list of + [optimizer rules](query-optimization.md#optimizer-rules) +- `variables`: array of variables used in the query (note: this may contain + internal variables created by the optimizer) + +Here is an example for retrieving the execution plan of a simple query: + +```js +--- +name: 07_workWithAQL_statementsExplain +description: '' +--- +var stmt = db._createStatement("FOR user IN _users RETURN user"); +stmt.explain(); +``` + +As the output of `explain()` is very detailed, it is recommended to use some +scripting to make the output less verbose: + +```js +--- +name: 08_workWithAQL_statementsPlans +description: '' +--- +var formatPlan = function (plan) { + return { estimatedCost: plan.estimatedCost, + nodes: plan.nodes.map(function(node) { + return node.type; }) }; }; +formatPlan(stmt.explain().plan); +``` + +If a query contains bind parameters, they must be added to the statement **before** +`explain()` is called: + +```js +--- +name: 09_workWithAQL_statementsPlansBind +description: '' +--- +var stmt = db._createStatement( + `FOR doc IN @@collection FILTER doc.user == @user RETURN doc` +); +stmt.bind({ "@collection" : "_users", "user" : "root" }); +stmt.explain(); +``` + +In some cases, the AQL optimizer creates multiple plans for a single query. By default +only the plan with the lowest total estimated cost is kept, and the other plans are +discarded. To retrieve all plans the optimizer has generated, `explain` can be called +with the option `allPlans` set to `true`. + +In the following example, the optimizer has created two plans: + +```js +--- +name: 10_workWithAQL_statementsPlansOptimizer0 +description: '' +--- +var stmt = db._createStatement( + "FOR user IN _users FILTER user.user == 'root' RETURN user"); +stmt.explain({ allPlans: true }).plans.length; +``` + +To see a slightly more compact version of the plan, the following +transformation can be applied: + +```js +--- +name: 10_workWithAQL_statementsPlansOptimizer1 +description: '' +--- +~var stmt = db._createStatement("FOR user IN _users FILTER user.user == 'root' RETURN user"); +stmt.explain({ allPlans: true }).plans.map( + function(plan) { return formatPlan(plan); }); +``` + +`explain()` also accepts the following additional options: +- `maxPlans`: limits the maximum number of plans that are created by the AQL query optimizer +- `optimizer`: + - `rules`: an array of to-be-included or to-be-excluded optimizer rules + can be put into this attribute, telling the optimizer to include or exclude + specific rules. To disable a rule, prefix its name with a `-`, to enable a rule, prefix it + with a `+`. There is also a pseudo-rule `all`, which matches all optimizer rules. 
+ +The following example disables all optimizer rules but `remove-redundant-calculations`: + +```js +--- +name: 10_workWithAQL_statementsPlansOptimizer2 +description: '' +--- +~var stmt = db._createStatement("FOR user IN _users FILTER user.user == 'root' RETURN user"); +stmt.explain({ optimizer: { + rules: [ "-all", "+remove-redundant-calculations" ] } }); +``` + +The contents of an execution plan are meant to be machine-readable. To get a human-readable +version of a query's execution plan, the following commands can be used +(without disabling syntax highlighting with `{ colors: false }`): + +```js +--- +name: 10_workWithAQL_statementsPlansOptimizer3 +description: '' +--- +var query = "FOR doc IN mycollection FILTER doc.value > 42 RETURN doc"; +require("@arangodb/aql/explainer").explain(query, {colors:false}); +``` + +The above command prints the query's execution plan in the ArangoShell +directly, focusing on the most important information. + +## Gathering debug information about a query + +If an explain provides no suitable insight into why a query does not perform as +expected, it may be reported to the Arango support. In order to make this as easy +as possible, there is a built-in command in ArangoShell for packaging the query, its +bind parameters, and all data required to execute the query elsewhere. + +`require("@arangodb/aql/explainer").debugDump(filepath, query[, bindVars[, options]])` + +You can specify the following parameters: + +- `filepath` (string): A file path to save the debug package to +- `query` (string): An AQL query +- `bindVars` (object, _optional_): The bind parameters for the query +- `options` (object, _optional_): Options for the query, with two additionally + supported settings compared to `db._query()`: + - `examples` (number, _optional_): How many sample documents of your + collection data to include. Default: `0` + - `anonymize` (boolean, _optional_): Whether all string attribute values of + the sample documents shall be substituted with strings like `XXX`. + +The command stores all data in a file with a configurable filename: + +```js +--- +name: 10_workWithAQL_debugging1 +description: '' +--- +var query = "FOR doc IN mycollection FILTER doc.value > 42 RETURN doc"; +require("@arangodb/aql/explainer").debugDump("/tmp/query-debug-info", query); +``` + +Entitled users can send the generated file to the Arango support to facilitate +reproduction and debugging. + +{{< tip >}} +You can also create debug packages using the web interface, see +[Query debug packages](../../operations/troubleshooting/query-debug-packages.md). +{{< /tip >}} + +If a query contains bind parameters, you need to specify them along with the query +string: + +```js +--- +name: 10_workWithAQL_debugging2 +description: '' +--- +var query = "FOR doc IN @@collection FILTER doc.value > @value RETURN doc"; +var bindVars = { value: 42, "@collection": "mycollection" }; +require("@arangodb/aql/explainer").debugDump("/tmp/query-debug-info", query, bindVars); +``` + +It is also possible to include example documents from the underlying collection in +order to make reproduction even easier. Example documents can be sent as they are, or +in an anonymized form. The number of example documents can be specified in the `examples` +options attribute, and should generally be kept low. The `anonymize` option replaces +the contents of string attributes in the examples with `XXX`. However, it does not +replace any other types of data (e.g. numeric values) or attribute names. 
Attribute +names in the examples are always preserved because they may be indexed and used in +queries: + +```js +--- +name: 10_workWithAQL_debugging3 +description: '' +--- +var query = "FOR doc IN @@collection FILTER doc.value > @value RETURN doc"; +var bind = { value: 42, "@collection": "mycollection" }; +var options = { examples: 10, anonymize: true }; +require("@arangodb/aql/explainer").debugDump("/tmp/query-debug-info", query, bind, options); +``` + +To get a human-readable output from a debug package JSON file, you can use the +`inspectDump()` method: + +`require("@arangodb/aql/explainer").inspectDump(inFilepath[, outFilepath])` + +You can specify the following parameters: + +- `inFilepath` (string): The path to the debug package JSON file +- `outFilepath` (string, _optional_): A path to store the formatted output in a + file instead of printing to the shell diff --git a/site/content/arangodb/oem/aql/execution-and-performance/parsing-queries.md b/site/content/arangodb/oem/aql/execution-and-performance/parsing-queries.md new file mode 100644 index 0000000000..8c87fab393 --- /dev/null +++ b/site/content/arangodb/oem/aql/execution-and-performance/parsing-queries.md @@ -0,0 +1,32 @@ +--- +title: Parsing AQL queries +menuTitle: Parsing queries +weight: 10 +description: >- + Clients can check if given AQL queries are syntactically valid using an + HTTP API or JavaScript API +--- +ArangoDB provides an [HTTP REST API](../../develop/http-api/queries/aql-queries.md) +for parsing and thus statically validating queries. + +A query can also be parsed from the ArangoShell using `ArangoStatement`'s `parse` method. The +`parse` method will throw an exception if the query is syntactically invalid. Otherwise, it will +return the some information about the query. + +The return value is an object with the collection names used in the query listed in the +`collections` attribute, and all bind parameters listed in the `bindVars` attribute. +Additionally, the internal representation of the query, the query's abstract syntax tree, will +be returned in the `AST` attribute of the result. Please note that the abstract syntax tree +will be returned without any optimizations applied to it. + +```js +--- +name: 11_workWithAQL_parseQueries +description: '' +--- +var stmt = db._createStatement( + "FOR doc IN @@collection FILTER doc.foo == @bar RETURN doc"); +stmt.parse(); +~removeIgnoreCollection("mycollection") +~db._drop("mycollection") +``` diff --git a/site/content/arangodb/oem/aql/execution-and-performance/query-optimization.md b/site/content/arangodb/oem/aql/execution-and-performance/query-optimization.md new file mode 100644 index 0000000000..919543e71e --- /dev/null +++ b/site/content/arangodb/oem/aql/execution-and-performance/query-optimization.md @@ -0,0 +1,626 @@ +--- +title: The AQL query optimizer +menuTitle: Query Optimization +weight: 25 +description: >- + AQL queries are sent through an optimizer before execution that creates an + initial execution plan, looks for optimization opportunities, and applies them +pageToc: + maxHeadlineLevel: 3 +--- +AQL queries are parsed and planned. The optimizer might produce multiple execution plans +for a single query. It then calculates the costs for all plans and picks the plan with the +lowest total cost. This resulting plan is considered to be the *optimal plan*, which is +then executed. + +The optimizer is designed to only perform optimizations if they are *safe*, in the +sense that an optimization should not modify the result of a query. 
A notable exception +to this is that the optimizer is allowed to change the order of results for queries that +do not explicitly specify how results should be sorted. + +## Execution plans + +The `explain` command can be used to query the optimal executed plan or even all plans +the optimizer has generated. Additionally, `explain` can reveal some more information +about the optimizer's view of the query. + +### Inspecting plans using the explain helper + +The `explain` method of `ArangoStatement` as shown in the next chapters creates very verbose output. +You can work on the output programmatically, or use this handsome tool that we created +to generate a more human readable representation. + +You may use it like this: (we disable syntax highlighting here) + +```js +--- +name: AQLEXP_01_axplainer +description: '' +--- +~addIgnoreCollection("test") +~db._drop("test"); +var coll = db._create("test"); +for (i = 0; i < 100; ++i) { db.test.save({ value: i }); } +var idx = db.test.ensureIndex({ type: "persistent", fields: [ "value" ] }); +var explain = require("@arangodb/aql/explainer").explain; +explain("FOR i IN test FILTER i.value > 97 SORT i.value RETURN i.value", {colors:false}); +``` + +### Execution plans in detail + +Let's have a look at the raw json output of the same execution plan +using the `explain` method of `ArangoStatement`: + +```js +--- +name: AQLEXP_01_explainCreate +description: '' +--- +var stmt = db._createStatement("FOR i IN test FILTER i.value > 97 SORT i.value RETURN i.value"); +stmt.explain(); +``` + +As you can see, the result details are very verbose. They are not covered in +detail for brevity in the next sections. Instead, let's take a closer look at +the results step by step. + +#### Execution nodes of a query + +In general, an execution plan can be considered to be a pipeline of processing steps. +Each processing step is carried out by a so-called *execution node* + +The `nodes` attribute of the `explain` result contains these *execution nodes* in +the *execution plan*. The output is still very verbose, so here's a shorted form of it: + +```js +--- +name: AQLEXP_02_explainOverview +description: '' +--- +~var stmt = db._createStatement("FOR i IN test FILTER i.value > 97 SORT i.value RETURN i.value"); +stmt.explain().plan.nodes.map(function (node) { return node.type; }); +``` + +Note that the list of nodes might slightly change in future versions of ArangoDB if +new execution node types get added or the optimizer create somewhat more +optimized plans. + +When a plan is executed, the query execution engine starts with the node at +the bottom of the list (i.e. the `ReturnNode`). + +The `ReturnNode`'s purpose is to return data to the caller. It does not produce +data itself, but it asks the node above itself, which is the `CalculationNode` +in our example. +`CalculationNode`s are responsible for evaluating arbitrary expressions. In our +example query, the `CalculationNode` evaluates the value of `i.value`, which +is needed by the `ReturnNode`. The calculation is applied for all data the +`CalculationNode` gets from the node above it, in our example the `IndexNode`. + +Finally, all of this needs to be done for documents of collection `test`. This is +where the `IndexNode` enters the game. It uses an index (thus its name) +to find certain documents in the collection and ships it down the pipeline in the +order required by `SORT i.value`. The `IndexNode` itself has a `SingletonNode` +as its input. 
The sole purpose of a `SingletonNode` node is to provide a single empty +document as input for other processing steps. It is always the end of the pipeline. + +Here is a summary: +- SingletonNode: produces an empty document as input for other processing steps. +- IndexNode: iterates over the index on attribute `value` in collection `test` + in the order required by `SORT i.value`. +- CalculationNode: evaluates the result of the calculation `i.value > 97` to `true` or `false` +- CalculationNode: calculates return value `i.value` +- ReturnNode: returns data to the caller + +#### Optimizer rules used for a query + +Note that in the example, the optimizer has optimized the `SORT` statement away. +It can do it safely because there is a sorted persistent index on `i.value`, which it has +picked in the `IndexNode`. As the index values are iterated over in sorted order +anyway, the extra `SortNode` would have been redundant and was removed. + +Additionally, the optimizer has done more work to generate an execution plan that +avoids as much expensive operations as possible. Here is the list of optimizer rules +that were applied to the plan: + +```js +--- +name: AQLEXP_03_explainRules +description: '' +--- +~var stmt = db._createStatement("FOR i IN test FILTER i.value > 97 SORT i.value RETURN i.value"); +stmt.explain().plan.rules; +``` + +Here is the meaning of these rules in context of this query: +- `move-calculations-up`: Moves a `CalculationNode` and subqueries, when independent from the outer node, + as far up in the processing pipeline as possible. +- `move-filters-up`: Moves a `FilterNode` as far up in the processing pipeline as + possible. +- `remove-redundant-calculations`: Replaces references to variables with references to + other variables that contain the exact same result. In the example query, `i.value` + is calculated multiple times, but each calculation inside a loop iteration would + produce the same value. Therefore, the expression result is shared by several nodes. +- `remove-unnecessary-calculations`: Removes `CalculationNode`s whose result values are + not used in the query. In the example this happens due to the `remove-redundant-calculations` + rule having made some calculations unnecessary. +- `use-indexes`: Use an index to iterate over a collection instead of performing a + full collection scan. In the example case this makes sense, as the index can be + used for filtering and sorting. +- `remove-filter-covered-by-index`: Remove an unnecessary filter whose functionality + is already covered by an index. In this case the index only returns documents + matching the filter. +- `use-index-for-sort`: Removes a `SORT` operation if it is already satisfied by + traversing over a sorted index. + +Note that some rules may appear multiple times in the list, with number suffixes. +This is due to the same rule being applied multiple times, at different positions +in the optimizer pipeline. + +Also see the full list of [optimizer rules](#optimizer-rules) below. + +#### Collections used in a query + +The list of collections used in a plan (and query) is contained in the `collections` +attribute of a plan: + +```js +--- +name: AQLEXP_04_explainCollections +description: '' +--- +~var stmt = db._createStatement("FOR i IN test FILTER i.value > 97 SORT i.value RETURN i.value"); +stmt.explain().plan.collections +``` + +The `name` attribute contains the name of the `collection`, and `type` is the +access type, which can be either `read` or `write`. 
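+
+For the query above, the returned value is an array with one entry per
+collection, roughly of the following shape (a sketch; the exact output may
+vary between versions):
+
+```js
+[
+  {
+    "name" : "test",  // collection name
+    "type" : "read"   // access type: "read" or "write"
+  }
+]
+```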
+ +#### Variables used in a query + +The optimizer returns a list of variables used in a plan (and query). This +list contains auxiliary variables created by the optimizer itself. You can +ignore this list in most cases. + +#### Cost of a query + +For each plan the optimizer generates, it calculates the total cost. The plan +with the lowest total cost is considered to be the optimal plan. Costs are +estimates only, as the actual execution costs are unknown to the optimizer. +Costs are calculated based on heuristics that are hard-coded into execution nodes. +Cost values do not have any unit. + +### Retrieving all execution plans + +To retrieve not just the optimal plan but a list of all plans the optimizer has +generated, set the option `allPlans` to `true`: + +This returns a list of all plans in the `plans` attribute instead of in the +`plan` attribute: + +```js +--- +name: AQLEXP_05_explainAllPlans +description: '' +--- +~var stmt = db._createStatement("FOR i IN test FILTER i.value > 97 SORT i.value RETURN i.value"); +stmt.explain({ allPlans: true }); +``` + +### Retrieving the plan as it was generated by the parser / lexer + +To retrieve the plan which closely matches your query, you may turn off most +optimization rules (i.e. cluster rules cannot be disabled if you're running +the explain on a cluster Coordinator) set the option `rules` to `-all`: + +This returns an unoptimized plan in the `plan`: + +```js +--- +name: AQLEXP_06_explainUnoptimizedPlans +description: '' +--- +~var stmt = db._createStatement("FOR i IN test FILTER i.value > 97 SORT i.value RETURN i.value"); +stmt.explain({ optimizer: { rules: [ "-all" ] } }); +``` + +Note that some optimizations are already done at parse time (i.e. evaluate simple constant +calculation as `1 + 1`) + +## Turning specific optimizer rules off + +Optimizer rules can also be turned on or off individually, using the `rules` attribute. +This can be used to enable or disable one or multiple rules. Rules that shall be enabled +need to be prefixed with a `+`, rules to be disabled should be prefixed with a `-`. The +pseudo-rule `all` matches all rules. + +Rules specified in `rules` are evaluated from left to right, so the following works to +turn on just the one specific rule: + +```js +--- +name: AQLEXP_07_explainSingleRulePlans +description: '' +--- +~var stmt = db._createStatement("FOR i IN test FILTER i.value > 97 SORT i.value RETURN i.value"); +stmt.explain({ optimizer: { rules: [ "-all", "+use-index-range" ] } }); +``` + +By default, all rules are turned on. To turn off just a few specific rules, use something +like this: + +```js +--- +name: AQLEXP_08_explainDisableSingleRulePlans +description: '' +--- +~var stmt = db._createStatement("FOR i IN test FILTER i.value > 97 SORT i.value RETURN i.value"); +stmt.explain({ optimizer: { rules: [ "-use-index-range", "-use-index-for-sort" ] } }); +``` + +The maximum number of plans created by the optimizer can also be limited using the +`maxNumberOfPlans` attribute: + +```js +--- +name: AQLEXP_09_explainMaxNumberOfPlans +description: '' +--- +~var stmt = db._createStatement("FOR i IN test FILTER i.value > 97 SORT i.value RETURN i.value"); +stmt.explain({ maxNumberOfPlans: 1 }); +``` + +## Optimizer statistics + +The optimizer provides statistics as a part of an `explain` result. +The following attributes are returned in the `stats` attribute: + +- `plansCreated`: The total number of plans created by the optimizer. +- `rulesExecuted`: The number of rules executed. 
Note that an executed rule does + not indicate that a plan has actually been modified by a rule. +- `rulesSkipped`: The number of rules skipped by the optimizer. +- `executionTime`: The (wall-clock) time in seconds needed to explain the query. +- `peakMemoryUsage`: The maximum memory usage of the query during explain. + +## Warnings + +For some queries, the optimizer may produce warnings. These are returned in +the `warnings` attribute of the `explain` result: + +```js +--- +name: AQLEXP_10_explainWarn +description: '' +--- +var stmt = db._createStatement("FOR i IN 1..10 RETURN 1 / 0") +stmt.explain().warnings; +~db._drop("test") +~removeIgnoreCollection("test") +``` + +There is an upper bound on the number of warnings a query may produce. If that +bound is reached, no further warnings are returned. + +## Optimization in a cluster + +When you are running AQL in the cluster, the parsing of the query is done on the +Coordinator. The Coordinator then chops the query into snippets, which are either +to remain on the Coordinator or need to be distributed to the shards on the +DB-Servers over the network. The cutting sites are interconnected via `ScatterNode`s, +`GatherNode`s and `RemoteNode`s. These nodes mark the network borders of the snippets. + +The optimizer strives to reduce the amount of data transferred via these network +interfaces by pushing `FILTER`s out to the shards, as it is vital to the query +performance to reduce that data amount to transfer over the network links. + +{{< info >}} +Some hops between Coordinators and DB-Servers are unavoidable. An example are +[user-defined functions](../user-defined-functions.md) (UDFs), which have to be executed on +the Coordinator. If you cannot modify your query to have a lower amount of +back and forth between sites, then try to lower the amount of data that has +to be transferred between them. In case of UDFs, use effective FILTERs before +calling them. +{{< /info >}} + +Using a cluster, there is a **Site** column if you explain a query. +Snippets marked with **DBS** are executed on DB-Servers, **COOR** ones are +executed on the respective Coordinator. + +```aql +Query String (57 chars, cacheable: false): + FOR doc IN test UPDATE doc WITH { updated: true } IN test + +Execution plan: + Id NodeType Site Est. Comment + 1 SingletonNode DBS 1 * ROOT + 3 CalculationNode DBS 1 - LET #3 = { "updated" : true } + 13 IndexNode DBS 1000000 - FOR doc IN test /* primary index scan, index only, projections: `_key`, 5 shard(s) */ + 4 UpdateNode DBS 0 - UPDATE doc WITH #3 IN test + 7 RemoteNode COOR 0 - REMOTE + 8 GatherNode COOR 0 - GATHER +``` + +## Execution nodes + +### List of execution nodes + +The following execution node types appear in the output of `explain`: + +- **CalculationNode**: + Evaluates an expression. The expression result may be used by + other nodes, e.g. `FilterNode`, `EnumerateListNode`, `SortNode` etc. + +- **CollectNode**: + Aggregates its input and produces new output variables. This appears + once per `COLLECT` statement. + +- **EnumerateCollectionNode**: + Enumeration over documents of a collection (given in its *collection* + attribute) without using an index. + +- **EnumerateListNode**: + Enumeration over a list of (non-collection) values. + +- **EnumerateViewNode**: + Enumeration over documents of a View. + +- **FilterNode**: + Only lets values pass that satisfy a filter condition. Appears once + per `FILTER` statement. 
+ +- **IndexNode**: + Enumeration over one or many indexes (given in its *indexes* attribute) + of a collection. The index ranges are specified in the *condition* attribute + of the node. + +- **InsertNode**: + Inserts documents into a collection (given in its *collection* attribute). + Appears exactly once in a query that contains an *INSERT* statement. + +- **KShortestPathsNode**: + Indicates a traversal for k Shortest Paths (`K_SHORTEST_PATHS` in AQL). + +- **KPathsNode**: + Indicates a traversal for k Paths (`K_PATHS` in AQL). + +- **LimitNode**: + Limits the number of results passed to other processing steps. Appears + once per `LIMIT` statement. + +- **MaterializeNode**: + The presence of this node means that the query is not fully covered by + indexes and therefore needs to involve the storage engine. + +- **RemoveNode**: + Removes documents from a collection (given in its *collection* attribute). + Appears exactly once in a query that contains a `REMOVE` statement. + +- **ReplaceNode**: + Replaces documents in a collection (given in its *collection* attribute). + Appears exactly once in a query that contains a `REPLACE` statement. + +- **ReturnNode**: + Returns data to the caller. Appears in each read-only query at + least once. Subqueries also contain `ReturnNode`s. + +- **SingletonNode**: + The purpose of a `SingletonNode` is to produce an empty document that is + used as input for other processing steps. Each execution plan contains + exactly one `SingletonNode` as its top node. + +- **ShortestPathNode**: + Indicates a traversal for a Shortest Path (`SHORTEST_PATH` in AQL). + +- **SortNode**: + Performs a sort of its input values. + +- **SubqueryEndNode**: + End of a spliced (inlined) subquery. + +- **SubqueryNode**: + Executes a subquery. + +- **SubqueryStartNode**: + Beginning of a spliced (inlined) subquery. + +- **TraversalNode**: + Indicates a regular graph traversal, as opposed to a shortest path(s) + traversal. + +- **UpdateNode**: + Updates documents in a collection (given in its *collection* attribute). + Appears exactly once in a query that contains an `UPDATE` statement. + +- **UpsertNode**: + Upserts documents in a collection (given in its *collection* attribute). + Appears exactly once in a query that contains an `UPSERT` statement. + +### List of cluster execution nodes + +For queries in the cluster, the following additional nodes may appear in +execution plans: + +- **DistributeNode**: + Used on a Coordinator to fan-out data to one or multiple shards, + taking into account a collection's shard key. + +- **GatherNode**: + Used on a Coordinator to aggregate results from one or many shards + into a combined stream of results. Parallelizes work for certain types + of queries when there are multiple DB-Servers involved + (shown as `GATHER /* parallel */` in query explain). + +- **RemoteNode**: + A `RemoteNode` performs communication with another ArangoDB instances + in the cluster. For example, the cluster Coordinator needs to communicate + with other servers to fetch the actual data from the shards. It does so + via `RemoteNode`s. The data servers themselves might again pull further data + from the Coordinator, and thus might also employ `RemoteNode`s. So, all of + the above cluster relevant nodes are accompanied by a `RemoteNode`. + +- **ScatterNode**: + Used on a Coordinator to fan-out data to one or multiple shards. + +- **SingleRemoteOperationNode**: + Used on a Coordinator to directly work with a single + document on a DB-Server that is referenced by its `_key`. 
+ +- **MultipleRemoteExecutionNode**: + Used to optimize bulk `INSERT` operations in cluster deployments, reducing the + setup and shutdown overhead and the number of internal network requests. + +## Optimizer rules + +### List of optimizer rules + +The following user-facing optimizer rules exist and are enabled by default +unless noted otherwise. You can +[enable and disable optimizer rules](#turning-specific-optimizer-rules-off) +except for a few required rules. + +Some rules exist multiple times with number suffixes like `-2`, +(e.g. `remove-unnecessary-calculations-2`). This is due to the same rule being +applied multiple times at different optimization stages. + +{{% comment %}} Execute code but exclude its output from rendering + +```js +--- +name: 00_dumpOptimizerRules +description: '' +type: cluster +--- +var url = "/_api/query/rules"; +var rules = internal.arango.GET(url); +assert(Array.isArray(rules)); +assert(rules.some(e => e.flags && e.flags.clusterOnly)); +var outfile = "Documentation/optimizer-rules.json"; +assert(fs.write(outfile, JSON.stringify(rules, undefined, 2))); +``` + +{{% /comment %}} + +{{% optimizer-rules %}} + +### Additional optimizations applied + +#### Scan-Only Optimization + +If a query iterates over a collection (for filtering or counting) but does not need +the actual document values later, the optimizer can apply a "scan-only" optimization +for `EnumerateCollectionNode` and `IndexNode` node types. In this case, it does not build up +a result with the document data at all, which may reduce work significantly. +In case the document data is actually not needed later on, it may be sensible to remove +it from query strings so the optimizer can apply the optimization. + +If the optimization is applied, it shows up as `scan only` in an AQL +query's execution plan for an `EnumerateCollectionNode` or an `IndexNode`. + +#### Index-Only Optimization + +The optimizer can apply an "index-only" optimization for AQL queries that +can satisfy the retrieval of all required document attributes directly from an index. + +This optimization is triggered if an index is used +that covers all required attributes of the document used later on in the query. +If applied, it saves retrieving the actual document data (which would require +an extra lookup by the storage engine), but instead builds the document data solely +from the index values found. It only applies when using up to 5 (or +[`maxProjections`](../high-level-operations/for.md#maxprojections)) attributes +from the document, and only if the rest of the document data is not used later +on in the query. + +The optimization is available for the following index types: `primary`, +`edge`, and `persistent`. + +If the optimization is applied, it shows up as `index only` in an AQL +query's execution plan for an `IndexNode`. + +#### Filter Projections Optimizations + +Introduced: v3.10.0 + +If an index is used that does not cover all required attributes for the query, +but if it is followed by filter conditions that only access attributes that are +part of the index, then an optimization is applied, to only fetch matching +documents. "Part of the index" here means, that all attributes referred to in +the post-filter conditions are contained in the `fields` or `storedValues` +attributes of the index definition. 
+ +For example, the optimization is applied in the following case: +- There is a persistent index on the attributes `[ "value1", "value2" ]` + (in this order), or there is a persistent index on just `["value1"]` and + with a `storedValues` definition of `["value2"]`. +- There is a filter condition on `value1` that can use the index, and a filter + condition on `value2` that cannot use the index (post-filter condition). + +Example query: + +```aql +FOR doc IN collection + FILTER doc.value1 == @value1 /* uses the index */ + FILTER ABS(doc.value2) != @value2 /* does not use the index */ + RETURN doc +``` + +This query's execution plan looks as follows: + +```aql +Execution plan: + Id NodeType Est. Comment + 1 SingletonNode 1 * ROOT + 8 IndexNode 0 - FOR doc IN collection /* persistent index scan (filter projections: `value2`) */ FILTER (ABS(doc.`value2`) != 2) /* early pruning */ + 7 ReturnNode 0 - RETURN doc + +Indexes used: + By Name Type Collection Unique Sparse Cache Selectivity Fields Ranges + 8 idx_1737498319258648576 persistent collection false false false 99.96 % [ `value1`, `value2` ] (doc.`value1` == 1) +``` + +The first filter condition is transformed to an index lookup, as you can tell +from the `persistent index scan` comment and the `Indexes used` section that +shows the range `` doc.`value` == 1 ``. The post-filter condition +`FILTER ABS(doc.value2) != 2` can be recognized as such by the `early pruning` +comment that follows it. + +The `filter projections` mentioned in the above execution plan is an indicator +of the optimization being triggered. + +Instead of fetching the full documents from the storage engine for all index +entries that matched the index lookup condition, only those that also satisfy +the index lookup post-filter condition are fetched. +If the post-filter condition filters out a lot of documents, this optimization +can significantly speed up queries that produce large result sets from index +lookups but filter many of the documents away with post-filter conditions. + +Note that the optimization can also be combined with regular projections, e.g. +for the following query that returns a specific attribute from the documents +only: + +```aql +FOR doc IN collection + FILTER doc.value1 == @value1 /* uses the index */ + FILTER ABS(doc.value2) != @value2 /* does not use the index */ + RETURN doc.value3 +``` + +That query's execution plan combines projections from the index for the +post-filter condition (`filter projections`) as well as regular projections +(`projections`) for the processing parts of the query that follow the +post-filter condition: + +```aql +Execution plan: + Id NodeType Est. Comment + 1 SingletonNode 1 * ROOT + 9 IndexNode 5000 - FOR doc IN collection /* persistent index scan (filter projections: `value2`) (projections: `value3`) */ FILTER (ABS(doc.`value2`) != 2) /* early pruning */ + 7 CalculationNode 5000 - LET #5 = doc.`value3` /* attribute expression */ /* collections used: doc : collection */ + 8 ReturnNode 5000 - RETURN #5 + +Indexes used: + By Name Type Collection Unique Sparse Cache Selectivity Fields Ranges + 9 idx_1737498319258648576 persistent collection false false false 99.96 % [ `value1`, `value2` ] (doc.`value1` == 1) +``` + +The optimization is most effective for queries in which many documents would +be selected by the index lookup condition, but many are filtered out by the +post-filter condition. 
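+
+An index matching the example above could be created like this (a sketch; the
+collection and attribute names are the placeholders used in the example
+queries):
+
+```js
+// Option 1: index both attributes, so the post-filter on value2 can use
+// filter projections from the index fields:
+db.collection.ensureIndex({ type: "persistent", fields: ["value1", "value2"] });
+
+// Option 2: index only the lookup attribute and additionally store the
+// post-filter attribute in the index entries via storedValues:
+db.collection.ensureIndex({ type: "persistent", fields: ["value1"], storedValues: ["value2"] });
+```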
diff --git a/site/content/arangodb/oem/aql/execution-and-performance/query-profiling.md b/site/content/arangodb/oem/aql/execution-and-performance/query-profiling.md new file mode 100644 index 0000000000..68222c43f7 --- /dev/null +++ b/site/content/arangodb/oem/aql/execution-and-performance/query-profiling.md @@ -0,0 +1,229 @@ +--- +title: Profiling and Hand-Optimizing AQL queries +menuTitle: Query Profiling +weight: 20 +description: >- + For understanding the performance of specific queries, you can profile them to + identify slow parts of query execution plans +--- +ArangoDB allows you to execute your query with special instrumentation code enabled. +It provides you a query plan with detailed execution statistics. + +To use this in an interactive fashion on the shell you can use +`db._profileQuery(..)` in _arangosh_. Alternatively, there is a button +_Profile_ in the Query tab of the web interface. + +The printed execution plan then contains three additional columns: + +- **Call**: The number of times this query stage was executed +- **Items**: The number of temporary result rows (outputs) at this stage +- **Filtered**: The number of rows filtered away by this stage +- **Runtime**: The total time spent in this stage + +Below the execution plan there are additional sections for the overall runtime +statistics and the query profile. + +## Example: Simple AQL query + +Assuming we got a collection named `acollection` and insert 10000 documents +via `for (let i=0; i < 10000;i++) db.acollection.insert({value:i})`. +Then a simple query filtering for `value < 10` will return 10 results: + +```js +--- +name: 01_workWithAQL_profileQuerySimple +description: '' +--- +~db._drop("acollection"); +~db._create('acollection'); +~for (let i=0; i < 10000; i++) { db.acollection.insert({value:i}); } +db._profileQuery(` + FOR doc IN acollection + FILTER doc.value < 10 + RETURN doc`, {}, {colors: false} +); +~db._drop("acollection"); +``` + +An AQL query is essentially executed in a pipeline that chains together different +functional execution blocks. Each block gets the input rows from the parent above +it, does some processing and then outputs a certain number of output rows. + +Without any detailed insight into the query execution it is impossible to tell +how many results each pipeline-block had to work on and how long this took. +By executing the query with the query profiler (`db._profileQuery()` or via +the _Profile_ button in the web interface) you can check exactly how much work +each stage had to do. + +Without any indexes this query should have to perform the following operations: + +1. Perform a full collection scan via a _EnumerateCollectionNode_ and outputting + a row containing the document in `doc`. +2. Calculate the boolean expression `LET #1 = doc.value < 10` from all inputs + via a _CalculationNode_ +3. Filter out all input rows where `#1` is false via the _FilterNode_ +4. Put the `doc` variable of the remaining rows into the result set via + the _ResultNode_ + +The _EnumerateCollectionNode_ processed and returned all 10k rows (documents), +as did the _CalculationNode_. Because the AQL execution engine also uses an +internal batch size of 1000 these blocks were also called 100 times each. +The _FilterNode_ as well as the _ReturnNode_ however only ever returned 10 rows +and only had to be called once, because the result size fits within a single batch. 
+ +Let us add a persistent index on `value` to speed up the query: + +```js +db.acollection.ensureIndex({type:"persistent", fields:["value"]}); +``` + +```js +--- +name: 02_workWithAQL_profileQuerySimpleIndex +description: '' +--- +~db._create('acollection'); +~db.acollection.ensureIndex({type:"persistent", fields:["value"]}); +~for (let i=0; i < 10000; i++) { db.acollection.insert({value:i}); } +db._profileQuery(` + FOR doc IN acollection + FILTER doc.value < 10 + RETURN doc`, {}, {colors: false} +); +~db._drop("acollection"); +``` + +This results in replacing the collection scan and filter block with an +`IndexNode`. The execution pipeline of the AQL query has become much shorter. +Also the number of rows processed by each pipeline block is only 10, because +we no longer need to look at all documents. + +## Example: AQL with Subquery + +Let us consider a query containing a subquery: + +```js +--- +name: 03_workWithAQL_profileQuerySubquery +description: '' +--- +~db._create('acollection'); +~db.acollection.ensureIndex({type:"persistent", fields:["value"]}); +~for (let i=0; i < 10000;i++) { db.acollection.insert({value:i}); } +db._profileQuery(` + LET list = (FOR doc in acollection FILTER doc.value > 90 RETURN doc) + FOR a IN list + FILTER a.value < 91 + RETURN a`, {}, {colors: false, optimizer:{rules:["-all"]}} +); +~db._drop("acollection"); +``` + +The resulting query profile contains a _SubqueryNode_ which has the runtime of +all its children combined. + +Actually, we cheated a little. The optimizer would have completely removed the +subquery if it had not been deactivated (`rules:["-all"]`). The optimized +version would take longer in the "optimizing plan" stage, but should perform +better with a lot of results. + +## Example: AQL with Aggregation + +Let us try a more advanced query, using a [COLLECT](../high-level-operations/collect.md) +statement. Assume we have a user collection with each document having a city, +a username and an age attribute. + +The following query gets us all age groups in buckets (0-9, 10-19, 20-29, ...): + +```js +--- +name: 04_workWithAQL_profileQueryAggregation +description: '' +--- +~db._create('myusers'); +~["berlin", "paris", "cologne", "munich", "london"].forEach((c) => { ["peter", "david", "simon", "lars"].forEach( n => db.myusers.insert({ city : c, name : n, age: Math.floor(Math.random() * 75) }) ) }); +db._profileQuery(` + FOR u IN myusers + COLLECT ageGroup = FLOOR(u.age / 10) * 10 + AGGREGATE minAge = MIN(u.age), maxAge = MAX(u.age), len = LENGTH(u) + RETURN { + ageGroup, + minAge, + maxAge, + len + }`, {}, {colors: false} +); +~db._drop("myusers") +``` + +Without any indexes this query should have to perform the following operations: + +1. Perform a full collection scan via a _EnumerateCollectionNode_ and outputting + a row containing the document in `doc`. +2. Compute the expression `LET #1 = FLOOR(u.age / 10) * 10` for all inputs via + a _CalculationNode_ +3. Perform the aggregations via the _CollectNode_ +4. Sort the resulting aggregated rows via a _SortNode_ +5. Build a result value via another _CalculationNode_ +6. Put the result variable into the result set via the _ResultNode_ + +Like within the example above, you can see that after the _CalculationNode_ +stage, from the originally 20 rows only a handful remained. 
+ +## Typical AQL Performance Mistakes + +With the new query profiler you should be able to spot typical performance +mistakes that we see quite often: + +- Not employing indexes to speed up queries with common filter expressions +- Not using shard keys in filter statements, when it is known + (only a cluster problem) +- Using subqueries to calculate an intermediary result, but only using a + few results + +Bad example: + +```aql +LET vertices = ( + FOR v IN 1..2 ANY @startVertex GRAPH 'my_graph' + // <-- add a LIMIT 1 here + RETURN v +) +FOR doc IN collection + FILTER doc.value == vertices[0].value + RETURN doc +``` + +Adding a `LIMIT 1` into the subquery should result in better performance, +because the traversal can be stopped after the first result instead of +computing all paths. + +Another mistake is to start a graph traversal from the wrong side +(if both ends are known). + +Assume we have two vertex collections _users_ and _products_ as well as an +edge collection _purchased_. The graph model looks like this: +`(users) <--[purchased]--> (products)`, i.e. every user is connected with an +edge in _purchased_ to zero or more _products_. + +If we want to know all users that have purchased the product _playstation_ +as well as products of `type` _legwarmer_ we could use this query: + +```aql +FOR prod IN products + FILTER prod.type == 'legwarmer' + FOR v,e,p IN 2..2 OUTBOUND prod purchased + FILTER v._key == 'playstation' // <-- last vertex of the path + RETURN p.vertices[1] // <-- the user +``` + +This query first finds all legwarmer products and then performs a traversal +for each of them. But we could also inverse the traversal by starting of with +the known _playstation_ product. This way we only need a single traversal +to achieve the same result: + +```aql +FOR v,e,p IN 2..2 OUTBOUND 'product/playstation' purchased + FILTER v.type == 'legwarmer' // <-- last vertex of the path + RETURN p.vertices[1] // <-- the user +``` diff --git a/site/content/arangodb/oem/aql/execution-and-performance/query-statistics.md b/site/content/arangodb/oem/aql/execution-and-performance/query-statistics.md new file mode 100644 index 0000000000..907a29dc30 --- /dev/null +++ b/site/content/arangodb/oem/aql/execution-and-performance/query-statistics.md @@ -0,0 +1,98 @@ +--- +title: AQL query statistics +menuTitle: Query statistics +weight: 5 +description: >- + All queries that have successfully run to completion return statistics about + the execution +--- +Execution statistics can be retrieved by calling `getExtra()` on the cursor. +The statistics are returned in the return value's `stats` attribute: + +```js +--- +name: 06_workWithAQL_statementsExtra +description: '' +--- +db._query(` + FOR i IN 1..@count + INSERT { _key: CONCAT('anothertest', TO_STRING(i)) } INTO mycollection`, + { count: 100 }, + {}, + { fullCount: true } +).getExtra(); + +db._query({ + "query": ` + FOR i IN 200..@count + INSERT { _key: CONCAT('anothertest', TO_STRING(i)) } INTO mycollection`, + "bindVars": { count: 300 }, + "options": { fullCount: true } +}).getExtra(); +``` + +The meaning of the statistics attributes is as follows: + +- **writesExecuted**: The total number of data-modification operations successfully executed. + This is equivalent to the number of documents created, updated, or removed by `INSERT`, + `UPDATE`, `REPLACE`, `REMOVE`, or `UPSERT` operations. +- **writesIgnored**: The total number of data-modification operations that were unsuccessful, + but have been ignored because of the `ignoreErrors` query option. 
+- **scannedFull**: The total number of documents iterated over when scanning a collection + without an index. Documents scanned by subqueries are included in the result, but + operations triggered by built-in or user-defined AQL functions are not. +- **scannedIndex**: The total number of documents iterated over when scanning a collection using + an index. Documents scanned by subqueries are included in the result, but operations + triggered by built-in or user-defined AQL functions are not. +- **cursorsCreated**: The total number of cursor objects created during query execution. Cursor + objects are created for index lookups. +- **cursorsRearmed**: The total number of times an existing cursor object was repurposed. + Repurposing an existing cursor object is normally more efficient compared to destroying an + existing cursor object and creating a new one from scratch. +- **cacheHits**: The total number of index entries read from in-memory caches for indexes + of type edge or persistent. This value is only non-zero when reading from indexes + that have an in-memory cache enabled, and when the query allows using the in-memory + cache (i.e. using equality lookups on all index attributes). +- **cacheMisses**: The total number of cache read attempts for index entries that could not + be served from in-memory caches for indexes of type edge or persistent. This value + is only non-zero when reading from indexes that have an in-memory cache enabled, the + query allows using the in-memory cache (i.e. using equality lookups on all index attributes) + and the looked up values are not present in the cache. +- **filtered**: The total number of documents removed after executing a filter condition + in a `FilterNode` or another node that post-filters data. Note that nodes of the + `IndexNode` type can also filter documents by selecting only the required index range + from a collection, and the `filtered` value only indicates how much filtering was done by a + post-filter in the `IndexNode` itself or following `FilterNode` nodes. + Nodes of the `EnumerateCollectionNode` and `TraversalNode` types can also apply + filter conditions and can report the number of filtered documents. +- **httpRequests**: The total number of cluster-internal HTTP requests performed. +- **fullCount** (_optional_): The total number of documents that matched the search condition if the query's + final top-level `LIMIT` operation were not present. + This attribute may only be returned if the `fullCount` option was set when starting the + query and only contains a sensible value if the query contains a `LIMIT` operation on + the top level. +- **executionTime**: The query execution time (wall-clock time) in seconds. +- **peakMemoryUsage**: The maximum memory usage of the query while it was running. In a cluster, + the memory accounting is done per shard, and the memory usage reported is the peak + memory usage value from the individual shards. + Note that to keep things light-weight, the per-query memory usage is tracked on a relatively + high level, not including any memory allocator overhead nor any memory used for temporary + results calculations (e.g. memory allocated/deallocated inside AQL expressions and function + calls). +- **intermediateCommits**: + The total number of intermediate commits the query has performed. This number + can only be greater than zero for data-modification queries that perform + modifications beyond the `--rocksdb.intermediate-commit-count` or + `--rocksdb.intermediate-commit-size` thresholds. 
In a cluster, the + intermediate commits are tracked per DB-Server that participates in the query + and are summed up in the end. +- **nodes** (_optional_): When the query is executed with the option `profile` set to at least `2`, + then this value contains runtime statistics per query execution node. + For a human readable output you can execute `db._profileQuery(, )` + in the arangosh. + - **id**: The execution node ID to correlate the statistics with the `plan` returned in + the `extra` attribute. + - **calls**: The number of calls to this node. + - **items**: The number of items returned by this node. Items are the temporary results + returned at this stage. + - **runtime**: The execution time of this node in seconds. diff --git a/site/content/arangodb/oem/aql/functions/_index.md b/site/content/arangodb/oem/aql/functions/_index.md new file mode 100644 index 0000000000..b38aa556de --- /dev/null +++ b/site/content/arangodb/oem/aql/functions/_index.md @@ -0,0 +1,37 @@ +--- +title: AQL functions +menuTitle: Functions +weight: 30 +description: >- + AQL offers an extensive set of functions to allow for complex computations + and it supports user-defined functions +--- +Functions can be called at any query position where an expression is allowed. +The general function call syntax is: + +```aql +FUNCTIONNAME(arguments) +``` + +`FUNCTIONNAME` is the name of the function to be called, and `arguments` +is a comma-separated list of function arguments. If a function does not need any +arguments, the argument list can be left empty. However, even if the argument +list is empty, the parentheses around it are still mandatory to make function +calls distinguishable from variable names. + +Some example function calls: + +```aql +HAS(user, "name") +LENGTH(friends) +COLLECTIONS() +``` + +In contrast to collection and variable names, function names are case-insensitive, +i.e. `LENGTH(foo)` and `length(foo)` are equivalent. + +## Extending AQL + +It is possible to extend AQL with user-defined functions. These functions need to +be written in JavaScript, and have to be registered before they can be used in a query. +Please refer to [Extending AQL](../user-defined-functions.md) for more details. diff --git a/site/content/arangodb/oem/aql/functions/arangosearch.md b/site/content/arangodb/oem/aql/functions/arangosearch.md new file mode 100644 index 0000000000..a02586b06a --- /dev/null +++ b/site/content/arangodb/oem/aql/functions/arangosearch.md @@ -0,0 +1,1361 @@ +--- +title: ArangoSearch functions in AQL +menuTitle: ArangoSearch +weight: 5 +description: >- + ArangoSearch offers various AQL functions for search queries to control the search context, for filtering and scoring +pageToc: + maxHeadlineLevel: 3 +--- +You can form search expressions by composing ArangoSearch function calls, +logical operators and comparison operators. This allows you to filter Views +as well as to utilize inverted indexes to filter collections. + +The AQL [`SEARCH` operation](../high-level-operations/search.md) accepts search expressions, +such as `PHRASE(doc.text, "foo bar", "text_en")`, for querying Views. You can +combine ArangoSearch filter and context functions as well as operators like +`AND` and `OR` to form complex search conditions. Similarly, the +[`FILTER` operation](../high-level-operations/filter.md) accepts such search expressions +when using [inverted indexes](../../indexes-and-search/indexing/working-with-indexes/inverted-indexes.md). + +Scoring functions allow you to rank matches and to sort results by relevance. 
+They are limited to Views. + +Search highlighting functions let you retrieve the string positions of matches. +They are limited to Views. + +You can use most functions also without an inverted index or a View and the +`SEARCH` keyword, but then they are not accelerated by an index. + +See [Information Retrieval with ArangoSearch](../../indexes-and-search/arangosearch/_index.md) for an +introduction. + +## Context Functions + +### ANALYZER() + +`ANALYZER(expr, analyzer) → retVal` + +Sets the Analyzer for the given search expression. + +{{< info >}} +The `ANALYZER()` function is only applicable for queries against `arangosearch` Views. + +In queries against `search-alias` Views and inverted indexes, you don't need to +specify Analyzers because every field can be indexed with a single Analyzer only +and they are inferred from the index definition. +{{< /info >}} + +The default Analyzer is `identity` for any search expression that is used for +filtering `arangosearch` Views. This utility function can be used +to wrap a complex expression to set a particular Analyzer. It also sets it for +all the nested functions which require such an argument to avoid repeating the +Analyzer parameter. If an Analyzer argument is passed to a nested function +regardless, then it takes precedence over the Analyzer set via `ANALYZER()`. + +The `TOKENS()` function is an exception. It requires the Analyzer name to be +passed in in all cases even if wrapped in an `ANALYZER()` call, because it is +not an ArangoSearch function but a regular string function which can be used +outside of `SEARCH` operations. + +- **expr** (expression): any valid search expression +- **analyzer** (string): name of an [Analyzer](../../indexes-and-search/analyzers.md). +- returns **retVal** (any): the expression result that it wraps + +#### Example: Using a custom Analyzer + +Assuming a View definition with an Analyzer whose name and type is `delimiter`: + +```json +{ + "links": { + "coll": { + "analyzers": [ "delimiter" ], + "includeAllFields": true, + } + }, + ... +} +``` + +… with the Analyzer properties `{ "delimiter": "|" }` and an example document +`{ "text": "foo|bar|baz" }` in the collection `coll`, the following query would +return the document: + +```aql +FOR doc IN viewName + SEARCH ANALYZER(doc.text == "bar", "delimiter") + RETURN doc +``` + +The expression `doc.text == "bar"` has to be wrapped by `ANALYZER()` in order +to set the Analyzer to `delimiter`. Otherwise the expression would be evaluated +with the default `identity` Analyzer. `"foo|bar|baz" == "bar"` would not match, +but the View does not even process the indexed fields with the `identity` +Analyzer. 
The following query would also return an empty result because of +the Analyzer mismatch: + +```aql +FOR doc IN viewName + SEARCH doc.text == "foo|bar|baz" + //SEARCH ANALYZER(doc.text == "foo|bar|baz", "identity") + RETURN doc +``` + +#### Example: Setting the Analyzer context with and without `ANALYZER()` + +In below query, the search expression is swapped by `ANALYZER()` to set the +`text_en` Analyzer for both `PHRASE()` functions: + +```aql +FOR doc IN viewName + SEARCH ANALYZER(PHRASE(doc.text, "foo") OR PHRASE(doc.text, "bar"), "text_en") + RETURN doc +``` + +Without the usage of `ANALYZER()`: + +```aql +FOR doc IN viewName + SEARCH PHRASE(doc.text, "foo", "text_en") OR PHRASE(doc.text, "bar", "text_en") + RETURN doc +``` + +#### Example: Analyzer precedence and specifics of the `TOKENS()` function + +In the following example `ANALYZER()` is used to set the Analyzer `text_en`, +but in the second call to `PHRASE()` a different Analyzer is set (`identity`) +which overrules `ANALYZER()`. Therefore, the `text_en` Analyzer is used to find +the phrase *foo* and the `identity` Analyzer to find *bar*: + +```aql +FOR doc IN viewName + SEARCH ANALYZER(PHRASE(doc.text, "foo") OR PHRASE(doc.text, "bar", "identity"), "text_en") + RETURN doc +``` + +Despite the wrapping `ANALYZER()` function, the Analyzer name cannot be +omitted in calls to the `TOKENS()` function. Both occurrences of `text_en` +are required, to set the Analyzer for the expression `doc.text IN ...` and +for the `TOKENS()` function itself. This is because the `TOKENS()` function +is a regular string function that does not take the Analyzer context into +account: + +```aql +FOR doc IN viewName + SEARCH ANALYZER(doc.text IN TOKENS("foo", "text_en"), "text_en") + RETURN doc +``` + +### BOOST() + +`BOOST(expr, boost) → retVal` + +Override boost in the context of a search expression with a specified value, +making it available for scorer functions. By default, the context has a boost +value equal to `1.0`. + +- **expr** (expression): any valid search expression +- **boost** (number): numeric boost value +- returns **retVal** (any): the expression result that it wraps + +#### Example: Boosting a search sub-expression + +```aql +FOR doc IN viewName + SEARCH ANALYZER(BOOST(doc.text == "foo", 2.5) OR doc.text == "bar", "text_en") + LET score = BM25(doc) + SORT score DESC + RETURN { text: doc.text, score } +``` + +Assuming a View with the following documents indexed and processed by the +`text_en` Analyzer: + +```js +{ "text": "foo bar" } +{ "text": "foo" } +{ "text": "bar" } +{ "text": "foo baz" } +{ "text": "baz" } +``` + +… the result of above query would be: + +```json +[ + { + "text": "foo bar", + "score": 2.787301540374756 + }, + { + "text": "foo baz", + "score": 1.6895781755447388 + }, + { + "text": "foo", + "score": 1.525835633277893 + }, + { + "text": "bar", + "score": 0.9913395643234253 + } +] +``` + +## Filter Functions + +### EXISTS() + +{{< info >}} +If you use `arangosearch` Views, the `EXISTS()` function only matches values if +you set the **storeValues** link property to `"id"` in the View definition +(the default is `"none"`). +{{< /info >}} + +#### Testing for attribute presence + +`EXISTS(path)` + +Match documents where the attribute at `path` is present. + +- **path** (attribute path expression): the attribute to test in the document +- returns nothing: the function evaluates to a boolean, but this value cannot be + returned. The function can only be called in a search expression. 
It throws + an error if used outside of a [`SEARCH` operation](../high-level-operations/search.md) or + a `FILTER` operation that uses an inverted index. + +```aql +FOR doc IN viewName + SEARCH EXISTS(doc.text) + RETURN doc +``` + +#### Testing for attribute type + +`EXISTS(path, type)` + +Match documents where the attribute at `path` is present _and_ is of the +specified data type. + +- **path** (attribute path expression): the attribute to test in the document +- **type** (string): data type to test for, can be one of: + - `"null"` + - `"bool"` / `"boolean"` + - `"numeric"` + - `"type"` (matches `null`, `boolean`, and `numeric` values) + - `"string"` + - `"analyzer"` (see below) +- returns nothing: the function evaluates to a boolean, but this value cannot be + returned. The function can only be called in a search expression. It throws + an error if used outside of a [`SEARCH` operation](../high-level-operations/search.md) or + a `FILTER` operation that uses an inverted index. + +```aql +FOR doc IN viewName + SEARCH EXISTS(doc.text, "string") + RETURN doc +``` + +#### Testing for Analyzer index status + +`EXISTS(path, "analyzer", analyzer)` + +Match documents where the attribute at `path` is present _and_ was indexed +by the specified `analyzer`. + +- **path** (attribute path expression): the attribute to test in the document +- **type** (string): string literal `"analyzer"` +- **analyzer** (string, _optional_): name of an [Analyzer](../../indexes-and-search/analyzers.md). + Uses the Analyzer of a wrapping `ANALYZER()` call if not specified or + defaults to `"identity"` +- returns nothing: the function evaluates to a boolean, but this value cannot be + returned. The function can only be called in a search expression. It throws + an error if used outside of a [`SEARCH` operation](../high-level-operations/search.md) or + a `FILTER` operation that uses an inverted index. + +```aql +FOR doc IN viewName + SEARCH EXISTS(doc.text, "analyzer", "text_en") + RETURN doc +``` + +#### Testing for nested fields + +`EXISTS(path, "nested")` + +Match documents where the attribute at `path` is present _and_ is indexed +as a nested field for [nested search with Views](../../indexes-and-search/arangosearch/nested-search.md) +or [inverted indexes](../../indexes-and-search/indexing/working-with-indexes/inverted-indexes.md#nested-search-enterprise-edition). + +- **path** (attribute path expression): the attribute to test in the document +- **type** (string): string literal `"nested"` +- returns nothing: the function evaluates to a boolean, but this value cannot be + returned. The function can only be called in a search expression. It throws + an error if used outside of a [`SEARCH` operation](../high-level-operations/search.md) or + a `FILTER` operation that uses an inverted index. + +**Examples** + +Only return documents from the View `viewName` whose `text` attribute is indexed +as a nested field: + +```aql +FOR doc IN viewName + SEARCH EXISTS(doc.text, "nested") + RETURN doc +``` + +Only return documents whose `attr` attribute and its nested `text` attribute are +indexed as nested fields: + +```aql +FOR doc IN viewName + SEARCH doc.attr[? 
FILTER EXISTS(CURRENT.text, "nested")] + RETURN doc +``` + +Only return documents from the collection `coll` whose `text` attribute is indexed +as a nested field by an inverted index: + +```aql +FOR doc IN coll OPTIONS { indexHint: "inv-idx", forceIndexHint: true } + FILTER EXISTS(doc.text, "nested") + RETURN doc +``` + +Only return documents whose `attr` attribute and its nested `text` attribute are +indexed as nested fields: + +```aql +FOR doc IN coll OPTIONS { indexHint: "inv-idx", forceIndexHint: true } + FILTER doc.attr[? FILTER EXISTS(CURRENT.text, "nested")] + RETURN doc +``` + +### IN_RANGE() + +`IN_RANGE(path, low, high, includeLow, includeHigh) → included` + +Match documents where the attribute at `path` is greater than (or equal to) +`low` and less than (or equal to) `high`. + +You can use `IN_RANGE()` for searching more efficiently compared to an equivalent +expression that combines two comparisons with a logical conjunction: + +- `IN_RANGE(path, low, high, true, true)` instead of `low <= value AND value <= high` +- `IN_RANGE(path, low, high, true, false)` instead of `low <= value AND value < high` +- `IN_RANGE(path, low, high, false, true)` instead of `low < value AND value <= high` +- `IN_RANGE(path, low, high, false, false)` instead of `low < value AND value < high` + +`low` and `high` can be numbers or strings (technically also `null`, `true` +and `false`), but the data type must be the same for both. + +{{< warning >}} +The alphabetical order of characters is not taken into account by ArangoSearch, +i.e. range queries in SEARCH operations against Views will not follow the +language rules as per the defined Analyzer locale (except for the +[`collation` Analyzer](../../indexes-and-search/analyzers.md#collation)) nor the server language +(startup option `--default-language`)! +Also see [Known Issues](../../release-notes/version-oem/known-issues-in-oem.md#arangosearch). +{{< /warning >}} + +There is a corresponding [`IN_RANGE()` Miscellaneous Function](miscellaneous.md#in_range) +that is used outside of `SEARCH` operations. + +- **path** (attribute path expression): + the path of the attribute to test in the document +- **low** (number\|string): minimum value of the desired range +- **high** (number\|string): maximum value of the desired range +- **includeLow** (bool): whether the minimum value shall be included in + the range (left-closed interval) or not (left-open interval) +- **includeHigh** (bool): whether the maximum value shall be included in + the range (right-closed interval) or not (right-open interval) +- returns **included** (bool): whether `value` is in the range + +If `low` and `high` are the same, but `includeLow` and/or `includeHigh` is set +to `false`, then nothing will match. If `low` is greater than `high` nothing will +match either. + +#### Example: Using numeric ranges + +To match documents with the attribute `value >= 3` and `value <= 5` using the +default `"identity"` Analyzer you would write the following query: + +```aql +FOR doc IN viewName + SEARCH IN_RANGE(doc.value, 3, 5, true, true) + RETURN doc.value +``` + +This will also match documents which have an array of numbers as `value` +attribute where at least one of the numbers is in the specified boundaries. 
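+
+For comparison, here is a sketch of the conjunction form mentioned at the
+beginning of this section that `IN_RANGE()` lets you replace with a single
+function call, shown for the numeric example above:
+
+```aql
+FOR doc IN viewName
+  SEARCH doc.value >= 3 AND doc.value <= 5
+  RETURN doc.value
+```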
+ +#### Example: Using string ranges + +Using string boundaries and a text Analyzer allows to match documents which +have at least one token within the specified character range: + +```aql +FOR doc IN valView + SEARCH ANALYZER(IN_RANGE(doc.value, "a","f", true, false), "text_en") + RETURN doc +``` + +This will match `{ "value": "bar" }` and `{ "value": "foo bar" }` because the +_b_ of _bar_ is in the range (`"a" <= "b" < "f"`), but not `{ "value": "foo" }` +because the _f_ of _foo_ is excluded (`high` is "f" but `includeHigh` is false). + +### MIN_MATCH() + +`MIN_MATCH(expr1, ... exprN, minMatchCount) → fulfilled` + +Match documents where at least `minMatchCount` of the specified +search expressions are satisfied. + +There is a corresponding [`MIN_MATCH()` Miscellaneous function](miscellaneous.md#min_match) +that is used outside of `SEARCH` operations. + +- **expr** (expression, _repeatable_): any valid search expression +- **minMatchCount** (number): minimum number of search expressions that should + be satisfied +- returns **fulfilled** (bool): whether at least `minMatchCount` of the + specified expressions are `true` + +#### Example: Matching a subset of search sub-expressions + +Assuming a View with a text Analyzer, you may use it to match documents where +the attribute contains at least two out of three tokens: + +```aql +LET t = TOKENS("quick brown fox", "text_en") +FOR doc IN viewName + SEARCH ANALYZER(MIN_MATCH(doc.text == t[0], doc.text == t[1], doc.text == t[2], 2), "text_en") + RETURN doc.text +``` + +This will match `{ "text": "the quick brown fox" }` and `{ "text": "some brown fox" }`, +but not `{ "text": "snow fox" }` which only fulfills one of the conditions. + +Note that you can also use the `AT LEAST` [array comparison operator](../high-level-operations/search.md#array-comparison-operators) +in the specific case of matching a subset of tokens against a single attribute: + +```aql +FOR doc IN viewName + SEARCH ANALYZER(TOKENS("quick brown fox", "text_en") AT LEAST (2) == doc.text, "text_en") + RETURN doc.text +``` + +### MINHASH_MATCH() + +`MINHASH_MATCH(path, target, threshold, analyzer) → fulfilled` + +Match documents with an approximate Jaccard similarity of at least the +`threshold`, approximated with the specified `minhash` Analyzer. + +To only compute the MinHash signatures, see the +[`MINHASH()` Miscellaneous function](miscellaneous.md#minhash). + +- **path** (attribute path expression\|string): the path of the attribute in + a document or a string +- **target** (string): the string to hash with the specified Analyzer and to + compare against the stored attribute +- **threshold** (number, _optional_): a value between `0.0` and `1.0`. +- **analyzer** (string): the name of a [`minhash` Analyzer](../../indexes-and-search/analyzers.md#minhash). 
+- returns **fulfilled** (bool): `true` if the approximate Jaccard similarity + is greater than or equal to the specified threshold, `false` otherwise + +#### Example: Find documents with a text similar to a target text + +Assuming a View with a `minhash` Analyzer, you can use the stored +MinHash signature to find candidates for the more expensive Jaccard similarity +calculation: + +```aql +LET target = "the quick brown fox jumps over the lazy dog" +LET targetSignature = TOKENS(target, "myMinHash") + +FOR doc IN viewName + SEARCH MINHASH_MATCH(doc.text, target, 0.5, "myMinHash") // approximation + LET jaccard = JACCARD(targetSignature, TOKENS(doc.text, "myMinHash")) + FILTER jaccard > 0.75 + SORT jaccard DESC + RETURN doc.text +``` + +### NGRAM_MATCH() + +`NGRAM_MATCH(path, target, threshold, analyzer) → fulfilled` + +Match documents whose attribute value has an +[_n_-gram similarity](https://webdocs.cs.ualberta.ca/~kondrak/papers/spire05.pdf) +higher than the specified threshold compared to the target value. + +The similarity is calculated by counting how long the longest sequence of +matching _n_-grams is, divided by the target's total _n_-gram count. +Only fully matching _n_-grams are counted. + +The _n_-grams for both attribute and target are produced by the specified +Analyzer. Increasing the _n_-gram length will increase accuracy, but reduce +error tolerance. In most cases a size of 2 or 3 will be a good choice. + +Also see the String Functions +[`NGRAM_POSITIONAL_SIMILARITY()`](string.md#ngram_positional_similarity) +and [`NGRAM_SIMILARITY()`](string.md#ngram_similarity) +for calculating _n_-gram similarity that cannot be accelerated by a View index. + +- **path** (attribute path expression\|string): the path of the attribute in + a document or a string +- **target** (string): the string to compare against the stored attribute +- **threshold** (number, _optional_): a value between `0.0` and `1.0`. Defaults + to `0.7` if none is specified. +- **analyzer** (string): the name of an [Analyzer](../../indexes-and-search/analyzers.md). +- returns **fulfilled** (bool): `true` if the evaluated _n_-gram similarity value + is greater than or equal to the specified threshold, `false` otherwise + +{{< info >}} +Use an Analyzer of type `ngram` with `preserveOriginal: false` and `min` equal +to `max`. Otherwise, the similarity score calculated internally will be lower +than expected. + +The Analyzer must have the `"position"` and `"frequency"` features enabled or +the `NGRAM_MATCH()` function will not find anything. 
+{{< /info >}} + +#### Example: Using a custom bigram Analyzer + +Given a View indexing an attribute `text`, a custom _n_-gram Analyzer `"bigram"` +(`min: 2, max: 2, preserveOriginal: false, streamType: "utf8"`) and a document +`{ "text": "quick red fox" }`, the following query would match it (with a +threshold of `1.0`): + +```aql +FOR doc IN viewName + SEARCH NGRAM_MATCH(doc.text, "quick fox", "bigram") + RETURN doc.text +``` + +The following will also match (note the low threshold value): + +```aql +FOR doc IN viewName + SEARCH NGRAM_MATCH(doc.text, "quick blue fox", 0.4, "bigram") + RETURN doc.text +``` + +The following will not match (note the high threshold value): + +```aql +FOR doc IN viewName + SEARCH NGRAM_MATCH(doc.text, "quick blue fox", 0.9, "bigram") + RETURN doc.text +``` + +#### Example: Using constant values + +`NGRAM_MATCH()` can be called with constant arguments, but for such calls the +`analyzer` argument is mandatory (even for calls inside of a `SEARCH` clause): + +```aql +FOR doc IN viewName + SEARCH NGRAM_MATCH("quick fox", "quick blue fox", 0.9, "bigram") + RETURN doc.text +``` + +```aql +RETURN NGRAM_MATCH("quick fox", "quick blue fox", "bigram") +``` + +### PHRASE() + +`PHRASE(path, phrasePart, analyzer)` + +`PHRASE(path, phrasePart1, skipTokens1, ... phrasePartN, skipTokensN, analyzer)` + +`PHRASE(path, [ phrasePart1, skipTokens1, ... phrasePartN, skipTokensN ], analyzer)` + +Search for a phrase in the referenced attribute. It only matches documents in +which the tokens appear in the specified order. To search for tokens in any +order use [`TOKENS()`](string.md#tokens) instead. + +The phrase can be expressed as an arbitrary number of `phraseParts` separated by +*skipTokens* number of tokens (wildcards), either as separate arguments or as +array as second argument. + +- **path** (attribute path expression): the attribute to test in the document +- **phrasePart** (string\|array\|object): text to search for in the tokens. + Can also be an [array](#example-using-phrase-with-an-array-of-tokens) + comprised of string, array and [object tokens](#object-tokens), or tokens + interleaved with numbers of `skipTokens`. The specified `analyzer` is applied + to string and array tokens, but not for object tokens. +- **skipTokens** (number, _optional_): amount of tokens to treat + as wildcards +- **analyzer** (string, _optional_): name of an [Analyzer](../../indexes-and-search/analyzers.md). + Uses the Analyzer of a wrapping `ANALYZER()` call if not specified or + defaults to `"identity"` +- returns nothing: the function evaluates to a boolean, but this value cannot be + returned. The function can only be called in a search expression. It throws + an error if used outside of a [`SEARCH` operation](../high-level-operations/search.md) or + a `FILTER` operation that uses an inverted index. + +{{< info >}} +The selected Analyzer must have the `"position"` and `"frequency"` features +enabled. The `PHRASE()` function will otherwise not find anything. +{{< /info >}} + +#### Object tokens + +- `{IN_RANGE: [low, high, includeLow, includeHigh]}`: + see [`IN_RANGE()`](#in_range). *low* and *high* can only be strings. 
+- `{LEVENSHTEIN_MATCH: [token, maxDistance, transpositions, maxTerms, prefix]}`: + - `token` (string): a string to search + - `maxDistance` (number): maximum Levenshtein / Damerau-Levenshtein distance + - `transpositions` (bool, _optional_): if set to `false`, a Levenshtein + distance is computed, otherwise a Damerau-Levenshtein distance (default) + - `maxTerms` (number, _optional_): consider only a specified number of the + most relevant terms. One can pass `0` to consider all matched terms, but it may + impact performance negatively. The default value is `64`. + - `prefix` (string, _optional_): if defined, then a search for the exact + prefix is carried out, using the matches as candidates. The Levenshtein / + Damerau-Levenshtein distance is then computed for each candidate using the + remainders of the strings. This option can improve performance in cases where + there is a known common prefix. The default value is an empty string + (introduced in v3.7.13, v3.8.1). +- `{STARTS_WITH: [prefix]}`: see [`STARTS_WITH()`](#starts_with). + Array brackets are optional +- `{TERM: [token]}`: equal to `token` but without Analyzer tokenization. + Array brackets are optional +- `{TERMS: [token1, ..., tokenN]}`: one of `token1, ..., tokenN` can be found + in specified position. Inside an array the object syntax can be replaced with + the object field value, e.g., `[..., [token1, ..., tokenN], ...]`. +- `{WILDCARD: [token]}`: see [`LIKE()`](#like). + Array brackets are optional + +An array token inside an array can be used in the `TERMS` case only. + +Also see [Example: Using object tokens](#example-using-object-tokens). + +#### Example: Using a text Analyzer for a phrase search + +Given a View indexing an attribute `text` with the `"text_en"` Analyzer and a +document `{ "text": "Lorem ipsum dolor sit amet, consectetur adipiscing elit" }`, +the following query would match it: + +```aql +FOR doc IN viewName + SEARCH PHRASE(doc.text, "lorem ipsum", "text_en") + RETURN doc.text +``` + +However, this search expression does not because the tokens `"ipsum"` and +`"lorem"` do not appear in this order: + +```aql +PHRASE(doc.text, "ipsum lorem", "text_en") +``` + +#### Example: Skip tokens for a proximity search + +To match `"ipsum"` and `"amet"` with any two tokens in between, you can use the +following search expression: + +```aql +PHRASE(doc.text, "ipsum", 2, "amet", "text_en") +``` + +The `skipTokens` value of `2` defines how many wildcard tokens have to appear +between *ipsum* and *amet*. A `skipTokens` value of `0` means that the tokens +must be adjacent. Negative values are allowed, but not very useful. These three +search expressions are equivalent: + +```aql +PHRASE(doc.text, "lorem ipsum", "text_en") +PHRASE(doc.text, "lorem", 0, "ipsum", "text_en") +PHRASE(doc.text, "ipsum", -1, "lorem", "text_en") +``` + +#### Example: Using `PHRASE()` with an array of tokens + +The `PHRASE()` function also accepts an array as second argument with +`phrasePart` and `skipTokens` parameters as elements. 
+ +```aql +FOR doc IN myView SEARCH PHRASE(doc.title, ["quick brown fox"], "text_en") RETURN doc +FOR doc IN myView SEARCH PHRASE(doc.title, ["quick", "brown", "fox"], "text_en") RETURN doc +``` + +This syntax variation enables the usage of computed expressions: + +```aql +LET proximityCondition = [ "foo", ROUND(RAND()*10), "bar" ] +FOR doc IN viewName + SEARCH PHRASE(doc.text, proximityCondition, "text_en") + RETURN doc +``` + +```aql +LET tokens = TOKENS("quick brown fox", "text_en") // ["quick", "brown", "fox"] +FOR doc IN myView SEARCH PHRASE(doc.title, tokens, "text_en") RETURN doc +``` + +Above example is equivalent to the more cumbersome and static form: + +```aql +FOR doc IN myView SEARCH PHRASE(doc.title, "quick", 0, "brown", 0, "fox", "text_en") RETURN doc +``` + +You can optionally specify the number of skipTokens in the array form before +every string element: + +```aql +FOR doc IN myView SEARCH PHRASE(doc.title, ["quick", 1, "fox", "jumps"], "text_en") RETURN doc +``` + +It is the same as the following: + +```aql +FOR doc IN myView SEARCH PHRASE(doc.title, "quick", 1, "fox", 0, "jumps", "text_en") RETURN doc +``` + +#### Example: Handling of arrays with no members + +Empty arrays are skipped: + +```aql +FOR doc IN myView SEARCH PHRASE(doc.title, "quick", 1, [], 1, "jumps", "text_en") RETURN doc +``` + +The query is equivalent to: + +```aql +FOR doc IN myView SEARCH PHRASE(doc.title, "quick", 2 "jumps", "text_en") RETURN doc +``` + +Providing only empty arrays is valid, but will yield no results. + +#### Example: Using object tokens + +Using object tokens `STARTS_WITH`, `WILDCARD`, `LEVENSHTEIN_MATCH`, `TERMS` and +`IN_RANGE`: + +```aql +FOR doc IN myView SEARCH PHRASE(doc.title, + {STARTS_WITH: ["qui"]}, 0, + {WILDCARD: ["b%o_n"]}, 0, + {LEVENSHTEIN_MATCH: ["foks", 2]}, 0, + {TERMS: ["jump", "run"]}, 0, // Analyzer not applied! + {IN_RANGE: ["over", "through", true, false]}, + "text_en") RETURN doc +``` + +Note that the `text_en` Analyzer has stemming enabled, but for object tokens +the Analyzer isn't applied. `{TERMS: ["jumps", "runs"]}` would not match the +indexed (and stemmed!) attribute value. Therefore, the trailing `s` which would +be stemmed away is removed from both words manually in the example. + +Above example is equivalent to: + +```aql +FOR doc IN myView SEARCH PHRASE(doc.title, +[ + {STARTS_WITH: "qui"}, 0, + {WILDCARD: "b%o_n"}, 0, + {LEVENSHTEIN_MATCH: ["foks", 2]}, 0, + ["jumps", "runs"], 0, // Analyzer is applied using this syntax + {IN_RANGE: ["over", "through", true, false]} +], "text_en") RETURN doc +``` + +### STARTS_WITH() + +`STARTS_WITH(path, prefix) → startsWith` + +Match the value of the attribute that starts with `prefix`. If the attribute +is processed by a tokenizing Analyzer (type `"text"` or `"delimiter"`) or if it +is an array, then a single token/element starting with the prefix is sufficient +to match the document. + +{{< warning >}} +The alphabetical order of characters is not taken into account by ArangoSearch, +i.e. range queries in SEARCH operations against Views will not follow the +language rules as per the defined Analyzer locale (except for the +[`collation` Analyzer](../../indexes-and-search/analyzers.md#collation)) nor the server language +(startup option `--default-language`)! +Also see [Known Issues](../../release-notes/version-oem/known-issues-in-oem.md#arangosearch). +{{< /warning >}} + +There is a corresponding [`STARTS_WITH()` String function](string.md#starts_with) +that is used outside of `SEARCH` operations. 
+ +- **path** (attribute path expression): the path of the attribute to compare + against in the document +- **prefix** (string): a string to search at the start of the text +- returns **startsWith** (bool): whether the specified attribute starts with + the given prefix + +--- + +`STARTS_WITH(path, prefixes, minMatchCount) → startsWith` + +Match the value of the attribute that starts with one of the `prefixes`, or +optionally with at least `minMatchCount` of the prefixes. + +- **path** (attribute path expression): the path of the attribute to compare + against in the document +- **prefixes** (array): an array of strings to search at the start of the text +- **minMatchCount** (number, _optional_): minimum number of search prefixes + that should be satisfied (see + [example](#example-searching-for-one-or-multiple-prefixes)). The default is `1` +- returns **startsWith** (bool): whether the specified attribute starts with at + least `minMatchCount` of the given prefixes + +#### Example: Searching for an exact value prefix + +To match a document `{ "text": "lorem ipsum..." }` using a prefix and the +`"identity"` Analyzer you can use it like this: + +```aql +FOR doc IN viewName + SEARCH STARTS_WITH(doc.text, "lorem ip") + RETURN doc +``` + +#### Example: Searching for a prefix in text + +This query will match `{ "text": "lorem ipsum" }` as well as +`{ "text": [ "lorem", "ipsum" ] }` given a View which indexes the `text` +attribute and processes it with the `"text_en"` Analyzer: + +```aql +FOR doc IN viewName + SEARCH ANALYZER(STARTS_WITH(doc.text, "ips"), "text_en") + RETURN doc.text +``` + +Note that it will not match `{ "text": "IPS (in-plane switching)" }` without +modification to the query. The prefixes were passed to `STARTS_WITH()` as-is, +but the built-in `text_en` Analyzer used for indexing has stemming enabled. +So the indexed values are the following: + +```aql +RETURN TOKENS("IPS (in-plane switching)", "text_en") +``` + +```json +[ + [ + "ip", + "in", + "plane", + "switch" + ] +] +``` + +The *s* is removed from *ips*, which leads to the prefix *ips* not matching +the indexed token *ip*. You may either create a custom text Analyzer with +stemming disabled to avoid this issue, or apply stemming to the prefixes: + +```aql +FOR doc IN viewName + SEARCH ANALYZER(STARTS_WITH(doc.text, TOKENS("ips", "text_en")), "text_en") + RETURN doc.text +``` + +#### Example: Searching for one or multiple prefixes + +The `STARTS_WITH()` function accepts an array of prefix alternatives of which +only one has to match: + +```aql +FOR doc IN viewName + SEARCH ANALYZER(STARTS_WITH(doc.text, ["something", "ips"]), "text_en") + RETURN doc.text +``` + +It will match a document `{ "text": "lorem ipsum" }` but also +`{ "text": "that is something" }`, as at least one of the words start with a +given prefix. + +The same query again, but with an explicit `minMatchCount`: + +```aql +FOR doc IN viewName + SEARCH ANALYZER(STARTS_WITH(doc.text, ["wrong", "ips"], 1), "text_en") + RETURN doc.text +``` + +The number can be increased to require that at least this many prefixes must +be present: + +```aql +FOR doc IN viewName + SEARCH ANALYZER(STARTS_WITH(doc.text, ["lo", "ips", "something"], 2), "text_en") + RETURN doc.text +``` + +This will still match `{ "text": "lorem ipsum" }` because at least two prefixes +(`lo` and `ips`) are found, but not `{ "text": "that is something" }` which only +contains one of the prefixes (`something`). 
+ +### LEVENSHTEIN_MATCH() + +`LEVENSHTEIN_MATCH(path, target, distance, transpositions, maxTerms, prefix) → fulfilled` + +Match documents with a [Damerau-Levenshtein distance](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance) +lower than or equal to `distance` between the stored attribute value and +`target`. It can optionally match documents using a pure Levenshtein distance. + +See [`LEVENSHTEIN_DISTANCE()`](string.md#levenshtein_distance) +if you want to calculate the edit distance of two strings. + +- **path** (attribute path expression\|string): the path of the attribute to + compare against in the document or a string +- **target** (string): the string to compare against the stored attribute +- **distance** (number): the maximum edit distance, which can be between + `0` and `4` if `transpositions` is `false`, and between `0` and `3` if + it is `true` +- **transpositions** (bool, _optional_): if set to `false`, a Levenshtein + distance is computed, otherwise a Damerau-Levenshtein distance (default) +- **maxTerms** (number, _optional_): consider only a specified number of the + most relevant terms. One can pass `0` to consider all matched terms, but it may + impact performance negatively. The default value is `64`. +- returns **fulfilled** (bool): `true` if the calculated distance is less than + or equal to *distance*, `false` otherwise +- **prefix** (string, _optional_): if defined, then a search for the exact + prefix is carried out, using the matches as candidates. The Levenshtein / + Damerau-Levenshtein distance is then computed for each candidate using + the `target` value and the remainders of the strings, which means that the + **prefix needs to be removed from `target`** (see + [example](#example-matching-with-prefix-search)). This option can improve + performance in cases where there is a known common prefix. The default value + is an empty string (introduced in v3.7.13, v3.8.1). + +#### Example: Matching with and without transpositions + +The Levenshtein distance between _quick_ and _quikc_ is `2` because it requires +two operations to go from one to the other (remove _k_, insert _k_ at a +different position). + +```aql +FOR doc IN viewName + SEARCH LEVENSHTEIN_MATCH(doc.text, "quikc", 2, false) // matches "quick" + RETURN doc.text +``` + +The Damerau-Levenshtein distance is `1` (move _k_ to the end). + +```aql +FOR doc IN viewName + SEARCH LEVENSHTEIN_MATCH(doc.text, "quikc", 1) // matches "quick" + RETURN doc.text +``` + +#### Example: Matching with prefix search + +Match documents with a Levenshtein distance of 1 with the prefix `qui`. The edit +distance is calculated using the search term `kc` (`quikc` with the prefix `qui` +removed) and the stored value without the prefix (e.g. `ck`). The prefix `qui` +is constant. + +```aql +FOR doc IN viewName + SEARCH LEVENSHTEIN_MATCH(doc.text, "kc", 1, false, 64, "qui") // matches "quick" + RETURN doc.text +``` + +You may compute the prefix and suffix from the input string as follows: + +```aql +LET input = "quikc" +LET prefixSize = 3 +LET prefix = LEFT(input, prefixSize) +LET suffix = SUBSTRING(input, prefixSize) +FOR doc IN viewName + SEARCH LEVENSHTEIN_MATCH(doc.text, suffix, 1, false, 64, prefix) // matches "quick" + RETURN doc.text +``` + +#### Example: Basing the edit distance on string length + +You may want to pick the maximum edit distance based on string length. 
+If the stored attribute is the string _quick_ and the target string is +_quicksands_, then the Levenshtein distance is 5, with 50% of the +characters mismatching. If the inputs are _q_ and _qu_, then the distance +is only 1, although it is also a 50% mismatch. + +```aql +LET target = "input" +LET targetLength = LENGTH(target) +LET maxDistance = (targetLength > 5 ? 2 : (targetLength >= 3 ? 1 : 0)) +FOR doc IN viewName + SEARCH LEVENSHTEIN_MATCH(doc.text, target, maxDistance, true) + RETURN doc.text +``` + +### LIKE() + +`LIKE(path, search) → bool` + +Check whether the pattern `search` is contained in the attribute denoted by `path`, +using wildcard matching. + +- `_`: A single arbitrary character +- `%`: Zero, one or many arbitrary characters +- `\\_`: A literal underscore +- `\\%`: A literal percent sign + +{{< info >}} +Literal backlashes require different amounts of escaping depending on the +context: +- `\` in bind variables (_Table_ view mode) in the web interface (automatically + escaped to `\\` unless the value is wrapped in double quotes and already + escaped properly) +- `\\` in bind variables (_JSON_ view mode) and queries in the web interface +- `\\` in bind variables in arangosh +- `\\\\` in queries in arangosh +- Double the amount compared to arangosh in shells that use backslashes for +escaping (`\\\\` in bind variables and `\\\\\\\\` in queries) +{{< /info >}} + +Searching with the `LIKE()` function in the context of a `SEARCH` operation +is backed by View indexes. The [String `LIKE()` function](string.md#like) +is used in other contexts such as in `FILTER` operations and cannot be +accelerated by any sort of index on the other hand. Another difference is that +the ArangoSearch variant does not accept a third argument to enable +case-insensitive matching. This can be controlled with Analyzers instead. + +- **path** (attribute path expression): the path of the attribute to compare + against in the document +- **search** (string): a search pattern that can contain the wildcard characters + `%` (meaning any sequence of characters, including none) and `_` (any single + character). Literal `%` and `_` must be escaped with backslashes. +- returns **bool** (bool): `true` if the pattern is contained in `text`, + and `false` otherwise + +#### Example: Searching with wildcards + +```aql +FOR doc IN viewName + SEARCH ANALYZER(LIKE(doc.text, "foo%b_r"), "text_en") + RETURN doc.text +``` + +`LIKE` can also be used in operator form: + +```aql +FOR doc IN viewName + SEARCH ANALYZER(doc.text LIKE "foo%b_r", "text_en") + RETURN doc.text +``` + +## Geo functions + +The following functions can be accelerated by View indexes. There are +corresponding [Geo Functions](geo.md) for the regular geo index +type, but also general purpose functions such as GeoJSON constructors that can +be used in conjunction with ArangoSearch. + +### GEO_CONTAINS() + +Introduced in: v3.8.0 + +`GEO_CONTAINS(geoJsonA, geoJsonB) → bool` + +Checks whether the [GeoJSON object](geo.md#geojson) `geoJsonA` +fully contains `geoJsonB` (every point in B is also in A). 
+ +- **geoJsonA** (object\|array): first GeoJSON object or coordinate array + (in longitude, latitude order) +- **geoJsonB** (object\|array): second GeoJSON object or coordinate array + (in longitude, latitude order) +- returns **bool** (bool): `true` when every point in B is also contained in A, + `false` otherwise + +### GEO_DISTANCE() + +Introduced in: v3.8.0 + +`GEO_DISTANCE(geoJsonA, geoJsonB) → distance` + +Return the distance between two [GeoJSON objects](geo.md#geojson), +measured from the `centroid` of each shape. + +- **geoJsonA** (object\|array): first GeoJSON object or coordinate array + (in longitude, latitude order) +- **geoJsonB** (object\|array): second GeoJSON object or coordinate array + (in longitude, latitude order) +- returns **distance** (number): the distance between the centroid points of + the two objects on the reference ellipsoid + +### GEO_IN_RANGE() + +Introduced in: v3.8.0 + +`GEO_IN_RANGE(geoJsonA, geoJsonB, low, high, includeLow, includeHigh) → bool` + +Checks whether the distance between two [GeoJSON objects](geo.md#geojson) +lies within a given interval. The distance is measured from the `centroid` of +each shape. + +- **geoJsonA** (object\|array): first GeoJSON object or coordinate array + (in longitude, latitude order) +- **geoJsonB** (object\|array): second GeoJSON object or coordinate array + (in longitude, latitude order) +- **low** (number): minimum value of the desired range +- **high** (number): maximum value of the desired range +- **includeLow** (bool, optional): whether the minimum value shall be included + in the range (left-closed interval) or not (left-open interval). The default + value is `true` +- **includeHigh** (bool): whether the maximum value shall be included in the + range (right-closed interval) or not (right-open interval). The default value + is `true` +- returns **bool** (bool): whether the evaluated distance lies within the range + +### GEO_INTERSECTS() + +Introduced in: v3.8.0 + +`GEO_INTERSECTS(geoJsonA, geoJsonB) → bool` + +Checks whether the [GeoJSON object](geo.md#geojson) `geoJsonA` +intersects with `geoJsonB` (i.e. at least one point of B is in A or vice versa). + +- **geoJsonA** (object\|array): first GeoJSON object or coordinate array + (in longitude, latitude order) +- **geoJsonB** (object\|array): second GeoJSON object or coordinate array + (in longitude, latitude order) +- returns **bool** (bool): `true` if A and B intersect, `false` otherwise + +## Scoring Functions + +Scoring functions return a ranking value for the documents found by a +[SEARCH operation](../high-level-operations/search.md). The better the documents match +the search expression the higher the returned number. + +The first argument to any scoring function is always the document emitted by +a `FOR` operation over an `arangosearch` View. + +To sort the result set by relevance, with the more relevant documents coming +first, sort in **descending order** by the score (e.g. `SORT BM25(...) DESC`). + +You may calculate custom scores based on a scoring function using document +attributes and numeric functions (e.g. `TFIDF(doc) * LOG(doc.value)`): + +```aql +FOR movie IN imdbView + SEARCH PHRASE(movie.title, "Star Wars", "text_en") + SORT BM25(movie) * LOG(movie.runtime + 1) DESC + RETURN movie +``` + +Sorting by more than one score is allowed. You may also sort by a mix of +scores and attributes from multiple Views as well as collections: + +```aql +FOR a IN viewA + FOR c IN coll + FOR b IN viewB + SORT TFIDF(b), c.name, BM25(a) + ... 
+``` + +### BM25() + +`BM25(doc, k, b) → score` + +Sorts documents using the +[**Best Matching 25** algorithm](https://en.wikipedia.org/wiki/Okapi_BM25) +(Okapi BM25). + +- **doc** (document): must be emitted by `FOR ... IN viewName` +- **k** (number, _optional_): calibrates the text term frequency scaling. + The value needs to be non-negative (`0.0` or higher), or the returned + score is an undefined value that may cause unpredictable results. + The default is `1.2`. A `k` value of `0` corresponds to a binary model + (no term frequency), and a large value corresponds to using raw term frequency +- **b** (number, _optional_): determines the scaling by the total text length. + The value needs to be between `0.0` and `1.0` (inclusive), or the returned + score is an undefined value that may cause unpredictable results. + The default is `0.75`. At the extreme values of the coefficient `b`, BM25 + turns into the ranking functions known as: + - BM11 for `b` = `1` (corresponds to fully scaling the term weight by the + total text length) + - BM15 for `b` = `0` (corresponds to no length normalization) +- returns **score** (number): computed ranking value + +{{< info >}} +The Analyzers used for indexing document attributes must have the `"frequency"` +feature enabled. The `BM25()` function will otherwise return a score of 0. +The Analyzers should have the `"norm"` feature enabled, too, or normalization +will be disabled, which is not meaningful for BM25 and BM11. BM15 does not need +the `"norm"` feature as it has no length normalization. +{{< /info >}} + +#### Example: Sorting by default `BM25()` score + +Sorting by relevance with BM25 at default settings: + +```aql +FOR doc IN viewName + SEARCH ... + SORT BM25(doc) DESC + RETURN doc +``` + +#### Example: Sorting with tuned `BM25()` ranking + +Sorting by relevance, with double-weighted term frequency and with full text +length normalization: + +```aql +FOR doc IN viewName + SEARCH ... + SORT BM25(doc, 2.4, 1) DESC + RETURN doc +``` + +### TFIDF() + +`TFIDF(doc, normalize) → score` + +Sorts documents using the +[**term frequency–inverse document frequency** algorithm](https://en.wikipedia.org/wiki/TF-IDF) +(TF-IDF). + +- **doc** (document): must be emitted by `FOR ... IN viewName` +- **normalize** (bool, _optional_): specifies whether scores should be + normalized. The default is `false`. +- returns **score** (number): computed ranking value + +{{< info >}} +The Analyzers used for indexing document attributes must have the `"frequency"` +feature enabled. The `TFIDF()` function will otherwise return a score of 0. +The Analyzers need to have the `"norm"` feature enabled, too, if you want to use +`TFIDF()` with the `normalize` parameter set to `true`. +{{< /info >}} + +#### Example: Sorting by default `TFIDF()` score + +Sort by relevance using the TF-IDF score: + +```aql +FOR doc IN viewName + SEARCH ... + SORT TFIDF(doc) DESC + RETURN doc +``` + +#### Example: Sorting by `TFIDF()` score with normalization + +Sort by relevance using a normalized TF-IDF score: + +```aql +FOR doc IN viewName + SEARCH ... + SORT TFIDF(doc, true) DESC + RETURN doc +``` + +#### Example: Sort by value and `TFIDF()` + +Sort by the value of the `text` attribute in ascending order, then by the TFIDF +score in descending order where the attribute values are equivalent: + +```aql +FOR doc IN viewName + SEARCH ... 
+ SORT doc.text, TFIDF(doc) DESC + RETURN doc +``` + +## Search Highlighting Functions + +{{< tag "ArangoDB Enterprise Edition" "AMP" >}} + +### OFFSET_INFO() + +`OFFSET_INFO(doc, paths) → offsetInfo` + +Returns the attribute paths and substring offsets of matched terms, phrases, or +_n_-grams for search highlighting purposes. + +- **doc** (document): must be emitted by `FOR ... IN viewName` +- **paths** (string\|array): a string or an array of strings, each describing an + attribute and array element path you want to get the offsets for. Use `.` to + access nested objects, and `[n]` with `n` being an array index to specify array + elements. The attributes need to be indexed by Analyzers with the `offset` + feature enabled. +- returns **offsetInfo** (array): an array of objects, limited to a default of + 10 offsets per path. Each object has the following attributes: + - **name** (array): the attribute and array element path as an array of + strings and numbers. You can pass this name to the + [`VALUE()` function](document-object.md) to dynamically look up the value. + - **offsets** (array): an array of arrays with the matched positions. Each + inner array has two elements with the start offset and the length of a match. + + {{< warning >}} + The offsets describe the positions in bytes, not characters. You may need + to account for characters encoded using multiple bytes. + {{< /warning >}} + +--- + +`OFFSET_INFO(doc, rules) → offsetInfo` + +- **doc** (document): must be emitted by `FOR ... IN viewName` +- **rules** (array): an array of objects with the following attributes: + - **name** (string): an attribute and array element path + you want to get the offsets for. Use `.` to access nested objects, + and `[n]` with `n` being an array index to specify array elements. The + attributes need to be indexed by Analyzers with the `offset` feature enabled. + - **options** (object): an object with the following attributes: + - **maxOffsets** (number, _optional_): the total number of offsets to + collect per path. Default: `10`. + - **limits** (object, _optional_): an object with the following attributes: + - **term** (number, _optional_): the total number of term offsets to + collect per path. Default: 232. + - **phrase** (number, _optional_): the total number of phrase offsets to + collect per path. Default: 232. + - **ngram** (number, _optional_): the total number of _n_-gram offsets to + collect per path. Default: 232. +- returns **offsetInfo** (array): an array of objects, each with the following + attributes: + - **name** (array): the attribute and array element path as an array of + strings and numbers. You can pass this name to the + [`VALUE()`](document-object.md) to dynamically look up the value. + - **offsets** (array): an array of arrays with the matched positions, capped + to the specified limits. Each inner array has two elements with the start + offset and the length of a match. + + {{< warning >}} + The start offsets and lengths describe the positions in bytes, not characters. + You may need to account for characters encoded using multiple bytes. + {{< /warning >}} + +**Examples** + +Search a View and get the offset information for the matches: + +```js +--- +name: aqlOffsetInfo +description: '' +--- +~db._create("food"); +~db.food.save({ name: "avocado", description: { en: "The avocado is a medium-sized, evergreen tree, native to the Americas." } }); +~db.food.save({ name: "tomato", description: { en: "The tomato is the edible berry of the tomato plant." 
} }); +~var analyzers = require("@arangodb/analyzers"); +~var analyzer = analyzers.save("text_en_offset", "text", { locale: "en", stopwords: [] }, ["frequency", "norm", "position", "offset"]); +~db._createView("food_view", "arangosearch", { links: { food: { fields: { description: { fields: { en: { analyzers: ["text_en_offset"] } } } } } } }); +~assert(db._query(`FOR d IN food_view COLLECT WITH COUNT INTO c RETURN c`).toArray()[0] === 2); +db._query(` + FOR doc IN food_view + SEARCH ANALYZER(TOKENS("avocado tomato", "text_en_offset") ANY == doc.description.en, "text_en_offset") + RETURN OFFSET_INFO(doc, ["description.en"])`); +~db._dropView("food_view"); +~db._drop("food"); +~analyzers.remove(analyzer.name); +``` + +For full examples, see [Search Highlighting](../../indexes-and-search/arangosearch/search-highlighting.md). diff --git a/site/content/arangodb/oem/aql/functions/array.md b/site/content/arangodb/oem/aql/functions/array.md new file mode 100644 index 0000000000..74cfa4e788 --- /dev/null +++ b/site/content/arangodb/oem/aql/functions/array.md @@ -0,0 +1,1041 @@ +--- +title: Array functions in AQL +menuTitle: Array +weight: 10 +description: >- + AQL provides functions for higher-level array manipulation in addition to + language constructs that can also be used for arrays +--- +You can use the AQL functions listed below to work with lists of items. Also +see the [numeric functions](numeric.md) for functions that work on number arrays. + +If you want to concatenate the elements of an array equivalent to `join()` +in JavaScript, see [`CONCAT()`](string.md#concat) and +[`CONCAT_SEPARATOR()`](string.md#concat_separator) in the string functions chapter. + +Apart from that, AQL also offers several language constructs: + +- simple [array access](../fundamentals/data-types.md#arrays--lists) of individual elements, +- [array operators](../operators.md#array-operators) for array expansion and contraction, + optionally with inline filter, limit and projection, +- [array comparison operators](../operators.md#array-comparison-operators) to compare + each element in an array to a value or the elements of another array, +- loop-based operations on arrays using [FOR](../high-level-operations/for.md), + [SORT](../high-level-operations/sort.md), + [LIMIT](../high-level-operations/limit.md), + as well as [COLLECT](../high-level-operations/collect.md) for grouping, + which also offers efficient aggregation. + +## APPEND() + +`APPEND(anyArray, values, unique) → newArray` + +Add all elements of an array to another array. All values are added at the end of the +array (right side). + +It can also be used to append a single element to an array. It is not necessary to wrap +it in an array (unless it is an array itself). You may also use [`PUSH()`](#push) instead. + +- **anyArray** (array): array with elements of arbitrary type +- **values** (array\|any): array, whose elements shall be added to `anyArray` +- **unique** (bool, *optional*): if set to `true`, all duplicate values are + removed from the resulting array. If `values` is an empty array or if either + `anyArray` or `values` is `null`, then the other input array is returned + unmodified. The default is `false`. 
+- returns **newArray** (array): the modified array + +**Examples** + +```aql +--- +name: aqlArrayAppend_1 +description: '' +--- +RETURN APPEND([ 1, 2, 3 ], [ 5, 6, 9 ]) +``` + +```aql +--- +name: aqlArrayAppend_2 +description: '' +--- +RETURN APPEND([ 1, 2, 3 ], [ 3, 4, 5, 2, 9 ], true) +``` + +## CONTAINS_ARRAY() + +This is an alias for [`POSITION()`](#position). + +## COUNT() + +This is an alias for [`LENGTH()`](#length). + +## COUNT_DISTINCT() + +`COUNT_DISTINCT(anyArray) → number` + +Get the number of distinct elements in an array. + +- **anyArray** (array): array with elements of arbitrary type +- returns **number**: the number of distinct elements in *anyArray*. + +**Examples** + +```aql +--- +name: aqlArrayCountDistinct_1 +description: '' +--- +RETURN COUNT_DISTINCT([ 1, 2, 3 ]) +``` + +```aql +--- +name: aqlArrayCountDistinct_2 +description: '' +--- +RETURN COUNT_DISTINCT([ "yes", "no", "yes", "sauron", "no", "yes" ]) +``` + +## COUNT_UNIQUE() + +This is an alias for [`COUNT_DISTINCT()`](#count_distinct). + +## FIRST() + +`FIRST(anyArray) → firstElement` + +Get the first element of an array. It is the same as `anyArray[0]`. + +- **anyArray** (array): array with elements of arbitrary type +- returns **firstElement** (any\|null): the first element of *anyArray*, or *null* if + the array is empty. + +**Examples** + +```aql +--- +name: aqlArrayFirst_1 +description: '' +--- +RETURN FIRST([ 1, 2, 3 ]) +``` + +```aql +--- +name: aqlArrayFirst_2 +description: '' +--- +RETURN FIRST([]) +``` + +## FLATTEN() + +`FLATTEN(anyArray, depth) → flatArray` + +Turn an array of arrays into a flat array. All array elements in *array* will be +expanded in the result array. Non-array elements are added as they are. The function +will recurse into sub-arrays up to the specified depth. Duplicates will not be removed. + +Also see [array contraction](../operators.md#array-contraction). + +- **array** (array): array with elements of arbitrary type, including nested arrays +- **depth** (number, *optional*): flatten up to this many levels, the default is 1 +- returns **flatArray** (array): a flattened array + +**Examples** + +```aql +--- +name: aqlArrayFlatten_1 +description: '' +--- +RETURN FLATTEN( [ 1, 2, [ 3, 4 ], 5, [ 6, 7 ], [ 8, [ 9, 10 ] ] ] ) +``` + +To fully flatten the example array, use a *depth* of 2: + +```aql +--- +name: aqlArrayFlatten_2 +description: '' +--- +RETURN FLATTEN( [ 1, 2, [ 3, 4 ], 5, [ 6, 7 ], [ 8, [ 9, 10 ] ] ], 2 ) +``` + +## INTERLEAVE() + +`INTERLEAVE(array1, array2, ... arrayN) → newArray` + +Accepts an arbitrary number of arrays and produces a new array with the elements +interleaved. It iterates over the input arrays in a round robin fashion, picks one element +from each array per iteration, and combines them in that sequence into a result array. +The input arrays can have different amounts of elements. 
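+
+For instance, once a shorter array is exhausted, the remaining arrays keep
+contributing their elements in order. A minimal sketch of this behavior (the
+result is inferred from the description above, not taken from the official
+examples):
+
+```aql
+INTERLEAVE( [ 1, 1 ], [ 2 ], [ 3, 3, 3 ] ) // expected: [ 1, 2, 3, 1, 3, 3 ]
+```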
+ +- **arrays** (array, *repeatable*): an arbitrary number of arrays as multiple + arguments (at least 2) +- returns **newArray** (array): the interleaved array + +**Examples** + +```aql +--- +name: aqlArrayInterleave_1 +description: '' +--- +RETURN INTERLEAVE( [1, 1, 1], [2, 2, 2], [3, 3, 3] ) +``` + +```aql +--- +name: aqlArrayInterleave_2 +description: '' +--- +RETURN INTERLEAVE( [ 1 ], [2, 2], [3, 3, 3] ) +``` + +```aql +--- +name: aqlArrayInterleave_3 +description: '' +dataset: kShortestPathsGraph +--- +FOR v, e, p IN 1..3 OUTBOUND 'places/Toronto' GRAPH 'kShortestPathsGraph' + RETURN INTERLEAVE(p.vertices[*]._id, p.edges[*]._id) +``` + +## INTERSECTION() + +`INTERSECTION(array1, array2, ... arrayN) → newArray` + +Return the intersection of all arrays specified. The result is an array of values that +occur in all arguments. + +Other set operations are [`UNION()`](#union), [`MINUS()`](#minus) and +[`OUTERSECTION()`](#outersection). + +- **arrays** (array, *repeatable*): an arbitrary number of arrays as multiple arguments + (at least 2) +- returns **newArray** (array): a single array with only the elements, which exist in all + provided arrays. The element order is random. Duplicates are removed. + +**Examples** + +```aql +--- +name: aqlArrayIntersection_1 +description: '' +--- +RETURN INTERSECTION( [1,2,3,4,5], [2,3,4,5,6], [3,4,5,6,7] ) +``` + +```aql +--- +name: aqlArrayIntersection_2 +description: '' +--- +RETURN INTERSECTION( [2,4,6], [8,10,12], [14,16,18] ) +``` + +## JACCARD() + +`JACCARD(array1, array2) → jaccardIndex` + +Calculate the [Jaccard index](https://en.wikipedia.org/wiki/Jaccard_index) +of two arrays. + +This similarity measure is also known as _Intersection over Union_ and could +be computed (less efficient and more verbose) as follows: + +```aql +COUNT(a) == 0 && COUNT(b) == 0 +? 1 // two empty sets have a similarity of 1 by definition +: COUNT(INTERSECTION(array1, array2)) / COUNT(UNION_DISTINCT(array1, array2)) +``` + +- **array1** (array): array with elements of arbitrary type +- **array2** (array): array with elements of arbitrary type +- returns **jaccardIndex** (number): calculated Jaccard index of the input + arrays *array1* and *array2* + +```aql +--- +name: aqlArrayJaccard_1 +description: '' +--- +RETURN JACCARD( [1,2,3,4], [3,4,5,6] ) +``` + +```aql +--- +name: aqlArrayJaccard_2 +description: '' +--- +RETURN JACCARD( [1,1,2,2,2,3], [2,2,3,4] ) +``` + +```aql +--- +name: aqlArrayJaccard_3 +description: '' +--- +RETURN JACCARD( [1,2,3], [] ) +``` + +```aql +--- +name: aqlArrayJaccard_4 +description: '' +--- +RETURN JACCARD( [], [] ) +``` + +## LAST() + +`LAST(anyArray) → lastElement` + +Get the last element of an array. It is the same as `anyArray[-1]`. + +- **anyArray** (array): array with elements of arbitrary type +- returns **lastElement** (any\|null): the last element of *anyArray* or *null* if the + array is empty. + +**Example** + +```aql +--- +name: aqlArrayLast_1 +description: '' +--- +RETURN LAST( [1,2,3,4,5] ) +``` + +## LENGTH() + +`LENGTH(anyArray) → length` + +Determine the number of elements in an array. + +- **anyArray** (array): array with elements of arbitrary type +- returns **length** (number): the number of array elements in *anyArray*. + +`LENGTH()` can also determine the [number of attribute keys](document-object.md#length) +of an object / document, the [amount of documents](miscellaneous.md#length) in a +collection and the [character length](string.md#length) of a string. 
+ +| Input | Length | +|--------|--------| +| String | Number of Unicode characters | +| Number | Number of Unicode characters that represent the number | +| Array | Number of elements | +| Object | Number of first level elements | +| true | 1 | +| false | 0 | +| null | 0 | + +**Examples** + +```aql +--- +name: aqlArrayLength_1 +description: '' +--- +RETURN LENGTH( "🥑" ) +``` + +```aql +--- +name: aqlArrayLength_2 +description: '' +--- +RETURN LENGTH( 1234 ) +``` + +```aql +--- +name: aqlArrayLength_3 +description: '' +--- +RETURN LENGTH( [1,2,3,4,5,6,7] ) +``` + +```aql +--- +name: aqlArrayLength_4 +description: '' +--- +RETURN LENGTH( false ) +``` + +```aql +--- +name: aqlArrayLength_5 +description: '' +--- +RETURN LENGTH( {a:1, b:2, c:3, d:4, e:{f:5,g:6}} ) +``` + +## MINUS() + +`MINUS(array1, array2, ... arrayN) → newArray` + +Return the difference of all arrays specified. + +Other set operations are [`UNION()`](#union), [`INTERSECTION()`](#intersection) +and [`OUTERSECTION()`](#outersection). + +- **arrays** (array, *repeatable*): an arbitrary number of arrays as multiple + arguments (at least 2) +- returns **newArray** (array): an array of values that occur in the first array, + but not in any of the subsequent arrays. The order of the result array is undefined + and should not be relied on. Duplicates will be removed. + +**Example** + +```aql +--- +name: aqlArrayMinus_1 +description: '' +--- +RETURN MINUS( [1,2,3,4], [3,4,5,6], [5,6,7,8] ) +``` + +## NTH() + +`NTH(anyArray, position) → nthElement` + +Get the element of an array at a given position. It is the same as `anyArray[position]` +for positive positions, but does not support negative positions. + +- **anyArray** (array): array with elements of arbitrary type +- **position** (number): position of desired element in array, positions start at 0 +- returns **nthElement** (any\|null): the array element at the given *position*. + If *position* is negative or beyond the upper bound of the array, + then *null* will be returned. + +**Examples** + +```aql +--- +name: aqlArrayNth_1 +description: '' +--- +RETURN NTH( [ "foo", "bar", "baz" ], 2 ) +``` + +```aql +--- +name: aqlArrayNth_2 +description: '' +--- +RETURN NTH( [ "foo", "bar", "baz" ], 3 ) +``` + +```aql +--- +name: aqlArrayNth_3 +description: '' +--- +RETURN NTH( [ "foo", "bar", "baz" ], -1 ) +``` + +## OUTERSECTION() + +`OUTERSECTION(array1, array2, ... arrayN) → newArray` + +Return the values that occur only once across all arrays specified. + +Other set operations are [`UNION()`](#union), [`MINUS()`](#minus) and +[`INTERSECTION()`](#intersection). + +- **arrays** (array, *repeatable*): an arbitrary number of arrays as multiple arguments + (at least 2) +- returns **newArray** (array): a single array with only the elements that exist only once + across all provided arrays. The element order is random. + +**Example** + +```aql +--- +name: aqlArrayOutersection_1 +description: '' +--- +RETURN OUTERSECTION( [ 1, 2, 3 ], [ 2, 3, 4 ], [ 3, 4, 5 ] ) +``` + +## POP() + +`POP(anyArray) → newArray` + +Remove the last element of *array*. + +To append an element (right side), see [`PUSH()`](#push).\ +To remove the first element, see [`SHIFT()`](#shift).\ +To remove an element at an arbitrary position, see [`REMOVE_NTH()`](#remove_nth). + +- **anyArray** (array): an array with elements of arbitrary type +- returns **newArray** (array): *anyArray* without the last element. If it's already + empty or has only a single element left, an empty array is returned. 
+ +**Examples** + +```aql +--- +name: aqlArrayPop_1 +description: '' +--- +RETURN POP( [ 1, 2, 3, 4 ] ) +``` + +```aql +--- +name: aqlArrayPop_2 +description: '' +--- +RETURN POP( [ 1 ] ) +``` + +## POSITION() + +`POSITION(anyArray, search, returnIndex) → position` + +Return whether *search* is contained in *array*. Optionally return the position. + +- **anyArray** (array): the haystack, an array with elements of arbitrary type +- **search** (any): the needle, an element of arbitrary type +- **returnIndex** (bool, *optional*): if set to *true*, the position of the match + is returned instead of a boolean. The default is *false*. +- returns **position** (bool\|number): *true* if *search* is contained in *anyArray*, + *false* otherwise. If *returnIndex* is enabled, the position of the match is + returned (positions start at 0), or *-1* if it's not found. + +If you want to check if a value is in an array, you can alternatively use +the [`IN` operator](../operators.md#comparison-operators), for example, +`3 IN [1, 2, 3]` instead of `POSITION([1, 2, 3], 3)`. + +To determine if or at which position a string occurs in another string, see the +[`CONTAINS()` string function](string.md#contains). + +**Examples** + +Test whether a value is contained in an array: + +```aql +--- +name: aqlArrayPosition_1 +description: '' +--- +RETURN POSITION( [2,4,6,8], 4 ) +``` + +Return the position of the match, i.e. the array index, or `-1` if the value is +not contained in the array: + +```aql +--- +name: aqlArrayPosition_2 +description: '' +--- +RETURN POSITION( [2,4,6,8], 4, true ) +``` + +If you want to search a list of objects, you can use the +[array expansion operator `[*]`](../operators.md#array-expansion). +For example, you can get an attribute from each object using the operator, and +then determine the array index of the first match using the `POSITION()` function: + +```aql +--- +name: aqlArrayPosition_3 +description: '' +--- +LET arr = [ { value: "foo" }, { value: "bar" }, { value: "baz" }, { value: "bay"} ] +RETURN POSITION(arr[*].value, "baz", true) +``` + +If you are not interested in the actual position but only want to check for +existence, you may use the `IN` operator instead of calling `POSITION()`, like +`"baz" IN arr[*].value`. + +## PUSH() + +`PUSH(anyArray, value, unique) → newArray` + +Append *value* to *anyArray* (right side). + +To remove the last element, see [`POP()`](#pop).\ +To prepend a value (left side), see [`UNSHIFT()`](#unshift).\ +To append multiple elements, see [`APPEND()`](#append). + +- **anyArray** (array): array with elements of arbitrary type +- **value** (any): an element of arbitrary type +- **unique** (bool): if set to *true*, then *value* is not added if already + present in the array. The default is *false*. +- returns **newArray** (array): *anyArray* with *value* added at the end + (right side) + +Note: The *unique* flag only controls if *value* is added if it's already present +in *anyArray*. Duplicate elements that already exist in *anyArray* will not be +removed. To make an array unique, use the [`UNIQUE()`](#unique) function. + +**Examples** + +```aql +--- +name: aqlArrayPush_1 +description: '' +--- +RETURN PUSH([ 1, 2, 3 ], 4) +``` + +```aql +--- +name: aqlArrayPush_2 +description: '' +--- +RETURN PUSH([ 1, 2, 2, 3 ], 2, true) +``` + +## REMOVE_NTH() + +`REMOVE_NTH(anyArray, position) → newArray` + +Remove the element at *position* from the *anyArray*. + +To remove the first element, see [`SHIFT()`](#shift).\ +To remove the last element, see [`POP()`](#pop). 
+ +- **anyArray** (array): array with elements of arbitrary type +- **position** (number): the position of the element to remove. Positions start + at 0. Negative positions are supported, with -1 being the last array element. + If *position* is out of bounds, the array is returned unmodified. +- returns **newArray** (array): *anyArray* without the element at *position* + +**Examples** + +```aql +--- +name: aqlArrayRemoveNth_1 +description: '' +--- +RETURN REMOVE_NTH( [ "a", "b", "c", "d", "e" ], 1 ) +``` + +```aql +--- +name: aqlArrayRemoveNth_2 +description: '' +--- +RETURN REMOVE_NTH( [ "a", "b", "c", "d", "e" ], -2 ) +``` + +## REPLACE_NTH() + +`REPLACE_NTH(anyArray, position, replaceValue, defaultPaddingValue) → newArray` + +Replace the element at *position* in *anyArray* with *replaceValue*. + +- **anyArray** (array): array with elements of arbitrary type +- **position** (number): the position of the element to replace. Positions start + at 0. Negative positions are supported, with -1 being the last array element. + If a negative *position* is out of bounds, then it is set to the first element (0) +- **replaceValue** the value to be inserted at *position* +- **defaultPaddingValue** to be used for padding if *position* is two or more + elements beyond the last element in *anyArray* +- returns **newArray** (array): *anyArray* with the element at *position* + replaced by *replaceValue*, or appended to *anyArray* and possibly padded by + *defaultPaddingValue* + +It is allowed to specify a position beyond the upper array boundary: +- *replaceValue* is appended if *position* is equal to the array length +- if it is higher, *defaultPaddingValue* is appended to *anyArray* as many + times as needed to place *replaceValue* at *position* +- if no *defaultPaddingValue* is supplied in above case, then a query error + is raised + +**Examples** + +```aql +--- +name: aqlArrayReplaceNth_1 +description: '' +--- +RETURN REPLACE_NTH( [ "a", "b", "c" ], 1 , "z") +``` + +```aql +--- +name: aqlArrayReplaceNth_2 +description: '' +--- +RETURN REPLACE_NTH( [ "a", "b", "c" ], 3 , "z") +``` + +```aql +--- +name: aqlArrayReplaceNth_4 +description: '' +--- +RETURN REPLACE_NTH( [ "a", "b", "c" ], 6, "z", "y" ) +``` + +```aql +--- +name: aqlArrayReplaceNth_5 +description: '' +--- +RETURN REPLACE_NTH( [ "a", "b", "c" ], -1, "z" ) +``` + +```aql +--- +name: aqlArrayReplaceNth_6 +description: '' +--- +RETURN REPLACE_NTH( [ "a", "b", "c" ], -9, "z" ) +``` + +Trying to access out of bounds, without providing a padding value will result in an error: + +```js +--- +name: aqlArrayReplaceNth_3 +description: '' +--- +db._query('RETURN REPLACE_NTH( [ "a", "b", "c" ], 6 , "z")'); // xpError(ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH) +``` + +## REMOVE_VALUE() + +`REMOVE_VALUE(anyArray, value, limit) → newArray` + +Remove all occurrences of *value* in *anyArray*. Optionally with a *limit* +to the number of removals. 
+ +- **anyArray** (array): array with elements of arbitrary type +- **value** (any): an element of arbitrary type +- **limit** (number, *optional*): cap the number of removals to this value +- returns **newArray** (array): *anyArray* with *value* removed + +**Examples** + +```aql +--- +name: aqlArrayRemoveValue_1 +description: '' +--- +RETURN REMOVE_VALUE( [ "a", "b", "b", "a", "c" ], "a" ) +``` + +```aql +--- +name: aqlArrayRemoveValue_2 +description: '' +--- +RETURN REMOVE_VALUE( [ "a", "b", "b", "a", "c" ], "a", 1 ) +``` + +## REMOVE_VALUES() + +`REMOVE_VALUES(anyArray, values) → newArray` + +Remove all occurrences of any of the *values* from *anyArray*. + +- **anyArray** (array): array with elements of arbitrary type +- **values** (array): an array with elements of arbitrary type, that shall + be removed from *anyArray* +- returns **newArray** (array): *anyArray* with all individual *values* removed + +**Example** + +```aql +--- +name: aqlArrayRemoveValues_1 +description: '' +--- +RETURN REMOVE_VALUES( [ "a", "a", "b", "c", "d", "e", "f" ], [ "a", "f", "d" ] ) +``` + +## REVERSE() + +`REVERSE(anyArray) → reversedArray` + +Return an array with its elements reversed. + +- **anyArray** (array): array with elements of arbitrary type +- returns **reversedArray** (array): a new array with all elements of *anyArray* in + reversed order + +**Example** + +```aql +--- +name: aqlArrayReverse_1 +description: '' +--- +RETURN REVERSE ( [2,4,6,8,10] ) +``` + +## SHIFT() + +`SHIFT(anyArray) → newArray` + +Remove the first element of *anyArray*. + +To prepend an element (left side), see [`UNSHIFT()`](#unshift).\ +To remove the last element, see [`POP()`](#pop).\ +To remove an element at an arbitrary position, see [`REMOVE_NTH()`](#remove_nth). + +- **anyArray** (array): array with elements with arbitrary type +- returns **newArray** (array): *anyArray* without the left-most element. If *anyArray* + is already empty or has only one element left, an empty array is returned. + +**Examples** + +```aql +--- +name: aqlArrayShift_1 +description: '' +--- +RETURN SHIFT( [ 1, 2, 3, 4 ] ) +``` + +```aql +--- +name: aqlArrayShift_2 +description: '' +--- +RETURN SHIFT( [ 1 ] ) +``` + +## SLICE() + +`SLICE(anyArray, start, length) → newArray` + +Extract a slice of *anyArray*. + +- **anyArray** (array): array with elements of arbitrary type +- **start** (number): start extraction at this element. Positions start at 0. + Negative values indicate positions from the end of the array. +- **length** (number, *optional*): extract up to *length* elements, or all + elements from *start* up to *length* if negative (exclusive) +- returns **newArray** (array): the specified slice of *anyArray*. If *length* + is not specified, all array elements starting at *start* will be returned. + +**Examples** + +```aql +--- +name: aqlArraySlice_1 +description: '' +--- +RETURN SLICE( [ 1, 2, 3, 4, 5 ], 0, 1 ) +``` + +```aql +--- +name: aqlArraySlice_2 +description: '' +--- +RETURN SLICE( [ 1, 2, 3, 4, 5 ], 1, 2 ) +``` + +```aql +--- +name: aqlArraySlice_3 +description: '' +--- +RETURN SLICE( [ 1, 2, 3, 4, 5 ], 3 ) +``` + +```aql +--- +name: aqlArraySlice_4 +description: '' +--- +RETURN SLICE( [ 1, 2, 3, 4, 5 ], 1, -1 ) +``` + +```aql +--- +name: aqlArraySlice_5 +description: '' +--- +RETURN SLICE( [ 1, 2, 3, 4, 5 ], 0, -2 ) +``` + +```aql +--- +name: aqlArraySlice_6 +description: '' +--- +RETURN SLICE( [ 1, 2, 3, 4, 5 ], -3, 2 ) +``` + +## SORTED() + +`SORTED(anyArray) → newArray` + +Sort all elements in *anyArray*. 
The function will use the default comparison +order for AQL value types. + +- **anyArray** (array): array with elements of arbitrary type +- returns **newArray** (array): *anyArray*, with elements sorted + +**Example** + +```aql +--- +name: aqlArraySorted_1 +description: '' +--- +RETURN SORTED( [ 8,4,2,10,6 ] ) +``` + +## SORTED_UNIQUE() + +`SORTED_UNIQUE(anyArray) → newArray` + +Sort all elements in *anyArray*. The function will use the default comparison +order for AQL value types. Additionally, the values in the result array will +be made unique. + +- **anyArray** (array): array with elements of arbitrary type +- returns **newArray** (array): *anyArray*, with elements sorted and duplicates + removed + +**Example** + +```aql +--- +name: aqlArraySortedUnique_1 +description: '' +--- +RETURN SORTED_UNIQUE( [ 8,4,2,10,6,2,8,6,4 ] ) +``` + +## UNION() + +`UNION(array1, array2, ... arrayN) → newArray` + +Return the union of all arrays specified. + +Other set operations are [`MINUS()`](#minus), [`INTERSECTION()`](#intersection) +and [`OUTERSECTION()`](#outersection). + +- **arrays** (array, *repeatable*): an arbitrary number of arrays as multiple + arguments (at least 2) +- returns **newArray** (array): all array elements combined in a single array, + in any order + +**Examples** + +```aql +--- +name: aqlArrayUnion_1 +description: '' +--- +RETURN UNION( + [ 1, 2, 3 ], + [ 1, 2 ] +) +``` + +Note: No duplicates will be removed. In order to remove duplicates, please use +either [`UNION_DISTINCT()`](#union_distinct) or apply [`UNIQUE()`](#unique) on the +result of `UNION()`: + +```aql +--- +name: aqlArrayUnion_2 +description: '' +--- +RETURN UNIQUE( + UNION( + [ 1, 2, 3 ], + [ 1, 2 ] + ) +) +``` + +## UNION_DISTINCT() + +`UNION_DISTINCT(array1, array2, ... arrayN) → newArray` + +Return the union of distinct values of all arrays specified. + +- **arrays** (array, *repeatable*): an arbitrary number of arrays as multiple + arguments (at least 2) +- returns **newArray** (array): the elements of all given arrays in a single + array, without duplicates, in any order + +**Example** + +```aql +--- +name: aqlArrayUnionDistinct_1 +description: '' +--- +RETURN UNION_DISTINCT( + [ 1, 2, 3 ], + [ 1, 2 ] +) +``` + +## UNIQUE() + +`UNIQUE(anyArray) → newArray` + +Return all unique elements in *anyArray*. To determine uniqueness, the +function will use the comparison order. + +- **anyArray** (array): array with elements of arbitrary type +- returns **newArray** (array): *anyArray* without duplicates, in any order + +**Example** + +```aql +--- +name: aqlArrayUnique_1 +description: '' +--- +RETURN UNIQUE( [ 1,2,2,3,3,3,4,4,4,4,5,5,5,5,5 ] ) +``` + +## UNSHIFT() + +`UNSHIFT(anyArray, value, unique) → newArray` + +Prepend *value* to *anyArray* (left side). + +To remove the first element, see [`SHIFT()`](#shift).\ +To append a value (right side), see [`PUSH()`](#push). + +- **anyArray** (array): array with elements of arbitrary type +- **value** (any): an element of arbitrary type +- **unique** (bool): if set to *true*, then *value* is not added if already + present in the array. The default is *false*. +- returns **newArray** (array): *anyArray* with *value* added at the start + (left side) + +Note: The *unique* flag only controls if *value* is added if it's already present +in *anyArray*. Duplicate elements that already exist in *anyArray* will not be +removed. To make an array unique, use the [`UNIQUE()`](#unique) function. 
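+
+A minimal sketch of that behavior, with the expected result inferred from the
+note above rather than taken from the official examples: the value is not
+prepended because it is already present, and the existing duplicates remain.
+
+```aql
+UNSHIFT( [ 1, 1, 2 ], 1, true ) // expected: [ 1, 1, 2 ]
+```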
+ +**Examples** + +```aql +--- +name: aqlArrayUnshift_1 +description: '' +--- +RETURN UNSHIFT( [ 1, 2, 3 ], 4 ) +``` + +```aql +--- +name: aqlArrayUnshift_2 +description: '' +--- +RETURN UNSHIFT( [ 1, 2, 3 ], 2, true ) +``` diff --git a/site/content/arangodb/oem/aql/functions/bit.md b/site/content/arangodb/oem/aql/functions/bit.md new file mode 100644 index 0000000000..bca40a82f6 --- /dev/null +++ b/site/content/arangodb/oem/aql/functions/bit.md @@ -0,0 +1,321 @@ +--- +title: Bit functions in AQL +menuTitle: Bit +weight: 15 +description: >- + AQL offers a set of bit manipulation and interpretation functions for bitwise + arithmetic +--- +Bit functions can operate on numeric integer values in the range between 0 +and 4294967295 (232 - 1), both included. This allows treating numbers as +bitsets of up to 32 members. Using any of the bit functions on numbers outside +the supported range will make the function return `null` and register a warning. + +The value range for the bit functions is conservatively small, so that no +precision loss or rounding errors should occur when the input/output values of +bit functions are passed around or sent over the wire to client applications +with unknown precision number types. + +## BIT_AND() + +`BIT_AND(numbersArray) → result` + +And-combines the numeric values in *numbersArray* into a single numeric result +value. + +- **numbersArray** (array): array with numeric input values +- returns **result** (number\|null): and-combined result + +The function expects an array with numeric values as its input. The values in +the array must be numbers, which must not be negative. The maximum supported +input number value is 232 - 1. Input number values outside the allowed +range will make the function return `null` and produce a warning. Any `null` +values in the input array are ignored. + +--- + +`BIT_AND(value1, value2) → result` + +If two numbers are passed as individual function parameters to `BIT_AND()`, it +will return the bitwise and value of its two operands. Only numbers in the +range 0 to 232 - 1 are allowed as input values. + +- **value1** (number): first operand +- **value2** (number): second operand +- returns **result** (number\|null): and-combined result + +```aql +BIT_AND([1, 4, 8, 16]) // 0 +BIT_AND([3, 7, 63]) // 3 +BIT_AND([255, 127, null, 63]) // 63 +BIT_AND(127, 255) // 127 +BIT_AND("foo") // null +``` + +## BIT_CONSTRUCT() + +`BIT_CONSTRUCT(positionsArray) → result` + +Construct a number value with its bits set at the positions given in the array. + +- **positionArray** (array): array with bit positions to set (zero-based) +- returns **result** (number\|null): the generated number + +The function expects an array with numeric values as its input. The values in +the array must be numbers, which must not be negative. The maximum supported +input number value is 31. Input number values outside the allowed range will +make the function return `null` and produce a warning. + +```aql +BIT_CONSTRUCT([1, 2, 3]) // 14 +BIT_CONSTRUCT([0, 4, 8]) // 273 +BIT_CONSTRUCT([0, 1, 10, 31]) // 2147484675 +``` + +## BIT_DECONSTRUCT() + +`BIT_DECONSTRUCT(number) → positionsArray` + +Deconstruct a number value into an array with the positions of its set bits. + +- **number** (number): the input value to deconstruct +- returns **positionArray** (array\|null): array with bit positions set (zero-based) + +The function turns a numeric value into an array with the positions of all its +set bits. The positions in the output array are zero-based. 
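+
+`BIT_DECONSTRUCT()` is the counterpart of [`BIT_CONSTRUCT()`](#bit_construct).
+Feeding the result back into `BIT_CONSTRUCT()` should therefore reproduce the
+original value, as this sketch suggests (inferred from the two descriptions,
+not an official example):
+
+```aql
+BIT_CONSTRUCT( BIT_DECONSTRUCT(14) ) // expected: 14
+```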
+The input value must be a number between 0 and 232 - 1 (including). +The function will return `null` for any other inputs and produce a warning. + +```aql +BIT_DECONSTRUCT(14) // [1, 2, 3] +BIT_DECONSTRUCT(273) // [0, 4, 8] +BIT_DECONSTRUCT(2147484675) // [0, 1, 10, 31] +``` + +## BIT_FROM_STRING() + +`BIT_FROM_STRING(bitstring) → number` + +Converts a bitstring (consisting of digits `0` and `1`) into a number. + +To convert a number into a bitstring, see [`BIT_TO_STRING()`](#bit_to_string). + +- **bitstring** (string): string sequence consisting of `0` and `1` characters +- returns **number** (number\|null): the parsed number + +The input value must be a bitstring, consisting only of `0` and `1` characters. +The bitstring can contain up to 32 significant bits, including any leading zeros. +Note that the bitstring must not start with `0b`. +If the bitstring has an invalid format, this function returns `null` and produces +a warning. + +```aql +BIT_FROM_STRING("0111") // 7 +BIT_FROM_STRING("000000000000010") // 2 +BIT_FROM_STRING("11010111011101") // 13789 +BIT_FROM_STRING("100000000000000000000") // 1048756 +``` + +## BIT_NEGATE() + +`BIT_NEGATE(number, bits) → result` + +Bitwise-negates the bits in **number**, and keeps up to **bits** bits in the +result. + +- **number** (number): the number to negate +- **bits** (number): number of bits to keep in the result (0 to 32) +- returns **result** (number\|null): the resulting number, with up to **bits** + significant bits + +The input value must be a number between 0 and 232 - 1 (including). +The number of bits must be between 0 and 32. The function will return `null` for +any other inputs and produce a warning. + +```aql +BIT_NEGATE(0, 8) // 255 +BIT_NEGATE(0, 10) // 1023 +BIT_NEGATE(3, 4) // 12 +BIT_NEGATE(446359921, 32) // 3848607374 +``` + +## BIT_OR() + +`BIT_OR(numbersArray) → result` + +Or-combines the numeric values in *numbersArray* into a single numeric result +value. + +- **numbersArray** (array): array with numeric input values +- returns **result** (number\|null): or-combined result + +The function expects an array with numeric values as its input. The values in +the array must be numbers, which must not be negative. The maximum supported +input number value is 232 - 1. Input number values outside the +allowed range will make the function return `null` and produce a warning. +Any `null` values in the input array are ignored. + +--- + +`BIT_OR(value1, value2) → result` + +If two numbers are passed as individual function parameters to `BIT_OR()`, it +will return the bitwise or value of its two operands. Only numbers in the range +0 to 232 - 1 are allowed as input values. + +- **value1** (number): first operand +- **value2** (number): second operand +- returns **result** (number\|null): or-combined result + +```aql +BIT_OR([1, 4, 8, 16]) // 29 +BIT_OR([3, 7, 63]) // 63 +BIT_OR([255, 127, null, 63]) // 255 +BIT_OR(255, 127) // 255 +BIT_OR("foo") // null +``` + +## BIT_POPCOUNT() + +`BIT_POPCOUNT(number) → result` + +Counts the number of bits set in the input value. + +- **number** (number): array with numeric input values +- returns **result** (number\|null): number of bits set in the input value + +The input value must be a number between 0 and 232 - 1 (including). +The function will return `null` for any other inputs and produce a warning. 
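+
+The bit count corresponds to the number of positions returned by
+[`BIT_DECONSTRUCT()`](#bit_deconstruct), as this sketch suggests (the
+equivalence is inferred from the two descriptions, not an official example):
+
+```aql
+LENGTH( BIT_DECONSTRUCT(69399252) ) // expected: 12, like BIT_POPCOUNT(69399252)
+```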
+
+```aql
+BIT_POPCOUNT(0) // 0
+BIT_POPCOUNT(255) // 8
+BIT_POPCOUNT(69399252) // 12
+BIT_POPCOUNT("foo") // null
+```
+
+## BIT_SHIFT_LEFT()
+
+`BIT_SHIFT_LEFT(number, shift, bits) → result`
+
+Bitwise-shifts the bits in **number** to the left, and keeps up to **bits**
+bits in the result. When bits overflow due to the shift, they are discarded.
+
+- **number** (number): the number to shift
+- **shift** (number): number of bits to shift (0 to 32)
+- **bits** (number): number of bits to keep in the result (0 to 32)
+- returns **result** (number\|null): the resulting number, with up to **bits**
+  significant bits
+
+The input value must be a number between 0 and 2<sup>32</sup> - 1 (including).
+The number of bits must be between 0 and 32. The function will return `null` for
+any other inputs and produce a warning.
+
+```aql
+BIT_SHIFT_LEFT(0, 1, 8) // 0
+BIT_SHIFT_LEFT(7, 1, 16) // 14
+BIT_SHIFT_LEFT(2, 10, 16) // 2048
+BIT_SHIFT_LEFT(878836, 16, 32) // 1760821248
+```
+
+## BIT_SHIFT_RIGHT()
+
+`BIT_SHIFT_RIGHT(number, shift, bits) → result`
+
+Bitwise-shifts the bits in **number** to the right, and keeps up to **bits**
+bits in the result. When bits overflow due to the shift, they are discarded.
+
+- **number** (number): the number to shift
+- **shift** (number): number of bits to shift (0 to 32)
+- **bits** (number): number of bits to keep in the result (0 to 32)
+- returns **result** (number\|null): the resulting number, with up to **bits**
+  significant bits
+
+The input value must be a number between 0 and 2<sup>32</sup> - 1 (including).
+The number of bits must be between 0 and 32. The function will return `null` for
+any other inputs and produce a warning.
+
+```aql
+BIT_SHIFT_RIGHT(0, 1, 8) // 0
+BIT_SHIFT_RIGHT(33, 1, 16) // 16
+BIT_SHIFT_RIGHT(65536, 13, 16) // 8
+BIT_SHIFT_RIGHT(878836, 4, 32) // 54927
+```
+
+## BIT_TEST()
+
+`BIT_TEST(number, index) → result`
+
+Tests if the bit at position *index* is set in **number**.
+
+- **number** (number): the number to test
+- **index** (number): index of the bit to test (0 to 31)
+- returns **result** (boolean\|null): whether the bit is set
+
+The input value must be a number between 0 and 2<sup>32</sup> - 1 (including).
+The **index** must be between 0 and 31. The function will return `null` for any
+other inputs and produce a warning.
+
+```aql
+BIT_TEST(0, 3) // false
+BIT_TEST(255, 0) // true
+BIT_TEST(7, 2) // true
+BIT_TEST(255, 8) // false
+```
+
+## BIT_TO_STRING()
+
+`BIT_TO_STRING(number, bits) → bitstring`
+
+Converts a numeric input value into a bitstring, consisting of `0` and `1`.
+
+To convert a bitstring into a number, see [`BIT_FROM_STRING()`](#bit_from_string).
+
+- **number** (number): the number to stringify
+- **bits** (number): number of bits in the output string (0 to 32); shorter
+  results are padded with leading zeroes
+- returns **bitstring** (string\|null): bitstring generated from the input value
+
+The input value must be a number between 0 and 2<sup>32</sup> - 1 (including).
+The number of bits must be between 0 and 32. The function will return `null` for
+any other inputs and produce a warning.
+
+```aql
+BIT_TO_STRING(7, 4) // "0111"
+BIT_TO_STRING(255, 8) // "11111111"
+BIT_TO_STRING(60, 8) // "00011110"
+BIT_TO_STRING(1048576, 32) // "00000000000100000000000000000000"
+```
+
+## BIT_XOR()
+
+`BIT_XOR(numbersArray) → result`
+
+Exclusive-or-combines the numeric values in *numbersArray* into a single
+numeric result value.
+
+- **numbersArray** (array): array with numeric input values
+- returns **result** (number\|null): xor-combined result
+
+The function expects an array with numeric values as its input. The values in
+the array must be numbers, which must not be negative. The maximum supported
+input number value is 2<sup>32</sup> - 1. Input number values outside the
+allowed range will make the function return `null` and produce a warning.
+Any `null` values in the input array are ignored.
+
+---
+
+`BIT_XOR(value1, value2) → result`
+
+If two numbers are passed as individual function parameters to `BIT_XOR()`, it
+will return the bitwise exclusive or value of its two operands. Only numbers in
+the range 0 to 2<sup>32</sup> - 1 are allowed as input values.
+
+- **value1** (number): first operand
+- **value2** (number): second operand
+- returns **result** (number\|null): xor-combined result
+
+```aql
+BIT_XOR([1, 4, 8, 16]) // 29
+BIT_XOR([3, 7, 63]) // 59
+BIT_XOR([255, 127, null, 63]) // 191
+BIT_XOR(255, 257) // 510
+BIT_XOR("foo") // null
+```
diff --git a/site/content/arangodb/oem/aql/functions/date.md b/site/content/arangodb/oem/aql/functions/date.md
new file mode 100644
index 0000000000..8e6904215b
--- /dev/null
+++ b/site/content/arangodb/oem/aql/functions/date.md
@@ -0,0 +1,1335 @@
+---
+title: Date functions in AQL
+menuTitle: Date
+weight: 20
+description: >-
+  AQL includes functions to work with dates as numeric timestamps and as
+  ISO 8601 date time strings
+---
+## Date and time representations
+
+AQL offers functionality to work with dates, but it does not have a special data type
+for dates (neither does JSON, which is usually used as the format to ship data into and
+out of ArangoDB). Instead, dates in AQL are represented by either numbers or strings.
+
+All date function operations are done in the *Unix time* system. Unix time counts
+all non-leap seconds beginning with January 1st 1970 00:00:00.000 UTC, also known as
+the Unix epoch. A point in time is called a timestamp. A timestamp has the same value
+at every point on earth. The date functions use millisecond precision for timestamps.
+
+Time unit definitions:
+
+- **millisecond**: 1/1000 of a second
+- **second**: one [SI second](https://www.bipm.org/en/si-base-units/second)
+- **minute**: one minute is defined as 60 seconds
+- **hour**: one hour is defined as 60 minutes
+- **day**: one day is defined as 24 hours
+- **week**: one week is defined as 7 days
+- **month**: one month is defined as 1/12 of a year
+- **year**: one year is defined as 365.2425 days
+
+All functions that require dates as arguments accept the following input values:
+
+- **numeric timestamps**, millisecond precision.
+
+  An example timestamp value is `1399472349522`, which translates to
+  `2014-05-07T14:19:09.522Z`.
+
+  Valid range: `-62167219200000` .. `253402300799999` (inclusive)
+
+- **date time strings** in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) format:
+  - `YYYY-MM-DDTHH:MM:SS.MMM`
+  - `YYYY-MM-DD HH:MM:SS.MMM`
+  - `YYYY-MM-DD`
+
+  Milliseconds (`.MMM`) are always optional. Two digits for the hours (`HH`),
+  minutes (`MM`) and seconds (`SS`) are mandatory, i.e. zero-padding is required
+  for the values 0 through 9 (e.g. `05` instead of `5`). Leading zeroes for the
+  year (`YYYY`), month (`MM`) and day (`DD`) can be left out, but this is discouraged.
+
+  A time offset may optionally be added at the end of the string, with the
+  hours and minutes that need to be added to or subtracted from the date time value.
+  For example, `2014-05-07T14:19:09+01:00` specifies a one hour offset, and
+  `2014-05-07T14:19:09+07:30` an offset of seven and a half hours.
+  Negative offsets are also possible. Alternatively to an offset, a `Z` can be used
+  to indicate UTC / Zulu time.
An example value is `2014-05-07T14:19:09.522Z` + meaning May 7th 2014, 14:19:09 and 522 milliseconds, UTC / Zulu time. + Another example value without time component is `2014-05-07Z`. + + Valid range: `"0000-01-01T00:00:00.000Z"` .. `"9999-12-31T23:59:59.999Z"` (inclusive) + +Any date/time values outside the valid range that are passed into an AQL date +function makes the function return `null` and trigger a warning for the query, +which can optionally be escalated to an error and abort the query. This also +applies to operations which produce an invalid value. + +```aql +DATE_HOUR( 2 * 60 * 60 * 1000 ) // 2 +DATE_HOUR("1970-01-01T02:00:00") // 2 +``` + +You are free to store age determinations of specimens, incomplete or fuzzy dates and +the like in different, more appropriate ways of course. AQL's date functions are +most certainly not of any help for such dates, but you can still use language +constructs like [SORT](../high-level-operations/sort.md) (which also supports sorting of arrays) +and [indexes](../../indexes-and-search/indexing/_index.md). + +## Current date and time + +### DATE_NOW() + +`DATE_NOW() → timestamp` + +Get the current unix time as numeric timestamp. + +- returns **timestamp** (number): the current unix time as a timestamp. + The return value has millisecond precision. To convert the return value to + seconds, divide it by 1000. + +Note that this function is evaluated on every invocation and may return +different values when invoked multiple times in the same query. Assign it +to a variable to use the exact same timestamp multiple times. + +## Conversion + +`DATE_TIMESTAMP()` and `DATE_ISO8601()` can be used to convert ISO 8601 date time +strings to numeric timestamps and numeric timestamps to ISO 8601 date time strings. + +Both also support individual date components as separate function arguments, +in the following order: + +- year +- month +- day +- hour +- minute +- second +- millisecond + +All components following the *day* are optional and can be omitted. Note that no +time offset can be specified when using separate date components, and UTC / +Zulu time is used. + +The following calls to `DATE_TIMESTAMP()` are equivalent and all return +`1399472349522`: + +```aql +DATE_TIMESTAMP("2014-05-07T14:19:09.522") +DATE_TIMESTAMP("2014-05-07T14:19:09.522Z") +DATE_TIMESTAMP("2014-05-07 14:19:09.522") +DATE_TIMESTAMP("2014-05-07 14:19:09.522Z") +DATE_TIMESTAMP(2014, 5, 7, 14, 19, 9, 522) +DATE_TIMESTAMP(1399472349522) +``` + +The same is true for calls to `DATE_ISO8601()` that also accepts variable input +formats: + +```aql +DATE_ISO8601("2014-05-07T14:19:09.522Z") +DATE_ISO8601("2014-05-07 14:19:09.522Z") +DATE_ISO8601(2014, 5, 7, 14, 19, 9, 522) +DATE_ISO8601(1399472349522) +``` + +The above functions are all equivalent and return `"2014-05-07T14:19:09.522Z"`. + +### DATE_ISO8601() + +`DATE_ISO8601(date) → dateString` + +Return an ISO 8601 date time string from `date`. +The date time string always uses UTC / Zulu time, indicated by the `Z` at its end. + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- returns **dateString**: date and time expressed according to ISO 8601, in Zulu time + +--- + +`DATE_ISO8601(year, month, day, hour, minute, second, millisecond) → dateString` + +Return a ISO 8601 date time string from `date`, but allows to specify the individual +date components separately. All parameters after `day` are optional. + +- **year** (number): typically in the range 0..9999, e.g. 
`2017` +- **month** (number): 1..12 for January through December +- **day** (number): 1..31 (upper bound depends on number of days in month) +- **hour** (number, *optional*): 0..23 +- **minute** (number, *optional*): 0..59 +- **second** (number, *optional*): 0..59 +- **milliseconds** (number, *optional*): 0..999 +- returns **dateString**: date and time expressed according to ISO 8601, in Zulu time + +### DATE_TIMESTAMP() + +`DATE_TIMESTAMP(date) → timestamp` + +Create a timestamp value from `date`. The return value has millisecond precision. +To convert the return value to seconds, divide it by 1000. + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- returns **timestamp** (number): numeric timestamp + +--- + +`DATE_TIMESTAMP(year, month, day, hour, minute, second, millisecond) → timestamp` + +Create a timestamp value, but allows to specify the individual date components +separately. All parameters after `day` are optional. + +- **year** (number): typically in the range 0..9999, e.g. `2017` +- **month** (number): 1..12 for January through December +- **day** (number): 1..31 (upper bound depends on number of days in month) +- **hour** (number, *optional*): 0..23 +- **minute** (number, *optional*): 0..59 +- **second** (number, *optional*): 0..59 +- **milliseconds** (number, *optional*): 0..999 +- returns **timestamp** (number): numeric timestamp + +Negative values are not allowed, result in `null` and cause a warning. +Values greater than the upper range bound overflow to the larger components +(e.g. an hour of 26 is automatically turned into an additional day and two hours): + +```aql +DATE_TIMESTAMP(2016, 12, -1) // returns null and issues a warning +DATE_TIMESTAMP(2016, 2, 32) // returns 1456963200000, which is March 3rd, 2016 +DATE_TIMESTAMP(1970, 1, 1, 26) // returns 93600000, which is January 2nd, 1970, at 2 a.m. +``` + +### IS_DATESTRING() + +`IS_DATESTRING(value) → bool` + +Check if an arbitrary string is suitable for interpretation as date time string. + +- **value** (string): an arbitrary string +- returns **bool** (bool): `true` if `value` is a string that can be used + in a date function. This includes partial dates such as `2015` or `2015-10` and + strings containing invalid dates such as `2015-02-31`. The function returns + `false` for all non-string values, even if some of them may be usable in date + functions. + +## Processing + +### DATE_DAYOFWEEK() + +`DATE_DAYOFWEEK(date) → weekdayNumber` + +Return the weekday number of `date`. + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- returns **weekdayNumber** (number): 0..6 as follows: + - `0` – Sunday + - `1` – Monday + - `2` – Tuesday + - `3` – Wednesday + - `4` – Thursday + - `5` – Friday + - `6` – Saturday + +**Examples** + +```aql +--- +name: datedyofwk1 +description: | + The 29th of August in 2020 was a Saturday: +--- +RETURN DATE_DAYOFWEEK("2020-08-29") +``` + +```aql +--- +name: datedyofwk2 +description: | + The Unix epoch began on the 1st of January 1970, which was a Thursday: +--- +RETURN DATE_DAYOFWEEK(0) +``` + +### DATE_YEAR() + +`DATE_YEAR(date) → year` + +Return the year of `date`. 
+ +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- returns **year** (number): the year part of `date` as a number + +**Examples** + +```aql +--- +name: dateyr1 +description: | + Extract the year from a date time string: +--- +RETURN DATE_YEAR("2020-08-29") +``` + +```aql +--- +name: dateyr2 +description: | + Extract the year from a Unix timestamp: +--- +RETURN DATE_YEAR(0) +``` + +### DATE_MONTH() + +`DATE_MONTH(date) → month` + +Return the month of `date`. + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- returns **month** (number): the month part of `date` as a number + +**Examples** + +```aql +--- +name: datemn1 +description: | + Extract the month from a date time string: +--- +RETURN DATE_MONTH("2020-08-29") +``` + +```aql +--- +name: datemn2 +description: | + Extract the month from a Unix timestamp: +--- +RETURN DATE_MONTH(0) +``` + +### DATE_DAY() + +`DATE_DAY(date) → day` + +Return the day of `date`. + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- returns **day** (number): the day part of `date` as a number + +**Examples** + +```aql +--- +name: datedy1 +description: | + Extract the day from a date time string: +--- +RETURN DATE_DAY("2020-08-29") +``` + +```aql +--- +name: datedy2 +description: | + Extract the day from a Unix timestamp: +--- +RETURN DATE_DAY(0) +``` + +### DATE_HOUR() + +Return the hour of `date`. + +`DATE_HOUR(date) → hour` + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- returns **hour** (number): the hour part of `date` as a number + +**Examples** + +```aql +--- +name: datehr1 +description: | + Extract the hour of a date time string: +--- +RETURN DATE_HOUR("2020-08-29T16:30:05.123") +``` + +```aql +--- +name: datehr2 +description: | + Extract the hour of a Unix timestamp: +--- +RETURN DATE_HOUR(14400000) +``` + +### DATE_MINUTE() + +`DATE_MINUTE(date) → minute` + +Return the minute of `date`. + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- returns **minute** (number): the minute part of `date` as a number + +**Examples** + +```aql +--- +name: datemin1 +description: | + Extract the minute of a date time string: +--- +RETURN DATE_MINUTE("2020-08-29T16:30:05.123") +``` + +```aql +--- +name: datemin2 +description: | + Extract the minute of a Unix timestamp: +--- +RETURN DATE_MINUTE(2520000) +``` + +### DATE_SECOND() + +`DATE_SECOND(date) → second` + +Return the second of `date`. + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- returns **second** (number): the seconds part of `date` as a number + +**Examples** + +```aql +--- +name: datesec1 +description: | + Extract the second of a date time string: +--- +RETURN DATE_SECOND("2020-08-29T16:30:05.123") +``` + +```aql +--- +name: datesec2 +description: | + Extract the second of a Unix timestamp: +--- +RETURN DATE_SECOND(1234567890) +``` + +### DATE_MILLISECOND() + +`DATE_MILLISECOND(date) → millisecond` + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- returns **millisecond** (number): the milliseconds part of `date` as a number + +**Examples** + +```aql +--- +name: datemilsec1 +description: '' +--- +RETURN DATE_MILLISECOND("2020-08-29T16:30:05.123") +``` + +```aql +--- +name: datemilsec2 +description: | + Extract the millisecond of a Unix timestamp: +--- +RETURN DATE_MILLISECOND(1234567890) +``` + +### DATE_DAYOFYEAR() + +`DATE_DAYOFYEAR(date) → dayOfYear` + +Return the day of year of `date`. 
+ +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- returns **dayOfYear** (number): the day of year number of `date`. + The return values range from 1 to 365, or 366 in a leap year respectively. + +**Examples** + +```aql +--- +name: datedyofyr1 +description: | + Extract the day of year from a date time string: +--- +RETURN DATE_DAYOFYEAR("2020-08-29") +``` + +```aql +--- +name: datedyofyr2 +description: | + Extract the day of year from a Unix timestamp: +--- +RETURN DATE_DAYOFYEAR(86400000) +``` + +### DATE_ISOWEEK() + +`DATE_ISOWEEK(date) → weekDate` + +Return the week number in the year of `date` according to ISO 8601. + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- returns **weekDate** (number): the ISO week number of `date`. The return values + range from 1 to 53. Monday is considered the first day of the week. There are no + fractional weeks, thus the last days in December may belong to the first week of + the next year, and the first days in January may be part of the previous year's + last week. + +**Examples** + +```aql +--- +name: dateisofwk1 +description: | + Determine the week number from a date time string: +--- +RETURN DATE_ISOWEEK("2020-08-29") +``` + +```aql +--- +name: dateisofwk2 +description: | + Determine the week number from a Unix timestamp: +--- +RETURN DATE_ISOWEEK(1234567890) +``` + +### DATE_ISOWEEKYEAR() + +`DATE_ISOWEEKYEAR(date) → weekAndYear` + +Return the week number of `date` according to ISO 8601 and the year the +week belongs to. + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- returns **weekAndYear** (object): an object with two attributes + - **week** (number): the ISO week number of `date`. The values range from 1 to 53. + Monday is considered the first day of the week. There are no fractional weeks, + thus the last days in December may belong to the first week of the next year, + and the first days in January may be part of the previous year's last week. + - **year** (number): the year to which the ISO week number belongs to + +**Examples** + +```aql +--- +name: aqlDateIsoWeekYear1 +description: | + January 1st of 2023 is part of the previous year's last week: +--- +RETURN DATE_ISOWEEKYEAR("2023-01-01") +``` + +```aql +--- +name: aqlDateIsoWeekYear2 +description: | + The last two days of 2019 are part of the next year's first week: +--- +RETURN DATE_ISOWEEKYEAR("2019-12-30") +``` + +### DATE_LEAPYEAR() + +`DATE_LEAPYEAR(date) → leapYear` + +Return whether `date` is in a leap year. + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- returns **leapYear** (bool): `true` if `date` is in a leap year, `false` otherwise + +**Examples** + +```aql +--- +name: datelpyr1 +description: | + 2020 was a leap year: +--- +RETURN DATE_LEAPYEAR("2020-01-01") +``` + +```aql +--- +name: datelpyr2 +description: | + 2021 was not a leap year: +--- +RETURN DATE_LEAPYEAR("2021-01-01") +``` + +### DATE_QUARTER() + +`DATE_QUARTER(date) → quarter` + +Return which quarter `date` belongs to. 
+ +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- returns **quarter** (number): the quarter of the given date (1-based): + - `1` – January, February, March + - `2` – April, May, June + - `3` – July, August, September + - `4` – October, November, December + +**Examples** + +```aql +--- +name: dateqtr1 +description: | + Determine the quarter of a date time string: +--- +RETURN DATE_QUARTER("2020-08-29") +``` + +### DATE_DAYS_IN_MONTH() + +Return the number of days in the month of `date`. + +`DATE_DAYS_IN_MONTH(date) → daysInMonth` + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- returns **daysInMonth** (number): the number of days in `date`'s month (28..31) + +**Examples** + +```aql +--- +name: datedysmn1 +description: | + Determine the number of days in August using a date time string: +--- +RETURN DATE_DAYS_IN_MONTH("2020-08-01") +``` + +```aql +--- +name: datedysmn2 +description: | + Determine the number of days in September using a date time string: +--- +RETURN DATE_DAYS_IN_MONTH("2020-09-01") +``` + +```aql +--- +name: datedysmn3 +description: | + Determine the number of days in February in a leap year using a date time string: +--- +RETURN DATE_DAYS_IN_MONTH("2020-02-01") +``` + +```aql +--- +name: datedysmn4 +description: | + Determine the number of days in February in a a non-leap year using a date time string: +--- +RETURN DATE_DAYS_IN_MONTH("2021-02-01") +``` + +```aql +--- +name: datedysmn5 +description: | + Determine the number of days in the month using a Unix timestamp: +--- +RETURN DATE_DAYS_IN_MONTH(3045600000) +``` + +### DATE_TRUNC() + +`DATE_TRUNC(date, unit) → isoDate` + +Truncates the given date after `unit` and returns the modified date. + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- **unit** (string): either of the following to specify the time unit (case-insensitive): + - `"y"`, `"year"`, `"years"` + - `"m"`, `"month"`, `"months"` + - `"d"`, `"day"`, `"days"` + - `"h"`, `"hour"`, `"hours"` + - `"i"`, `"minute"`, `"minutes"` + - `"s"`, `"second"`, `"seconds"` + - `"f"`, `"millisecond"`, `"milliseconds"` +- returns **isoDate** (string): the truncated ISO 8601 date time string + +**Examples** + +```aql +DATE_TRUNC('2017-02-03', 'month') // 2017-02-01T00:00:00.000Z +DATE_TRUNC('2017-02-03 04:05:06', 'hours') // 2017-02-03 04:00:00.000Z +DATE_TRUNC('2023-03-25 23:00:00', 'day') // 2023-03-25T00:00:00.000Z +``` + +```aql +--- +name: dateTruncGroup +description: | + Truncate date time strings comprised of a year, month, and day to the year and + group another attribute by it: +bindVars: + { + "data": [ + { "date": "2018-03-05", "value": "Spring" }, + { "date": "2018-07-11", "value": "Summer" }, + { "date": "2018-10-26", "value": "Autumn" }, + { "date": "2019-01-09", "value": "Winter" }, + { "date": "2019-04-02", "value": "Spring" } + ] + } +--- +RETURN MERGE( + FOR doc IN @data + COLLECT q = DATE_TRUNC(doc.date, "year") INTO bucket + RETURN { [DATE_YEAR(q)]: bucket[*].doc.value } +) +``` + +### DATE_ROUND() + +`DATE_ROUND(date, amount, unit) → isoDate` + +Bin a date/time into a set of equal-distance buckets, to be used for +grouping. + +- **date** (string\|number): a date string or timestamp +- **amount** (number): number of `unit`s. Must be a positive integer value. 
+- **unit** (string): either of the following to specify the time unit (case-insensitive): + - `"d"`, `"day"`, `"days"` + - `"h"`, `"hour"`, `"hours"` + - `"i"`, `"minute"`, `"minutes"` + - `"s"`, `"second"`, `"seconds"` + - `"f"`, `"millisecond"`, `"milliseconds"` +- returns **isoDate** (string): the rounded ISO 8601 date time string + +**Examples** + +```aql +DATE_ROUND('2000-04-28T11:11:11.111Z', 1, 'day') // 2000-04-28T00:00:00.000Z +DATE_ROUND('2000-04-10T11:39:29Z', 15, 'minutes') // 2000-04-10T11:30:00.000Z +DATE_ROUND('2023-03-25T23:55:55.555Z', 1, 'day') // 2023-03-25T00:00:00.000Z +``` + +```aql +--- +name: dateRoundAggregate +description: | + Round full date time strings to 5 minutes and aggregate temperature readings + by these time buckets: + +bindVars: + { + "sensorData": [ + { "timestamp": "2019-12-04T21:17:52.583Z", "temp": 20.6 }, + { "timestamp": "2019-12-04T21:19:53.516Z", "temp": 20.2 }, + { "timestamp": "2019-12-04T21:21:53.610Z", "temp": 19.9 }, + { "timestamp": "2019-12-04T21:23:52.522Z", "temp": 19.8 }, + { "timestamp": "2019-12-04T21:25:52.988Z", "temp": 19.8 }, + { "timestamp": "2019-12-04T21:27:54.005Z", "temp": 19.7 } + ] + } +--- +FOR doc IN @sensorData + COLLECT + date = DATE_ROUND(doc.timestamp, 5, "minutes") + AGGREGATE + count = COUNT(1), + avg = AVG(doc.temp), + min = MIN(doc.temp), + max = MAX(doc.temp) + RETURN { date, count, avg, min, max } +``` + +### DATE_FORMAT() + +`DATE_FORMAT(date, format) → str` + +Format a date according to the given format string. + +- **date** (string\|number): a date string or timestamp +- **format** (string): a format string, see below +- returns **str** (string): a formatted date string + +The `format` parameter supports the following placeholders (case-insensitive): + +- `%t` – timestamp, in milliseconds since midnight 1970-01-01 +- `%z` – ISO date (0000-00-00T00:00:00.000Z) +- `%w` – day of week (0..6) +- `%y` – year (0..9999) +- `%yy` – year (00..99), abbreviated (last two digits) +- `%yyyy` – year (0000..9999), padded to length of 4 +- `%yyyyyy` – year (-009999 .. +009999), with sign prefix and padded to length of 6 +- `%m` – month (1..12) +- `%mm` – month (01..12), padded to length of 2 +- `%d` – day (1..31) +- `%dd` – day (01..31), padded to length of 2 +- `%h` – hour (0..23) +- `%hh` – hour (00..23), padded to length of 2 +- `%i` – minute (0..59) +- `%ii` – minute (00..59), padded to length of 2 +- `%s` – second (0..59) +- `%ss` – second (00..59), padded to length of 2 +- `%f` – millisecond (0..999) +- `%fff` – millisecond (000..999), padded to length of 3 +- `%x` – day of year (1..366) +- `%xxx` – day of year (001..366), padded to length of 3 +- `%k` – ISO week number of year (1..53) +- `%kk` – ISO week number of year (01..53), padded to length of 2 +- `%l` – leap year (0 or 1) +- `%q` – quarter (1..4) +- `%a` – days in month (28..31) +- `%mmm` – abbreviated English name of month (Jan..Dec) +- `%mmmm` – English name of month (January..December) +- `%www` – abbreviated English name of weekday (Sun..Sat) +- `%wwww` – English name of weekday (Sunday..Saturday) +- `%&` – special escape sequence for rare occasions +- `%%` – literal % +- `%` – ignored + +`%yyyy` does not enforce a length of 4 for years before 0 and past 9999. +The same format as for `%yyyyyy` is used instead. `%yy` preserves the +sign for negative years and may thus return 3 characters in total. + +Single `%` characters are ignored. Use `%%` for a literal `%`. 
To resolve +ambiguities like in `%mmonth` (unpadded month number + the string `month`) +between `%mm` + `onth` and `%m` + `month`, use the escape sequence `%&`: +`%m%&month`. + +Note that `DATE_FORMAT()` is a rather costly operation and may not be suitable for large +datasets (like over 1 million dates). If possible, avoid formatting dates on +server-side and leave it up to the client to do so. This function should only +be used for special date comparisons or to store the formatted dates in the +database. For better performance, use the primitive `DATE_*()` functions +together with `CONCAT()` if possible. + +**Examples** + +```aql +DATE_FORMAT(DATE_NOW(), "%q/%yyyy") // quarter and year (e.g. "3/2015") +DATE_FORMAT(DATE_NOW(), "%dd.%mm.%yyyy %hh:%ii:%ss,%fff") // e.g. "18.09.2015 15:30:49,374" +DATE_FORMAT("1969", "Summer of '%yy") // "Summer of '69" +DATE_FORMAT("2016", "%%l = %l") // "%l = 1" (2016 is a leap year) +DATE_FORMAT("2016-03-01", "%xxx%") // "063", trailing % ignored +``` + +```aql +--- +name: dateFormat +description: | + Example calls of the formatting function and their results: +bindVars: + { + "formats": [ + { "date": "2023-03-25T23:00:00.000Z", "placeholder": "%w", "equalTo": "DATE_DAYOFWEEK" }, + { "date": "2023-12-31T23:00:00.000Z", "placeholder": "%yyyy", "equalTo": "DATE_YEAR" }, + { "date": "2023-12-31T23:00:00.000Z", "placeholder": "%m", "equalTo": "DATE_MONTH" }, + { "date": "2023-12-31T23:00:00.000Z", "placeholder": "%d", "equalTo": "DATE_DAY" }, + { "date": "2023-12-31T23:00:00.000Z", "placeholder": "%h", "equalTo": "DATE_HOUR" }, + { "date": "2023-12-31T23:00:00.000Z", "placeholder": "%i", "equalTo": "DATE_MINUTE" }, + { "date": "2023-12-31T23:00:23.000Z", "placeholder": "%s", "equalTo": "DATE_SECOND" }, + { "date": "2023-12-31T23:00:00.031Z", "placeholder": "%f", "equalTo": "DATE_MILLISECOND" }, + { "date": "2023-12-31T23:00:00.000Z", "placeholder": "%x", "equalTo": "DATE_DAYOFYEAR" }, + { "date": "2023-12-31T23:00:00.000Z", "placeholder": "%k", "equalTo": "DATE_ISOWEEK" }, + { "date": "2016-12-31T23:00:00.000Z", "placeholder": "%l", "equalTo": "DATE_LEAPYEAR" }, + { "date": "2023-12-31T23:00:00.000Z", "placeholder": "%q", "equalTo": "DATE_QUARTER" }, + { "date": "2023-11-30T23:00:00.000Z", "placeholder": "%a", "equalTo": "DATE_DAYS_IN_MONTH" }, + { "date": "2023-11-30T23:00:00.000Z", "placeholder": "%t", "equalTo": "DATE_TIMESTAMP" } + ] + } +--- +FOR format IN @formats + RETURN CONCAT( + format.equalTo, + "('", + format.date, + "') = ", + DATE_FORMAT(format.date, format.placeholder) + ) +``` + +## Comparison and calculation + +### DATE_ADD() + +`DATE_ADD(date, amount, unit) → isoDate` + +Add `amount` given in `unit` to `date` and return the calculated date. + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- **amount** (number\|string): number of `unit`s to add (positive value) or + subtract (negative value). It is recommended to use positive values only, + and use [`DATE_SUBTRACT()`](#date_subtract) for subtractions instead. 
+- **unit** (string): either of the following to specify the time unit to add or + subtract (case-insensitive): + - `"y"`, `"year"`, `"years"` + - `"m"`, `"month"`, `"months"` + - `"w"`, `"week"`, `"weeks"` + - `"d"`, `"day"`, `"days"` + - `"h"`, `"hour"`, `"hours"` + - `"i"`, `"minute"`, `"minutes"` + - `"s"`, `"second"`, `"seconds"` + - `"f"`, `"millisecond"`, `"milliseconds"` +- returns **isoDate** (string): the calculated ISO 8601 date time string + +```aql +DATE_ADD(DATE_NOW(), -1, "day") // yesterday; also see DATE_SUBTRACT() +DATE_ADD(DATE_NOW(), 3, "months") // in three months +DATE_ADD(DATE_ADD("2015-04-01", 5, "years"), 1, "month") // May 1st 2020 +DATE_ADD("2015-04-01", 12*5 + 1, "months") // also May 1st 2020 +DATE_ADD(DATE_TIMESTAMP(DATE_YEAR(DATE_NOW()), 12, 24), -4, "years") // Christmas four years ago +DATE_ADD(DATE_ADD("2016-02", "month", 1), -1, "day") // last day of February (29th, because 2016 is a leap year!) +``` + +--- + +`DATE_ADD(date, isoDuration) → isoDate` + +You may also pass an ISO duration string as `amount` and leave out `unit`. + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- **isoDuration** (string): an ISO 8601 duration string to add to `date`, see below +- returns **isoDate** (string): the calculated ISO 8601 date time string + +The format is `P_Y_M_W_DT_H_M_._S`, where underscores stand for digits and +letters for time intervals - except for the separators `P` (period) and `T` (time). +The meaning of the other letters are: +- `Y` – years +- `M` – months (if before T) +- `W` – weeks +- `D` – days +- `H` – hours +- `M` – minutes (if after T) +- `S` – seconds (optionally with 3 decimal places for milliseconds) + +The string must be prefixed by a `P`. A separating `T` is only required if +`H`, `M` and/or `S` are specified. You only need to specify the needed pairs +of letters and numbers. + +```aql +DATE_ADD(DATE_NOW(), "P1Y") // add 1 year +DATE_ADD(DATE_NOW(), "P3M2W") // add 3 months and 2 weeks +DATE_ADD(DATE_NOW(), "P5DT26H") // add 5 days and 26 hours (=6 days and 2 hours) +DATE_ADD("2000-01-01", "PT4H") // add 4 hours +DATE_ADD("2000-01-01", "PT30M44.4S") // add 30 minutes, 44 seconds and 400 ms +DATE_ADD("2000-01-01", "P1Y2M3W4DT5H6M7.89S") // add a bit of everything +``` + +### DATE_SUBTRACT() + +`DATE_SUBTRACT(date, amount, unit) → isoDate` + +Subtract `amount` given in `unit` from `date` and return the calculated date. + +It works the same as [`DATE_ADD()`](#date_add), except that it subtracts. It is +equivalent to calling `DATE_ADD()` with a negative amount, except that +`DATE_SUBTRACT()` can also subtract ISO durations. Note that negative ISO +durations are not supported (i.e. starting with `-P`, like `-P1Y`). + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- **amount** (number\|string): number of `unit`s to subtract (positive value) or + add (negative value). It is recommended to use positive values only, + and use [`DATE_ADD()`](#date_add) for additions instead. 
+- **unit** (string): either of the following to specify the time unit to add or
+  subtract (case-insensitive):
+  - `"y"`, `"year"`, `"years"`
+  - `"m"`, `"month"`, `"months"`
+  - `"w"`, `"week"`, `"weeks"`
+  - `"d"`, `"day"`, `"days"`
+  - `"h"`, `"hour"`, `"hours"`
+  - `"i"`, `"minute"`, `"minutes"`
+  - `"s"`, `"second"`, `"seconds"`
+  - `"f"`, `"millisecond"`, `"milliseconds"`
+- returns **isoDate** (string): the calculated ISO 8601 date time string
+
+---
+
+`DATE_SUBTRACT(date, isoDuration) → isoDate`
+
+You may also pass an ISO duration string as `amount` and leave out `unit`.
+
+- **date** (number\|string): numeric timestamp or ISO 8601 date time string
+- **isoDuration** (string): an ISO 8601 duration string to subtract from `date`,
+  see below
+- returns **isoDate** (string): the calculated ISO 8601 date time string
+
+The format is `P_Y_M_W_DT_H_M_._S`, where underscores stand for digits and
+letters for time intervals - except for the separators `P` (period) and `T` (time).
+The meanings of the other letters are:
+- `Y` – years
+- `M` – months (if before T)
+- `W` – weeks
+- `D` – days
+- `H` – hours
+- `M` – minutes (if after T)
+- `S` – seconds (optionally with 3 decimal places for milliseconds)
+
+The string must be prefixed by a `P`. A separating `T` is only required if
+`H`, `M` and/or `S` are specified. You only need to specify the needed pairs
+of letters and numbers.
+
+```aql
+DATE_SUBTRACT(DATE_NOW(), 1, "day") // yesterday
+DATE_SUBTRACT(DATE_TIMESTAMP(DATE_YEAR(DATE_NOW()), 12, 24), 4, "years") // Christmas four years ago
+DATE_SUBTRACT(DATE_ADD("2016-02", "month", 1), 1, "day") // last day of February (29th, because 2016 is a leap year!)
+DATE_SUBTRACT(DATE_NOW(), "P4D") // four days ago
+DATE_SUBTRACT(DATE_NOW(), "PT1H3M") // 1 hour and 3 minutes ago
+```
+
+### DATE_DIFF()
+
+`DATE_DIFF(date1, date2, unit, asFloat) → diff`
+
+Calculate the difference between two dates in given time `unit`, optionally
+with decimal places.
+
+- **date1** (number\|string): numeric timestamp or ISO 8601 date time string
+- **date2** (number\|string): numeric timestamp or ISO 8601 date time string
+- **unit** (string): either of the following to specify the time unit to return the
+  difference in (case-insensitive):
+  - `"y"`, `"year"`, `"years"`
+  - `"m"`, `"month"`, `"months"`
+  - `"w"`, `"week"`, `"weeks"`
+  - `"d"`, `"day"`, `"days"`
+  - `"h"`, `"hour"`, `"hours"`
+  - `"i"`, `"minute"`, `"minutes"`
+  - `"s"`, `"second"`, `"seconds"`
+  - `"f"`, `"millisecond"`, `"milliseconds"`
+- **asFloat** (boolean, *optional*): if set to `true`, decimal places are
+  preserved in the result. The default is `false` and an integer is returned.
+- returns **diff** (number): the calculated difference as number in `unit`.
+  The value is negative if `date2` is before `date1`.
+
+```aql
+---
+name: datediff1
+description: |
+  Determine how many days it is from New Year's Eve until April Fools' day:
+---
+RETURN DATE_DIFF("2023-12-31", "2024-04-01", "days")
+```
+
+### DATE_COMPARE()
+
+`DATE_COMPARE(date1, date2, unitRangeStart, unitRangeEnd) → bool`
+
+Check if two partial dates match.
+
+- **date1** (number\|string): numeric timestamp or ISO 8601 date time string
+- **date2** (number\|string): numeric timestamp or ISO 8601 date time string
+- **unitRangeStart** (string): unit to start from, see below
+- **unitRangeEnd** (string, *optional*): unit to end with, leave out to only
+  compare the component as specified by `unitRangeStart`. An error is raised if
+  `unitRangeEnd` is a unit before `unitRangeStart`.
+- returns **bool** (bool): `true` if the dates match, `false` otherwise
+
+The parts to compare are defined by a range of time units. The full range is:
+years, months, days, hours, minutes, seconds, milliseconds (in this order).
+
+All components of `date1` and `date2` as specified by the range are compared.
+You can refer to the units as:
+
+- `"y"`, `"year"`, `"years"`
+- `"m"`, `"month"`, `"months"`
+- `"d"`, `"day"`, `"days"`
+- `"h"`, `"hour"`, `"hours"`
+- `"i"`, `"minute"`, `"minutes"`
+- `"s"`, `"second"`, `"seconds"`
+- `"f"`, `"millisecond"`, `"milliseconds"`
+
+**Examples**
+
+```aql
+// Compare months and days, true on birthdays if you're born on 4th of April
+DATE_COMPARE("1985-04-04", DATE_NOW(), "months", "days")
+
+// Only matches on one day if the current year is a leap year!
+// You may want to add or subtract one day from date1 to match every year.
+DATE_COMPARE("1984-02-29", DATE_NOW(), "months", "days")
+
+// compare years, months and days (true, because it's the same day)
+DATE_COMPARE("2001-01-01T15:30:45.678Z", "2001-01-01T08:08:08.008Z", "years", "days")
+```
+
+You can directly compare ISO date **strings** if you want to find dates before or
+after a certain date, or in between two dates (`>=`, `>`, `<`, `<=`).
+No special date function is required. Equality tests (`==` and `!=`) only
+match the exact same date and time, however. You may use `SUBSTRING()` to
+compare partial date strings; `DATE_COMPARE()` is basically a convenience
+function for that. However, neither is really required to limit a search to a
+certain day as demonstrated here:
+
+```aql
+FOR doc IN coll
+  FILTER doc.date >= "2015-05-15" AND doc.date < "2015-05-16"
+  RETURN doc
+```
+
+Every ISO date on that day is greater than or equal to `2015-05-15` in a string
+comparison (e.g. `2015-05-15T11:30:00.000Z`). Dates before `2015-05-15` are smaller
+and therefore filtered out by the first condition. Every date from `2015-05-16`
+onwards is greater than or equal to `2015-05-16` in a string comparison and
+therefore filtered out by the second condition. The result is that the time
+components in the dates you compare with are "ignored". The query returns every
+document with `date` ranging from `2015-05-15T00:00:00.000Z` to
+`2015-05-15T23:59:59.999Z`. It would also include `2015-05-15T24:00:00.000Z`,
+but that date is actually `2015-05-16T00:00:00.000Z`
+and can only occur if inserted manually (you may want to pass dates through
+[`DATE_ISO8601()`](#date_iso8601) to ensure a correct date representation).
+
+Leap days in leap years (29th of February) always need to be handled manually
+if you want to account for them (e.g. for birthday checks):
+
+```aql
+LET today = DATE_NOW()
+LET noLeapYear = NOT DATE_LEAPYEAR(today)
+
+FOR user IN users
+  LET birthday = noLeapYear AND
+    DATE_MONTH(user.birthday) == 2 AND
+    DATE_DAY(user.birthday) == 29
+    ? DATE_SUBTRACT(user.birthday, 1, "day") /* treat like 28th in non-leap years */
+    : user.birthday
+  FILTER DATE_COMPARE(today, birthday, "month", "day")
+  /* includes leaplings on the 28th of February in non-leap years,
+   * but excludes them in leap years which do have a 29th February.
+   * Replace DATE_SUBTRACT() by DATE_ADD() to include them on the 1st of March
+   * in non-leap years instead (depends on local jurisdiction).
+   */
+  RETURN user
+```
+
+### DATE_UTCTOLOCAL()
+
+Introduced in: v3.8.0
+
+Converts `date` assumed in Zulu time (UTC) to local `timezone`.
+
+It takes historic daylight saving times into account.
+ +`DATE_UTCTOLOCAL(date, timezone, zoneinfo) → date` + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- **timezone** (string): + [IANA timezone name](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones), + e.g. `"America/New_York"`, `"Europe/Berlin"` or `"UTC"`. + Use `"America/Los_Angeles"` for Pacific time (PST/PDT). + Throws an error if the timezone is not known to ArangoDB. +- **zoneinfo** (boolean, *optional*): if set to `true`, an object with timezone + information is returned. The default is `false` and a date string is returned +- returns **date** (string\|object): an ISO 8601 date time string in + unqualified local time, or an object with the following attributes: + - **local** (string): ISO 8601 date time string in unqualified local time + - **tzdb** (string): version of the timezone database used (e.g. `"2020f"`) + - **zoneInfo**: (object): timezone information + - **name** (string): timezone abbreviation (GMT, PST, CET, ...) + - **begin** (string\|null): begin of the timezone effect as UTC date time string + - **end** (string\|null): end of the timezone effect as UTC date time string + - **dst** (boolean): `true` when daylight saving time (DST) is active, + `false` otherwise + - **offset** (number): offset to UTC in seconds + +```aql +--- +name: aqlDateTimeToLocal_1 +description: | + Convert a date time string to different local timezones: +--- +RETURN [ + DATE_UTCTOLOCAL("2020-03-15T00:00:00.000", "Europe/Berlin"), + DATE_UTCTOLOCAL("2020-03-15T00:00:00.000", "America/New_York"), + DATE_UTCTOLOCAL("2020-03-15T00:00:00.000", "UTC") +] +``` + +```aql +--- +name: aqlDateTimeToLocal_2 +description: | + Convert date time strings with and without UTC indicator (`Z`), with a timezone + offset, and a Unix timestamp to local time: +--- +RETURN [ + DATE_UTCTOLOCAL("2020-03-15T00:00:00.000", "Asia/Shanghai"), + DATE_UTCTOLOCAL("2020-03-15T00:00:00.000Z", "Asia/Shanghai"), + DATE_UTCTOLOCAL("2020-03-15T00:00:00.000-02:00", "Asia/Shanghai"), + DATE_UTCTOLOCAL(1584230400000, "Asia/Shanghai") +] +``` + +```aql +--- +name: aqlDateTimeToLocal_3 +description: | + Convert to local time and include timezone information: +--- +RETURN DATE_UTCTOLOCAL(DATE_NOW(), "Africa/Lagos", true) +``` + +### DATE_LOCALTOUTC() + +Introduced in: v3.8.0 + +Converts `date` assumed in local `timezone` to Zulu time (UTC). + +It takes historic daylight saving times into account. + +`DATE_LOCALTOUTC(date, timezone, zoneinfo) → date` + +- **date** (number\|string): numeric timestamp or ISO 8601 date time string +- **timezone** (string): + [IANA timezone name](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones), + e.g. `"America/New_York"`, `"Europe/Berlin"` or `"UTC"`. + Use `"America/Los_Angeles"` for Pacific time (PST/PDT). + Throws an error if the timezone is not known to ArangoDB. +- **zoneinfo** (boolean, *optional*): if set to `true`, an object with timezone + information is returned. The default is `false` and a date string is returned +- returns **date** (string\|object): an ISO 8601 date time string in + Zulu time (UTC), or an object with the following attributes: + - **utc** (string): ISO 8601 date time string in Zulu time (UTC) + - **tzdb** (string): version of the timezone database used (e.g. `"2020f"`) + - **zoneInfo**: (object): timezone information + - **name** (string): timezone abbreviation (GMT, PST, CET, ...) 
+ - **begin** (string\|null): begin of the timezone effect as UTC date time string + - **end** (string\|null): end of the timezone effect as UTC date time string + - **dst** (boolean): `true` when daylight saving time (DST) is active, + `false` otherwise + - **offset** (number): offset to UTC in seconds + +```aql +--- +name: aqlDateTimeToUTC_1 +description: | + Convert a date time string from different local timezones to UTC: +--- +RETURN [ + DATE_LOCALTOUTC("2020-03-15T00:00:00.000", "Europe/Berlin"), + DATE_LOCALTOUTC("2020-03-15T00:00:00.000", "America/New_York"), + DATE_LOCALTOUTC("2020-03-15T00:00:00.000", "UTC") +] +``` + +```aql +--- +name: aqlDateTimeToUTC_2 +description: | + Convert date time strings with and without UTC indicator (`Z`), with a timezone + offset, and a Unix timestamp to UTC time: +--- +RETURN [ + DATE_LOCALTOUTC("2020-03-15T00:00:00.000", "Asia/Shanghai"), + DATE_LOCALTOUTC("2020-03-15T00:00:00.000Z", "Asia/Shanghai"), + DATE_LOCALTOUTC("2020-03-15T00:00:00.000-02:00", "Asia/Shanghai"), + DATE_LOCALTOUTC(1584230400000, "Asia/Shanghai") +] +``` + +```aql +--- +name: aqlDateTimeToUTC_3 +description: | + Convert to UTC time and include timezone information: +--- +RETURN DATE_LOCALTOUTC("2021-03-16T12:00:00.000", "Africa/Lagos", true) +``` + +### DATE_TIMEZONE() + +Introduced in: v3.8.0 + +Returns system timezone ArangoDB is running on. + +For cloud servers, this is most likely `"Etc/UTC"`. + +`DATE_TIMEZONE() → timezone` + +- returns **timezone** (string): + [IANA timezone name](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) + of the server timezone. + +### DATE_TIMEZONES() + +Introduced in: v3.8.0 + +Returns all valid timezone names. + +`DATE_TIMEZONES() → timezones` + +- returns **timezones** (array): an array of + [IANA timezone names](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) + +## Working with dates and indexes + +There are two recommended ways to store timestamps in ArangoDB: + - string: UTC timestamp with [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) + - number: [unix timestamp](https://en.wikipedia.org/wiki/Unix_time) with millisecond precision + +The sort order of both is identical due to the sort properties of ISO date strings. +You can't mix both types, numbers and strings, in a single attribute however. + +You can use [persistent indexes](../../indexes-and-search/indexing/working-with-indexes/persistent-indexes.md) with both date types. +When choosing string representations, you can work with string comparisons (less than, +greater than etc.) to express time ranges in your queries while still utilizing +persistent indexes: + +```js +--- +name: working_with_date_time +description: '' +--- +db._create("exampleTime"); +var timestamps = [ + "2014-05-07T14:19:09.522", + "2014-05-07T21:19:09.522", + "2014-05-08T04:19:09.522", + "2014-05-08T11:19:09.522", + "2014-05-08T18:19:09.522" +]; +for (i = 0; i < 5; i++) { + db.exampleTime.save({value:i, ts: timestamps[i]}); +} +db._query(` + FOR d IN exampleTime + FILTER d.ts > '2014-05-07T14:19:09.522' AND d.ts < '2014-05-08T18:19:09.522' + RETURN d +`).toArray() +~addIgnoreCollection("example") +~db._drop("exampleTime") +``` + +The first and the last timestamp in the array are excluded from the result by the `FILTER`. 
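+
+To actually serve such a range query from a persistent index, an index on the
+queried attribute needs to exist. As a minimal sketch (assuming the
+`exampleTime` collection and its `ts` attribute from the example above), you
+could create one in arangosh before running the query:
+
+```js
+// Create a persistent index on the "ts" attribute so that string comparisons
+// like ts > "2014-05-07T14:19:09.522" can use the index instead of a full scan.
+db.exampleTime.ensureIndex({ type: "persistent", fields: ["ts"] });
+```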
+ +## Limitations + +Note that dates before the year 1583 aren't allowed by the +[ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) standard by default, because +they lie before the official introduction of the Gregorian calendar and may thus +be incorrect or invalid. All AQL date functions apply the same rules to every +date according to the Gregorian calendar system, even if inappropriate. That +does not constitute a problem, unless you deal with dates prior to 1583 and +especially years before Christ. The standard allows negative years, but requires +special treatment of positive years too, if negative years are used (e.g. +`+002015-05-15` and `-000753-01-01`). This is rarely used however, and AQL does +not use the 7-character version for years between 0 and 9999 in ISO strings. +Keep in mind that they can't be properly compared to dates outside that range. +Sorting of negative dates does not result in a meaningful order, with years longer +ago last, but months, days and the time components in otherwise correct order. + +Leap seconds are ignored, just as they are in JavaScript as per +[ECMAScript Language Specifications](http://www.ecma-international.org/ecma-262/5.1/#sec-15.9.1.1). diff --git a/site/content/arangodb/oem/aql/functions/document-object.md b/site/content/arangodb/oem/aql/functions/document-object.md new file mode 100644 index 0000000000..4394f6fc4c --- /dev/null +++ b/site/content/arangodb/oem/aql/functions/document-object.md @@ -0,0 +1,1023 @@ +--- +title: Document and object functions in AQL +menuTitle: Document / Object +weight: 25 +description: >- + AQL provides functions to operate on objects respectively document values +--- +You can use the below listed functions with the +[object data type](../../concepts/data-structure/documents/_index.md#data-types). +Also see [object access](../fundamentals/data-types.md#objects--documents) for +additional language constructs for objects. + +## ATTRIBUTES() + +`ATTRIBUTES(document, removeSystemAttrs, sort) → strArray` + +Return the top-level attribute keys of the `document` as an array. +Optionally omit system attributes and sort the array. + +To return the attribute values instead, see the [`VALUES()` function](#values). + +- **document** (object): an arbitrary document / object +- **removeSystemAttrs** (bool, *optional*): whether all system attributes + (starting with an underscore, such as `_key` and `_id`) shall be omitted in + the result. The default is `false`. +- **sort** (bool, *optional*): optionally sort the resulting array alphabetically. + The default is `false` and will return the attribute names in any order. 
+- returns **strArray** (array): the attribute keys of the input `document` as an
+  array of strings
+
+**Examples**
+
+Return the attribute keys of an object:
+
+```aql
+---
+name: aqlAttributes
+description: ''
+---
+RETURN ATTRIBUTES( { "foo": "bar", "_key": "123", "_custom": "yes" } )
+```
+
+Return the attribute keys of an object but omit system attributes:
+
+```aql
+---
+name: aqlAttributesRemoveInternal
+description: ''
+---
+RETURN ATTRIBUTES( { "foo": "bar", "_key": "123", "_custom": "yes" }, true )
+```
+
+Return the attribute keys of an object in alphabetic order:
+
+```aql
+---
+name: aqlAttributesSort
+description: ''
+---
+RETURN ATTRIBUTES( { "foo": "bar", "_key": "123", "_custom": "yes" }, false, true )
+```
+
+Complex example to count how often every top-level attribute key occurs in the
+documents of a collection (expensive on large collections):
+
+```aql
+LET attributesPerDocument = (
+  FOR doc IN collection RETURN ATTRIBUTES(doc, true)
+)
+FOR attributeArray IN attributesPerDocument
+  FOR attribute IN attributeArray
+    COLLECT attr = attribute WITH COUNT INTO count
+    SORT count DESC
+    RETURN {attr, count}
+```
+
+## COUNT()
+
+This is an alias for [`LENGTH()`](#length).
+
+## HAS()
+
+`HAS(document, attributeName) → isPresent`
+
+Test whether an attribute is present in the provided document.
+
+- **document** (object): an arbitrary document / object
+- **attributeName** (string): the attribute key to test for
+- returns **isPresent** (bool): `true` if `document` has an attribute named
+  `attributeName`, and `false` otherwise. Also returns `true` if the attribute
+  has a falsy value (`null`, `0`, `false`, empty string `""`)
+
+The function checks if the specified attribute exists, regardless of its value.
+Other ways of testing for the existence of an attribute may behave differently
+if the attribute has a falsy value or is not present (implicitly `null` on
+object access):
+
+```aql
+!!{ name: "" }.name // false
+HAS( { name: "" }, "name") // true
+
+{ name: null }.name == null // true
+{ }.name == null // true
+HAS( { name: null }, "name" ) // true
+HAS( { }, "name" ) // false
+```
+
+Note that `HAS()` cannot utilize indexes. If it is not necessary to distinguish
+between explicit and implicit *null* values in your query, you may use an equality
+comparison to test for *null* and create a non-sparse index on the attribute you
+want to test against:
+
+```aql
+FILTER !HAS(doc, "name") // cannot use indexes
+FILTER IS_NULL(doc.name) // cannot use indexes
+FILTER doc.name == null // can utilize non-sparse indexes
+```
+
+**Examples**
+
+Check whether the example object has a `name` attribute key:
+
+```aql
+---
+name: aqlHas_1
+description: ''
+---
+RETURN HAS( { name: "Jane" }, "name" )
+```
+
+Check whether the example object has an `age` attribute key:
+
+```aql
+---
+name: aqlHas_2
+description: ''
+---
+RETURN HAS( { name: "Jane" }, "age" )
+```
+
+Falsy attribute values like `null` still count as the attribute being present:
+
+```aql
+---
+name: aqlHas_3
+description: ''
+---
+RETURN HAS( { name: null }, "name" )
+```
+
+## IS_SAME_COLLECTION()
+
+`IS_SAME_COLLECTION(collectionName, documentIdentifier) → isSame`
+
+Test whether the `documentIdentifier` has `collectionName` as collection.
+
+The function does not validate whether the collection actually contains the
+specified document. It only compares the name of the specified collection
+with the collection name part of the specified document.
+ +- **collectionName** (string): the name of a collection as string +- **documentIdentifier** (string\|object): a document identifier string + (e.g. `_users/1234`) or an object with an `_id` attribute (e.g. a document + from a collection). +- returns **isSame** (bool): `true` if the collection of `documentIdentifier` is the + same as `collectionName`, or `false` if it is not. If `documentIdentifier` is an + object without an `_id` attribute or anything other than a string or object, + then `null` is returned and a warning is raised. + +**Examples** + +```aql +--- +name: aqlIsSameCollection +description: '' +--- +RETURN [ + IS_SAME_COLLECTION( "_users", "_users/my-user" ), + IS_SAME_COLLECTION( "_users", { _id: "_users/my-user" } ), + IS_SAME_COLLECTION( "_users", "foobar/baz"), + IS_SAME_COLLECTION( "_users", { _id: "something/else" } ) +] +``` + +## KEEP() + +`KEEP(document, attributeName1, attributeName2, ... attributeNameN) → doc` + +Keep only the attributes `attributeName` to `attributeNameN` of `document`. +All other attributes will be removed from the result. + +To do the opposite, see [`UNSET()`](#unset). + +- **document** (object): a document / object +- **attributeNames** (string, *repeatable*): an arbitrary number of attribute + names as multiple arguments +- returns **doc** (object): a document with only the specified attributes at + the top-level + +**Examples** + +Keep the top-level `foo` attribute, preserving its nested object: + +```aql +--- +name: aqlKeep_1 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN KEEP(doc, "foo") +``` + +Keep the top-level `bar` attribute, which the example object does not have, +resulting in an empty object: + +```aql +--- +name: aqlKeep_2 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN KEEP(doc, "bar") +``` + +Keep the top-level `baz` attribute: + +```aql +--- +name: aqlKeep_3 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN KEEP(doc, "baz") +``` + +Keep multiple top-level attributes (`foo` and `baz`): + +```aql +--- +name: aqlKeep_4 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN KEEP(doc, "foo", "baz") +``` + +--- + +`KEEP(document, attributeNameArray) → doc` + +- **document** (object): a document / object +- **attributeNameArray** (array): an array of attribute names as strings +- returns **doc** (object): a document with only the specified attributes at + the top-level + +**Examples** + +Keep multiple top-level attributes (`foo` and `baz`), by passing an array of the +attribute keys instead of individual arguments: + +```aql +--- +name: aqlKeep_5 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN KEEP(doc, ["foo", "baz"]) +``` + +## KEEP_RECURSIVE() + +`KEEP_RECURSIVE(document, attributeName1, attributeName2, ... attributeNameN) → doc` + +Recursively preserve the attributes `attributeName1` to `attributeNameN` from +`document` and its sub-documents. All other attributes will be removed. + +To do the opposite, use [`UNSET_RECURSIVE()`](#unset_recursive). 
+ +- **document** (object): a document / object +- **attributeNames** (string, *repeatable*): an arbitrary number of attribute + names as multiple arguments (at least 1) +- returns **doc** (object): `document` with only the specified attributes at + all levels (top-level as well as nested objects) + +**Examples** + +Recursively preserve `foo` attributes, but not nested attributes that have +parents with other names: + +```aql +--- +name: aqlKeepRecursive_1 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN KEEP_RECURSIVE(doc, "foo") +``` + +Recursively preserve `bar` attributes, but there is none at the top-level, leading +to an empty object: + +```aql +--- +name: aqlKeepRecursive_2 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN KEEP_RECURSIVE(doc, "bar") +``` + +Recursively preserve `baz` attributes, but not nested attributes that have +parents with other names: + +```aql +--- +name: aqlKeepRecursive_3 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN KEEP_RECURSIVE(doc, "baz") +``` + +Recursively preserve multiple attributes (`foo` and `bar`): + +```aql +--- +name: aqlKeepRecursive_4 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN KEEP_RECURSIVE(doc, "foo", "bar") +``` + +Recursively preserve multiple attributes (`foo` and `baz`), but not nested +attributes that have parents with other names: + +```aql +--- +name: aqlKeepRecursive_5 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN KEEP_RECURSIVE(doc, "foo", "baz") +``` + +Recursively preserve multiple attributes (`foo`, `bar`, and `baz`), preserving all +attributes of the example object: + +```aql +--- +name: aqlKeepRecursive_6 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN KEEP_RECURSIVE(doc, "foo", "bar", "baz") +``` + +--- + +`KEEP_RECURSIVE(document, attributeNameArray) → doc` + +- **document** (object): a document / object +- **attributeNameArray** (array): an array of attribute names as strings +- returns **doc** (object): *document* with only the specified attributes at + all levels (top-level as well as nested objects) + +**Examples** + +Recursively preserve multiple attributes (`foo` and `baz`), by passing an array of the +attribute keys instead of individual arguments: + +```aql +--- +name: aqlKeepRecursive_7 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN KEEP_RECURSIVE(doc, ["foo", "baz"]) +``` + +## KEYS() + +This is an alias for [`ATTRIBUTES()`](#attributes). + +## LENGTH() + +`LENGTH(doc) → attrCount` + +Determine the number of attribute keys of an object / document. + +`LENGTH()` can also determine the [number of elements](array.md#length) in an array, +the [amount of documents](miscellaneous.md#length) in a collection and +the [character length](string.md#length) of a string. + +- **doc** (object): a document / object +- returns **attrCount** (number): the number of attribute keys in `doc`, regardless + of their values + +**Examples** + +```aql +--- +name: aqlLengthObject +description: '' +--- +RETURN LENGTH({ name: "Emma", age: 36, phone: { mobile: "..." } }) +``` + +## MATCHES() + +`MATCHES(document, examples, returnIndex) → match` + +Compare the given `document` against each example document provided. The comparisons +will be started with the first example. 
All attributes of the example will be compared +against the attributes of `document`. If all attributes match, the comparison stops +and the result is returned. If there is a mismatch, the function will continue the +comparison with the next example until there are no more examples left. + +The `examples` can be an array of 1..n example documents or a single document, +with any number of attributes each. + +An attribute value of `null` will match documents with an explicit attribute value +of `null` as well as documents with this attribute missing (implicitly `null`). +Only [`HAS()`](#has) can differentiate between an attribute being absent and having +a stored `null` value. + +An empty object `{}` will match all documents. Be careful not to ask for all +documents accidentally. For example, the [arangojs](../../develop/drivers/javascript.md) driver +skips attributes with a value of `undefined`, turning `{attr: undefined}` into `{}`. + +{{< info >}} +`MATCHES()` cannot utilize indexes. You may use plain `FILTER` conditions instead +to potentially benefit from existing indexes: + +```aql +FOR doc IN coll + FILTER (cond1 AND cond2 AND cond3) OR (cond4 AND cond5) ... +``` +{{< /info >}} + +- **document** (object): document to determine whether it matches any example +- **examples** (object\|array): a single document, or an array of documents to compare + against. Specifying an empty array is not allowed. +- **returnIndex** (bool): by setting this flag to `true`, the index of the example that + matched will be returned (starting at offset 0), or `-1` if there was no match. + The default is `false` and makes the function return a boolean. +- returns **match** (bool\|number): if `document` matches one of the examples, `true` is + returned, otherwise `false`. A number is returned instead if `returnIndex` is enabled. + +**Examples** + +Check whether all attributes of the example are present in the document: + +```aql +--- +name: aqlMatches_1 +description: '' +--- +LET doc = { + name: "jane", + age: 27, + active: true +} +RETURN MATCHES(doc, { age: 27, active: true } ) +``` + +Check whether one of the examples matches the document and return the index of +the matching example: + +```aql +--- +name: aqlMatches_2 +description: '' +--- +RETURN MATCHES( + { "test": 1 }, + [ + { "test": 1, "foo": "bar" }, + { "foo": 1 }, + { "test": 1 } + ], +true) +``` + +## MERGE() + +`MERGE(document1, document2, ... documentN) → mergedDocument` + +Merge the documents `document1` to `documentN` into a single document. +If document attribute keys are ambiguous, the merged result will contain the values +of the documents contained later in the argument list. + +Note that merging will only be done for top-level attributes. If you wish to +merge sub-attributes, use [`MERGE_RECURSIVE()`](#merge_recursive) instead. 
+ +- **documents** (object, *repeatable*): an arbitrary number of documents as + multiple arguments (at least 2) +- returns **mergedDocument** (object): a combined document + +**Examples** + +Two documents with distinct attribute names can easily be merged into one: + +```aql +--- +name: aqlMerge_1 +description: '' +--- +RETURN MERGE( + { "user1": { "name": "Jane" } }, + { "user2": { "name": "Tom" } } +) +``` + +When merging documents with identical attribute names, the attribute values of the +latter documents will be used in the end result: + +```aql +--- +name: aqlMerge_2 +description: '' +--- +RETURN MERGE( + { "users": { "name": "Jane" } }, + { "users": { "name": "Tom" } } +) +``` + +--- + +`MERGE(docArray) → mergedDocument` + +`MERGE()` also accepts a single array parameter. This variant allows combining the +attributes of multiple objects in an array into a single object. + +- **docArray** (array): an array of documents, as sole argument +- returns **mergedDocument** (object): a combined document + +**Examples** + +```aql +--- +name: aqlMerge_3 +description: '' +--- +RETURN MERGE( + [ + { foo: "bar" }, + { quux: "quetzalcoatl", ruled: true }, + { bar: "baz", foo: "done" } + ] +) +``` + +{{< tip >}} +Consider to use [`ZIP()`](#zip) instead of `MERGE()` if you want to merge a set +of disjoint keys and their associated values into a single object. + +This could be a pattern like the following where objects with dynamic attribute +keys are created and then merged together (here to return a map of distinct +attribute values and how often they occur): + +```aql +RETURN MERGE( + FOR doc IN coll + COLLECT value = doc.attr WITH COUNT INTO count + RETURN { [value]: count } +) +``` + +This creates many temporary objects and can be slow if there are thousands of +objects to merge. The following pattern using `ZIP()` is more efficient: + +```aql +LET counts = ( + FOR doc IN coll + COLLECT value = doc.attr WITH COUNT INTO count + RETURN [value, count] +) +RETURN ZIP(counts[*][0], counts[*][1]) +``` +{{< /tip >}} + +## MERGE_RECURSIVE() + +`MERGE_RECURSIVE(document1, document2, ... documentN) → mergedDocument` + +Recursively merge the documents `document1` to `documentN` into a single document. +If document attribute keys overlap, the merged result contains the values +of the documents contained later in the argument list. + +- **documents** (object, *repeatable*): an arbitrary number of documents as + multiple arguments (at least 1) +- returns **mergedDocument** (object): a combined document + +**Examples** + +Merge two documents with the same top-level attribute, combining the `name`, +`age`, and `livesIn` sub-attributes: + +```aql +--- +name: aqlMergeRecursive_1 +description: '' +--- +RETURN MERGE_RECURSIVE( + { "user-1": { "name": "Jane", "livesIn": { "city": "LA" } } }, + { "user-1": { "age": 42, "livesIn": { "state": "CA" } } } +) +``` + +`MERGE_RECURSIVE(documents) → mergedDocument` + +Recursively merge the list of documents into a single document. +If document attribute keys overlap, the merged result contains the values +of the documents specified later in the list. 
+ +- **documents** (array): an array with an arbitrary number of objects +- returns **mergedDocument** (object): a combined document + +**Examples** + +Merge a list of two documents with the same top-level attribute, combining the +`name` and `age` sub-attributes but overwriting the `city` value in the +`livesIn` sub-attribute: + +```aql +--- +name: aqlMergeRecursive_2 +description: '' +--- +RETURN MERGE_RECURSIVE( + [ + { "user-1": { "name": "Jane", "livesIn": { "city": "LA" } } }, + { "user-1": { "age": 42, "livesIn": { "city": "NY" } } } + ] +) +``` + +## PARSE_IDENTIFIER() + +`PARSE_IDENTIFIER(documentIdentifier) → parts` + +Parse a [document ID](../../concepts/data-structure/documents/_index.md#document-identifiers) +and separately return the collection name and the document key. + +- **documentIdentifier** (string\|object): a document identifier string (e.g. `_users/1234`) + or a regular document from a collection. Passing either a non-string or a non-document + or a document without an `_id` attribute results in an error. +- returns **parts** (object): an object with the attributes `collection` and `key` + +**Examples** + +Parse a document identifier string and extract both the collection name and the +document key: + +```aql +--- +name: aqlParseIdentifier_1 +description: '' +--- +RETURN PARSE_IDENTIFIER("_users/my-user") +``` + +Parse the `_id` attribute of a document to extract both the collection name and +the document key: + +```aql +--- +name: aqlParseIdentifier_2 +description: '' +--- +RETURN PARSE_IDENTIFIER( { "_id": "mycollection/mykey", "value": "some value" } ) +``` + +## TRANSLATE() + +`TRANSLATE(value, lookupDocument, defaultValue) → mappedValue` + +Look up the specified `value` in the `lookupDocument`. If `value` is a key in +`lookupDocument`, then `value` will be replaced with the lookup value found. +If `value` is not present in `lookupDocument`, then `defaultValue` will be returned +if specified. If no `defaultValue` is specified, `value` will be returned unchanged. + +- **value** (string): the value to encode according to the mapping +- **lookupDocument** (object): a key/value mapping as document +- **defaultValue** (any, *optional*): a fallback value in case `value` is not found +- returns **mappedValue** (any): the encoded value, or the unaltered `value` or `defaultValue` + (if supplied) in case it could not be mapped + +**Examples** + +Translate a country code to a country name: + +```aql +--- +name: aqlTranslate_1 +description: '' +--- +RETURN TRANSLATE("FR", { US: "United States", UK: "United Kingdom", FR: "France" } ) +``` + +The unaltered input value is returned if no match is found in the mapping: + +```aql +--- +name: aqlTranslate_2 +description: '' +--- +RETURN TRANSLATE(42, { foo: "bar", bar: "baz" } ) +``` + +If you specify a fallback value and no match is found in the mapping, then the +fallback value returned instead of the input value: + +```aql +--- +name: aqlTranslate_3 +description: '' +--- +RETURN TRANSLATE(42, { foo: "bar", bar: "baz" }, "not found!") +``` + +Note that any non-string input value is implicitly cast to a string before the +lookup: + +```aql +--- +name: aqlTranslate_4 +description: '' +--- +RETURN TRANSLATE(42, { "42": true } ) +``` + +## UNSET() + +`UNSET(document, attributeName1, attributeName2, ... attributeNameN) → doc` + +Remove the attributes `attributeName1` to `attributeNameN` from `document`. +All other attributes will be preserved. + +To do the opposite, see [`KEEP()`](#keep). 
+
+- **document** (object): a document / object
+- **attributeNames** (string, *repeatable*): an arbitrary number of attribute
+  names as multiple arguments (at least 1)
+- returns **doc** (object): `document` without the specified attributes at the
+  top-level
+
+**Examples**
+
+Remove the top-level `foo` attribute, including its nested objects:
+
+```aql
+---
+name: aqlUnset_1
+description: ''
+---
+LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 }
+RETURN UNSET(doc, "foo")
+```
+
+Remove the top-level `bar` attribute, which the example object does not have,
+resulting in an unchanged object:
+
+```aql
+---
+name: aqlUnset_2
+description: ''
+---
+LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 }
+RETURN UNSET(doc, "bar")
+```
+
+Remove the top-level `baz` attribute:
+
+```aql
+---
+name: aqlUnset_3
+description: ''
+---
+LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 }
+RETURN UNSET(doc, "baz")
+```
+
+Remove multiple top-level attributes (`foo` and `baz`), resulting in an empty
+object in this example:
+
+```aql
+---
+name: aqlUnset_4
+description: ''
+---
+LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 }
+RETURN UNSET(doc, "foo", "baz")
+```
+
+---
+
+`UNSET(document, attributeNameArray) → doc`
+
+- **document** (object): a document / object
+- **attributeNameArray** (array): an array of attribute names as strings
+- returns **doc** (object): *document* without the specified attributes at the
+  top-level
+
+**Examples**
+
+Remove multiple top-level attributes (`foo` and `bar`), by passing an array of the
+attribute keys instead of individual arguments:
+
+```aql
+---
+name: aqlUnset_5
+description: ''
+---
+LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 }
+RETURN UNSET(doc, ["foo", "bar"])
+```
+
+## UNSET_RECURSIVE()
+
+`UNSET_RECURSIVE(document, attributeName1, attributeName2, ... attributeNameN) → doc`
+
+Recursively remove the attributes `attributeName1` to `attributeNameN` from
+`document` and its sub-documents. All other attributes will be preserved.
+
+To do the opposite, use [`KEEP_RECURSIVE()`](#keep_recursive).
+ +- **document** (object): a document / object +- **attributeNames** (string, *repeatable*): an arbitrary number of attribute + names as multiple arguments (at least 1) +- returns **doc** (object): `document` without the specified attributes at + all levels (top-level as well as nested objects) + +**Examples** + +Recursively remove `foo` attributes: + +```aql +--- +name: aqlUnsetRecursive_1 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN UNSET_RECURSIVE(doc, "foo") +``` + +Recursively remove `bar` attributes: + +```aql +--- +name: aqlUnsetRecursive_2 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN UNSET_RECURSIVE(doc, "bar") +``` + +Recursively remove `baz` attributes: + +```aql +--- +name: aqlUnsetRecursive_3 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN UNSET_RECURSIVE(doc, "baz") +``` + +Recursively remove multiple attributes (`foo` and `bar`): + +```aql +--- +name: aqlUnsetRecursive_4 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN UNSET_RECURSIVE(doc, "foo", "bar") +``` + +Recursively remove multiple attributes (`foo` and `baz`), removing all +attributes of the example object: + +```aql +--- +name: aqlUnsetRecursive_5 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN UNSET_RECURSIVE(doc, "foo", "baz") +``` + +Recursively remove multiple attributes (`foo`, `bar`, and `baz`), removing all +attributes of the example object: + +```aql +--- +name: aqlUnsetRecursive_6 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN UNSET_RECURSIVE(doc, "foo", "bar", "baz") +``` + +--- + +`UNSET_RECURSIVE(document, attributeNameArray) → doc` + +- **document** (object): a document / object +- **attributeNameArray** (array): an array of attribute names as strings +- returns **doc** (object): *document* without the specified attributes at + all levels (top-level as well as nested objects) + +**Examples** + +Recursively remove `baz` attributes, by passing an array with the attribute key: + +```aql +--- +name: aqlUnsetRecursive_7 +description: '' +--- +LET doc = { foo: { bar: { foo: 1, baz: 2 }, baz: 3 }, baz: 4 } +RETURN UNSET_RECURSIVE(doc, ["baz"]) +``` + +## VALUE() + +`VALUE(document, path) → value` + +Return the specified attribute value of the `document`. + +- **document** (object): a document / object +- **path** (array): an array of strings and numbers that describes the + attribute path. You can select object keys with strings and array elements + with numbers. +- returns **value** (any): the selected value of `document` + +**Examples** + +Dynamically get the inner string, like `obj.foo.bar` would: + +```aql +--- +name: aqlValue_1 +description: '' +--- +LET obj = { foo: { bar: "baz" } } +RETURN VALUE(obj, ["foo", "bar"]) +``` + +Dynamically get the inner object of the second array element of a top-level +attribute, like `obj.foo[1].bar` would: + +```aql +--- +name: aqlValue_2 +description: '' +--- +LET obj = { foo: [ { bar: "baz" }, { bar: { inner: true } } ] } +RETURN VALUE(obj, ["foo", 1, "bar"]) +``` + +## VALUES() + +`VALUES(document, removeSystemAttrs) → anyArray` + +Return the attribute values of the `document` as an array. Optionally omit +system attributes. + +To return the attribute keys instead, see the [`ATTRIBUTES()` function](#attributes). 
+ +- **document** (object): a document / object +- **removeSystemAttrs** (bool, *optional*): if set to `true`, then all + system attributes (starting with an underscore, such as `_id`, `_key` etc.) + are removed from the result +- returns **anyArray** (array): the values of `document` returned in any order + +**Examples** + +Get the attribute values of an object: + +```aql +--- +name: aqlValues_1 +description: '' +--- +RETURN VALUES( { "_id": "users/jane", "name": "Jane", "age": 35 } ) +``` + +Get the attribute values of an object, omitting system attributes: + +```aql +--- +name: aqlValues_2 +description: '' +--- +RETURN VALUES( { "_id": "users/jane", "name": "Jane", "age": 35 }, true ) +``` + +## ZIP() + +`ZIP(keys, values) → doc` + +Return a document object assembled from the separate parameters `keys` and `values`. + +`keys` and `values` must be arrays and have the same length. + +- **keys** (array): an array of strings, to be used as attribute names in the result +- **values** (array): an array with elements of arbitrary types, to be used as + attribute values +- returns **doc** (object): a document with the keys and values assembled + +**Examples** + +```aql +--- +name: aqlZip +description: '' +--- +RETURN ZIP( [ "name", "active", "hobbies" ], [ "some user", true, [ "swimming", "riding" ] ] ) +``` diff --git a/site/content/arangodb/oem/aql/functions/fulltext.md b/site/content/arangodb/oem/aql/functions/fulltext.md new file mode 100644 index 0000000000..ce1c8f738c --- /dev/null +++ b/site/content/arangodb/oem/aql/functions/fulltext.md @@ -0,0 +1,94 @@ +--- +title: Fulltext functions in AQL +menuTitle: Fulltext +weight: 30 +description: >- + AQL offers functions to filter data using fulltext indexes +--- +See [fulltext indexes](../../indexes-and-search/indexing/working-with-indexes/fulltext-indexes.md) +for details. + +{{< warning >}} +The fulltext index type is deprecated from version 3.10 onwards. +It is recommended to use [Inverted indexes](../../indexes-and-search/indexing/working-with-indexes/inverted-indexes.md) or +[ArangoSearch](../../indexes-and-search/arangosearch/_index.md) for advanced full-text search capabilities. +{{< /warning >}} + +## FULLTEXT() + +`FULLTEXT(coll, attribute, query, limit) → docArray` + +Return all documents from collection *coll*, for which the attribute *attribute* +matches the fulltext search phrase *query*, optionally capped to *limit* results. + +**Note**: the `FULLTEXT()` function requires the collection *coll* to have a +fulltext index on *attribute*. If no fulltext index is available, this function +will fail with an error at runtime. It doesn't fail when explaining the query however. + +- **coll** (collection): a collection +- **attribute** (string): the attribute name of the attribute to search in +- **query** (string): a fulltext search expression as described below +- **limit** (number, *optional*): if set to a non-zero value, it will cap the result + to at most this number of documents +- returns **docArray** (array): an array of documents + +`FULLTEXT()` is not meant to be used as an argument to `FILTER`, +but rather to be used as the expression of a `FOR` statement: + +```aql +FOR oneMail IN FULLTEXT(emails, "body", "banana,-apple") + RETURN oneMail._id +``` + +*query* is a comma-separated list of sought words (or prefixes of sought words). To +distinguish between prefix searches and complete-match searches, each word can optionally be +prefixed with either the `prefix:` or `complete:` qualifier. 
Different qualifiers can +be mixed in the same query. Not specifying a qualifier for a search word will implicitly +execute a complete-match search for the given word: + +- `FULLTEXT(emails, "body", "banana")`\ + Will look for the word *banana* in the + attribute *body* of the collection *collection*. + +- `FULLTEXT(emails, "body", "banana,orange")`\ + Will look for both words + *banana* and *orange* in the mentioned attribute. Only those documents will be + returned that contain both words. + +- `FULLTEXT(emails, "body", "prefix:head")`\ + Will look for documents that contain any + words starting with the prefix *head*. + +- `FULLTEXT(emails, "body", "prefix:head,complete:aspirin")`\ + Will look for all + documents that contain a word starting with the prefix *head* and that also contain + the (complete) word *aspirin*. Note: specifying `complete:` is optional here. + +- `FULLTEXT(emails, "body", "prefix:cent,prefix:subst")`\ + Will look for all documents + that contain a word starting with the prefix *cent* and that also contain a word + starting with the prefix *subst*. + +If multiple search words (or prefixes) are given, then by default the results will be +AND-combined, meaning only the logical intersection of all searches will be returned. +It is also possible to combine partial results with a logical OR, and with a logical NOT: + +- `FULLTEXT(emails, "body", "+this,+text,+document")`\ + Will return all documents that + contain all the mentioned words. Note: specifying the `+` symbols is optional here. + +- `FULLTEXT(emails, "body", "banana,|apple")`\ + Will return all documents that contain + either (or both) words *banana* or *apple*. + +- `FULLTEXT(emails, "body", "banana,-apple")`\ + Will return all documents that contain + the word *banana*, but do not contain the word *apple*. + +- `FULLTEXT(emails, "body", "banana,pear,-cranberry")`\ + Will return all documents that + contain both the words *banana* and *pear*, but do not contain the word + *cranberry*. + +No precedence of logical operators will be honored in a fulltext query. The query will simply +be evaluated from left to right. diff --git a/site/content/arangodb/oem/aql/functions/geo.md b/site/content/arangodb/oem/aql/functions/geo.md new file mode 100644 index 0000000000..a34fbc1738 --- /dev/null +++ b/site/content/arangodb/oem/aql/functions/geo.md @@ -0,0 +1,964 @@ +--- +title: Geo-spatial functions in AQL +menuTitle: Geo +weight: 35 +description: >- + AQL supports functions for geo-spatial queries and a subset of calls can be + accelerated by geo-spatial indexes +--- +## Geo-spatial data representations + +You can model geo-spatial information in different ways using the data types +available in ArangoDB. The recommended way is to use objects with **GeoJSON** +geometry but you can also use **longitude and latitude coordinate pairs** +for points. Both models are supported by +[Geo-Spatial Indexes](../../indexes-and-search/indexing/working-with-indexes/geo-spatial-indexes.md). 
+ +### Coordinate pairs + +Longitude and latitude coordinates are numeric values and can be stored in the +following ways: + +- Coordinates using an array with two numbers in `[longitude, latitude]` order, + for example, in a user-chosen attribute called `location`: + + ```json + { + "location": [ -73.983, 40.764 ] + } + ``` + +- Coordinates using an array with two numbers in `[latitude, longitude]` order, + for example, in a user-chosen attribute called `location`: + + ```json + { + "location": [ 40.764, -73.983 ] + } + ``` + +- Coordinates using two separate numeric attributes, for example, in two + user-chosen attributes called `lat` and `lng` as sub-attributes of a `location` + attribute: + + ```json + { + "location": { + "lat": 40.764, + "lng": -73.983 + } + } + ``` + +### GeoJSON + +GeoJSON is a geospatial data format based on JSON. It defines several different +types of JSON objects and the way in which they can be combined to represent +data about geographic shapes on the Earth surface. + +Example of a document with a GeoJSON Point stored in a user-chosen attribute +called `location` (with coordinates in `[longitude, latitude]` order): + +```json +{ + "location": { + "type": "Point", + "coordinates": [ -73.983, 40.764 ] + } +} +``` + +GeoJSON uses a geographic coordinate reference system, +World Geodetic System 1984 (WGS 84), and units of decimal degrees. + +Internally ArangoDB maps all coordinate pairs onto a unit sphere. Distances are +projected onto a sphere with the Earth's *Volumetric mean radius* of *6371 +km*. ArangoDB implements a useful subset of the GeoJSON format +[(RFC 7946)](https://tools.ietf.org/html/rfc7946). +Feature Objects and the GeometryCollection type are not supported. +Supported geometry object types are: + +- Point +- MultiPoint +- LineString +- MultiLineString +- Polygon +- MultiPolygon + +#### Point + +A [GeoJSON Point](https://tools.ietf.org/html/rfc7946#section-3.1.2) is a +[position](https://tools.ietf.org/html/rfc7946#section-3.1.1) comprised of +a longitude and a latitude: + +```json +{ + "type": "Point", + "coordinates": [100.0, 0.0] +} +``` + +#### MultiPoint + +A [GeoJSON MultiPoint](https://tools.ietf.org/html/rfc7946#section-3.1.7) is +an array of positions: + +```json +{ + "type": "MultiPoint", + "coordinates": [ + [100.0, 0.0], + [101.0, 1.0] + ] +} +``` + +#### LineString + +A [GeoJSON LineString](https://tools.ietf.org/html/rfc7946#section-3.1.4) is +an array of two or more positions: + +```json +{ + "type": "LineString", + "coordinates": [ + [100.0, 0.0], + [101.0, 1.0] + ] +} +``` + +#### MultiLineString + +A [GeoJSON MultiLineString](https://tools.ietf.org/html/rfc7946#section-3.1.5) is +an array of LineString coordinate arrays: + +```json +{ + "type": "MultiLineString", + "coordinates": [ + [ + [100.0, 0.0], + [101.0, 1.0] + ], + [ + [102.0, 2.0], + [103.0, 3.0] + ] + ] +} +``` + +#### Polygon + +A [GeoJSON Polygon](https://tools.ietf.org/html/rfc7946#section-3.1.6) consists +of a series of closed `LineString` objects (ring-like). These *Linear Ring* +objects consist of four or more coordinate pairs with the first and last +coordinate pair being equal. Coordinate pairs of a Polygon are an array of +linear ring coordinate arrays. The first element in the array represents +the exterior ring. Any subsequent elements represent interior rings +(holes within the surface). 
+ +The orientation of the first linear ring is crucial: the right-hand-rule +is applied, so that the area to the left of the path of the linear ring +(when walking on the surface of the Earth) is considered to be the +"interior" of the polygon. All other linear rings must be contained +within this interior. According to the GeoJSON standard, the subsequent +linear rings must be oriented following the right-hand-rule, too, +that is, they must run **clockwise** around the hole (viewed from +above). However, ArangoDB is tolerant here (as suggested by the +[GeoJSON standard](https://datatracker.ietf.org/doc/html/rfc7946#section-3.1.6)), +all but the first linear ring are inverted if the orientation is wrong. + +In the end, a point is considered to be in the interior of the polygon, +if and only if one has to cross an odd number of linear rings to reach the +exterior of the polygon prescribed by the first linear ring. + +A number of additional rules apply (and are enforced by the GeoJSON +parser): + +- A polygon must contain at least one linear ring, i.e., it must not be + empty. +- A linear ring may not be empty, it needs at least three _distinct_ + coordinate pairs, that is, at least 4 coordinate pairs (since the first and + last must be the same). +- No two edges of linear rings in the polygon must intersect, in + particular, no linear ring may be self-intersecting. +- Within the same linear ring, consecutive coordinate pairs may be the same, + otherwise all coordinate pairs need to be distinct (except the first and last one). +- Linear rings of a polygon must not share edges, but they may share coordinate pairs. +- A linear ring defines two regions on the sphere. ArangoDB always + interprets the region that lies to the left of the boundary ring (in + the direction of its travel on the surface of the Earth) as the + interior of the ring. This is in contrast to earlier versions of + ArangoDB before 3.10, which always took the **smaller** of the two + regions as the interior. Therefore, from 3.10 on one can now have + polygons whose outer ring encloses more than half the Earth's surface. +- The interior rings must be contained in the (interior) of the outer ring. +- Interior rings should follow the above rule for orientation + (counterclockwise external rings, clockwise internal rings, interior + always to the left of the line). + +Here is an example with no holes: + +```json +{ + "type": "Polygon", + "coordinates": [ + [ + [100.0, 0.0], + [101.0, 0.0], + [101.0, 1.0], + [100.0, 1.0], + [100.0, 0.0] + ] + ] +} +``` + +Here is an example with a hole: + +```json +{ + "type": "Polygon", + "coordinates": [ + [ + [100.0, 0.0], + [101.0, 0.0], + [101.0, 1.0], + [100.0, 1.0], + [100.0, 0.0] + ], + [ + [100.8, 0.8], + [100.8, 0.2], + [100.2, 0.2], + [100.2, 0.8], + [100.8, 0.8] + ] + ] +} +``` + +#### MultiPolygon + +A [GeoJSON MultiPolygon](https://tools.ietf.org/html/rfc7946#section-3.1.6) consists +of multiple polygons. The "coordinates" member is an array of +_Polygon_ coordinate arrays. See [above](#polygon) for the rules and +the meaning of polygons. + +If the polygons in a MultiPolygon are disjoint, then a point is in the +interior of the MultiPolygon if and only if it is +contained in one of the polygons. If some polygon P2 in a MultiPolygon +is contained in another polygon P1, then P2 is treated like a hole +in P1 and containment of points is defined with the even-odd-crossings rule +(see [Polygon](#polygon)). 
+ +Additionally, the following rules apply and are enforced for +MultiPolygons: + +- No two edges in the linear rings of the polygons of a MultiPolygon + may intersect. +- Polygons in the same MultiPolygon may not share edges, but they may share + coordinate pairs. + +Example with two polygons, the second one with a hole: + +```json +{ + "type": "MultiPolygon", + "coordinates": [ + [ + [ + [102.0, 2.0], + [103.0, 2.0], + [103.0, 3.0], + [102.0, 3.0], + [102.0, 2.0] + ] + ], + [ + [ + [100.0, 0.0], + [101.0, 0.0], + [101.0, 1.0], + [100.0, 1.0], + [100.0, 0.0] + ], + [ + [100.2, 0.2], + [100.2, 0.8], + [100.8, 0.8], + [100.8, 0.2], + [100.2, 0.2] + ] + ] + ] +} +``` + +## GeoJSON interpretation + +Note the following technical detail about GeoJSON: The +[GeoJSON standard, Section 3.1.1 Position](https://datatracker.ietf.org/doc/html/rfc7946#section-3.1.1) +prescribes that lines are **cartesian lines in cylindrical coordinates** +(longitude/latitude). However, this definition is inconvenient in practice, +since such lines are not geodesic on the surface of the Earth. +Furthermore, the best available algorithms for geospatial computations on Earth +typically use geodesic lines as the boundaries of polygons on Earth. + +Therefore, ArangoDB uses the **syntax of the GeoJSON** standard, +but then interprets lines (and boundaries of polygons) as +**geodesic lines (pieces of great circles) on Earth**. This is a +violation of the GeoJSON standard, but serving a practical purpose. + +Note in particular that this can sometimes lead to unexpected results. +Consider the following polygon (remember that GeoJSON has +**longitude before latitude** in coordinate pairs): + +```json +{ "type": "Polygon", "coordinates": [[ + [4, 54], [4, 47], [16, 47], [16, 54], [4, 54] +]] } +``` + +![GeoJSON Polygon Geodesic](../../../../images/geojson-polygon-geodesic.webp) + +It does not contain the point `[10, 47]` since the shortest path (geodesic) +from `[4, 47]` to `[16, 47]` lies North relative to the parallel of latitude at +47 degrees. On the contrary, the polygon does contain the point `[10, 54]` as it +lies South of the parallel of latitude at 54 degrees. + +{{< info >}} +ArangoDB version before 3.10 did an inconsistent special detection of "rectangle" +polygons that later versions from 3.10 onward no longer do, see +[Legacy Polygons](../../indexes-and-search/indexing/working-with-indexes/geo-spatial-indexes.md#legacy-polygons). +{{< /info >}} + +Furthermore, there is an issue with the interpretation of linear rings +(boundaries of polygons) according to +[GeoJSON standard, Section 3.1.6 Polygon](https://datatracker.ietf.org/doc/html/rfc7946#section-3.1.6). +This section states explicitly: + +> A linear ring MUST follow the right-hand rule with respect to the +> area it bounds, i.e., exterior rings are counter-clockwise, and +> holes are clockwise. + +This rather misleading phrase means that when a linear ring is used as +the boundary of a polygon, the "interior" of the polygon lies **to the left** +of the boundary when one travels on the surface of the Earth and +along the linear ring. For +example, the polygon below travels **counter-clockwise** around the point +`[10, 50]`, and thus the interior of the polygon contains this point and +its surroundings, but not, for example, the North Pole and the South +Pole. 
+ +```json +{ "type": "Polygon", "coordinates": [[ + [4, 54], [4, 47], [16, 47], [16, 54], [4, 54] +]] } +``` + +![GeoJSON Polygon Counter-clockwise](../../../../images/geojson-polygon-ccw.webp) + +On the other hand, the following polygon travels **clockwise** around the point +`[10, 50]`, and thus its "interior" does not contain `[10, 50]`, but does +contain the North Pole and the South Pole: + +```json +{ "type": "Polygon", "coordinates": [[ + [4, 54], [16, 54], [16, 47], [4, 47], [4, 54] +]] } +``` + +![GeoJSON Polygon Clockwise](../../../../images/geojson-polygon-cw.webp) + +Remember that the "interior" is to the left of the given +linear ring, so this second polygon is basically the complement on Earth +of the previous polygon! + +ArangoDB versions before 3.10 did not follow this rule and always took the +"smaller" connected component of the surface as the "interior" of the polygon. +This made it impossible to specify polygons which covered more than half of the +sphere. From version 3.10 onward, ArangoDB recognizes this correctly. +See [Legacy Polygons](../../indexes-and-search/indexing/working-with-indexes/geo-spatial-indexes.md#legacy-polygons) +for how to deal with this issue. + +## Geo utility functions + +The following helper functions **can** use geo indexes, but do not have to in +all cases. You can use all of these functions in combination with each other, +and if you have configured a geo index it may be utilized, +see [Geo Indexing](../../indexes-and-search/indexing/working-with-indexes/geo-spatial-indexes.md). + +### DISTANCE() + +`DISTANCE(latitude1, longitude1, latitude2, longitude2) → distance` + +Calculate the distance between two arbitrary points in meters (as birds +would fly). The value is computed using the haversine formula, which is based +on a spherical Earth model. It's fast to compute and is accurate to around 0.3%, +which is sufficient for most use cases such as location-aware services. + +- **latitude1** (number): the latitude of the first point +- **longitude1** (number): the longitude of the first point +- **latitude2** (number): the latitude of the second point +- **longitude2** (number): the longitude of the second point +- returns **distance** (number): the distance between both points in **meters** + +```aql +// Distance from Brandenburg Gate (Berlin) to ArangoDB headquarters (Cologne) +DISTANCE(52.5163, 13.3777, 50.9322, 6.94) // 476918.89688380965 (~477km) + +// Sort a small number of documents based on distance to Central Park (New York) +FOR doc IN coll // e.g. documents returned by a traversal + SORT DISTANCE(doc.latitude, doc.longitude, 40.78, -73.97) + RETURN doc +``` + +### GEO_CONTAINS() + +`GEO_CONTAINS(geoJsonA, geoJsonB) → bool` + +Checks whether the [GeoJSON object](#geojson) `geoJsonA` +fully contains `geoJsonB` (every point in B is also in A). The object `geoJsonA` +has to be of type _Polygon_ or _MultiPolygon_. For other types containment is +not well-defined because of numerical stability problems. + +- **geoJsonA** (object): first GeoJSON object +- **geoJsonB** (object): second GeoJSON object, or a coordinate array in + `[longitude, latitude]` order +- returns **bool** (bool): `true` if every point in B is also contained in A, + otherwise `false` + +{{< info >}} +ArangoDB follows and exposes the same behavior as the underlying +S2 geometry library. As stated in the S2 documentation: + +> Point containment is defined such that if the sphere is subdivided +> into faces (loops), every point is contained by exactly one face. 
+> This implies that linear rings do not necessarily contain their vertices. + +As a consequence, a linear ring or polygon does not necessarily contain its +boundary edges! +{{< /info >}} + +You can optimize queries that contain a `FILTER` expression of the following +form with an S2-based [geospatial index](../../indexes-and-search/indexing/working-with-indexes/geo-spatial-indexes.md): + +```aql +FOR doc IN coll + FILTER GEO_CONTAINS(geoJson, doc.geo) + ... +``` + +In this example, you would create the index for the collection `coll`, on the +attribute `geo`. You need to set the `geoJson` index option to `true`. +The `geoJson` variable needs to evaluate to a valid GeoJSON object. Also note +the argument order: the stored document attribute `doc.geo` is passed as the +second argument. Passing it as the first argument, like +`FILTER GEO_CONTAINS(doc.geo, geoJson)` to test whether `doc.geo` contains +`geoJson`, cannot utilize the index. + +### GEO_DISTANCE() + +`GEO_DISTANCE(geoJsonA, geoJsonB, ellipsoid) → distance` + +Return the distance between two GeoJSON objects in meters, measured from the +**centroid** of each shape. For a list of supported types see the +[geo index page](#geojson). + +- **geoJsonA** (object): first GeoJSON object, or a coordinate array in + `[longitude, latitude]` order +- **geoJsonB** (object): second GeoJSON object, or a coordinate array in + `[longitude, latitude]` order +- **ellipsoid** (string, *optional*): reference ellipsoid to use. + Supported are `"sphere"` (default) and `"wgs84"`. +- returns **distance** (number): the distance between the centroid points of + the two objects on the reference ellipsoid in **meters** + +```aql +LET polygon = { + type: "Polygon", + coordinates: [[[-11.5, 23.5], [-10.5, 26.1], [-11.2, 27.1], [-11.5, 23.5]]] +} +FOR doc IN collectionName + LET distance = GEO_DISTANCE(doc.geometry, polygon) // calculates the distance + RETURN distance +``` + +You can optimize queries that contain a `FILTER` expression of the following +form with an S2-based [geospatial index](../../indexes-and-search/indexing/working-with-indexes/geo-spatial-indexes.md): + +```aql +FOR doc IN coll + FILTER GEO_DISTANCE(geoJson, doc.geo) <= limit + ... +``` + +In this example, you would create the index for the collection `coll`, on the +attribute `geo`. You need to set the `geoJson` index option to `true`. +`geoJson` needs to evaluate to a valid GeoJSON object. `limit` must be a +distance in meters; it cannot be an expression. An upper bound with `<`, +a lower bound with `>` or `>=`, or both, are equally supported. + +You can also optimize queries that use a `SORT` condition of the following form +with a geospatial index: + +```aql + SORT GEO_DISTANCE(geoJson, doc.geo) +``` + +The index covers returning matches from closest to furthest away, or vice versa. +You may combine such a `SORT` with a `FILTER` expression that utilizes the +geospatial index, too, via the [`GEO_DISTANCE()`](#geo_distance), +[`GEO_CONTAINS()`](#geo_contains), and [`GEO_INTERSECTS()`](#geo_intersects) +functions. + +### GEO_AREA() + +`GEO_AREA(geoJson, ellipsoid) → area` + +Return the area for a [Polygon](#polygon) or [MultiPolygon](#multipolygon) +on a sphere with the average Earth radius, or an ellipsoid. + +- **geoJson** (object): a GeoJSON object +- **ellipsoid** (string, *optional*): reference ellipsoid to use. + Supported are `"sphere"` (default) and `"wgs84"`. 
+- returns **area** (number): the area of the polygon in **square meters** + +```aql +LET polygon = { + type: "Polygon", + coordinates: [[[-11.5, 23.5], [-10.5, 26.1], [-11.2, 27.1], [-11.5, 23.5]]] +} +RETURN GEO_AREA(polygon, "wgs84") +``` + +### GEO_EQUALS() + +`GEO_EQUALS(geoJsonA, geoJsonB) → bool` + +Checks whether two [GeoJSON objects](#geojson) are equal or not. + +- **geoJsonA** (object): first GeoJSON object. +- **geoJsonB** (object): second GeoJSON object. +- returns **bool** (bool): `true` if they are equal, otherwise `false`. + +```aql +LET polygonA = GEO_POLYGON([ + [-11.5, 23.5], [-10.5, 26.1], [-11.2, 27.1], [-11.5, 23.5] +]) +LET polygonB = GEO_POLYGON([ + [-11.5, 23.5], [-10.5, 26.1], [-11.2, 27.1], [-11.5, 23.5] +]) +RETURN GEO_EQUALS(polygonA, polygonB) // true +``` + +```aql +LET polygonA = GEO_POLYGON([ + [-11.1, 24.0], [-10.5, 26.1], [-11.2, 27.1], [-11.1, 24.0] +]) +LET polygonB = GEO_POLYGON([ + [-11.5, 23.5], [-10.5, 26.1], [-11.2, 27.1], [-11.5, 23.5] +]) +RETURN GEO_EQUALS(polygonA, polygonB) // false +``` + +### GEO_INTERSECTS() + +`GEO_INTERSECTS(geoJsonA, geoJsonB) → bool` + +Checks whether the [GeoJSON object](#geojson) `geoJsonA` +intersects with `geoJsonB` (i.e. at least one point in B is also in A or vice-versa). + +- **geoJsonA** (object): first GeoJSON object +- **geoJsonB** (object): second GeoJSON object, or a coordinate array in + `[longitude, latitude]` order +- returns **bool** (bool): true if B intersects A, false otherwise + +You can optimize queries that contain a `FILTER` expression of the following +form with an S2-based [geospatial index](../../indexes-and-search/indexing/working-with-indexes/geo-spatial-indexes.md): + +```aql +FOR doc IN coll + FILTER GEO_INTERSECTS(geoJson, doc.geo) + ... +``` + +In this example, you would create the index for the collection `coll`, on the +attribute `geo`. You need to set the `geoJson` index option to `true`. +`geoJson` needs to evaluate to a valid GeoJSON object. Also note +the argument order: the stored document attribute `doc.geo` is passed as the +second argument. Passing it as the first argument, like +`FILTER GEO_INTERSECTS(doc.geo, geoJson)` to test whether `doc.geo` intersects +`geoJson`, cannot utilize the index. + +### GEO_IN_RANGE() + +Introduced in: v3.8.0 + +`GEO_IN_RANGE(geoJsonA, geoJsonB, low, high, includeLow, includeHigh) → bool` + +Checks whether the distance between two [GeoJSON objects](#geojson) +lies within a given interval. The distance is measured from the **centroid** of +each shape. + +- **geoJsonA** (object\|array): first GeoJSON object, or a coordinate array + in `[longitude, latitude]` order +- **geoJsonB** (object\|array): second GeoJSON object, or a coordinate array + in `[longitude, latitude]` order +- **low** (number): minimum value of the desired range +- **high** (number): maximum value of the desired range +- **includeLow** (bool, optional): whether the minimum value shall be included + in the range (left-closed interval) or not (left-open interval). The default + value is `true` +- **includeHigh** (bool): whether the maximum value shall be included in the + range (right-closed interval) or not (right-open interval). The default value + is `true` +- returns **bool** (bool): whether the evaluated distance lies within the range + +### IS_IN_POLYGON() + +Determine whether a point is inside a polygon. 
+ +{{< warning >}} +The `IS_IN_POLYGON()` AQL function is **deprecated** as of ArangoDB 3.4.0 in +favor of the new [`GEO_CONTAINS()` AQL function](#geo_contains), which works with +[GeoJSON](https://tools.ietf.org/html/rfc7946) Polygons and MultiPolygons. +{{< /warning >}} + +`IS_IN_POLYGON(polygon, latitude, longitude) → bool` + +- **polygon** (array): an array of arrays with 2 elements each, representing the + points of the polygon in the format `[latitude, longitude]` +- **latitude** (number): the latitude of the point to search +- **longitude** (number): the longitude of the point to search +- returns **bool** (bool): `true` if the point (`[latitude, longitude]`) is + inside the `polygon` or `false` if it's not. The result is undefined (can be + `true` or `false`) if the specified point is exactly on a boundary of the + polygon. + +```aql +// checks if the point (latitude 4, longitude 7) is contained inside the polygon +IS_IN_POLYGON( [ [ 0, 0 ], [ 0, 10 ], [ 10, 10 ], [ 10, 0 ] ], 4, 7 ) +``` + +--- + +`IS_IN_POLYGON(polygon, coord, useLonLat) → bool` + +The 2nd parameter can alternatively be specified as an array with two values. + +By default, each array element in `polygon` is expected to be in the format +`[latitude, longitude]`. This can be changed by setting the 3rd parameter to `true` to +interpret the points as `[longitude, latitude]`. `coord` is then also interpreted in +the same way. + +- **polygon** (array): an array of arrays with 2 elements each, representing the + points of the polygon +- **coord** (array): the point to search as a numeric array with two elements +- **useLonLat** (bool, *optional*): if set to `true`, the coordinates in + `polygon` and the coordinate pair `coord` are interpreted as + `[longitude, latitude]` (like in GeoJSON). The default is `false` and the + format `[latitude, longitude]` is expected. +- returns **bool** (bool): `true` if the point `coord` is inside the `polygon` + or `false` if it's not. The result is undefined (can be `true` or `false`) if + the specified point is exactly on a boundary of the polygon. + +```aql +// checks if the point (lat 4, lon 7) is contained inside the polygon +IS_IN_POLYGON( [ [ 0, 0 ], [ 0, 10 ], [ 10, 10 ], [ 10, 0 ] ], [ 4, 7 ] ) + +// checks if the point (lat 4, lon 7) is contained inside the polygon +IS_IN_POLYGON( [ [ 0, 0 ], [ 10, 0 ], [ 10, 10 ], [ 0, 10 ] ], [ 7, 4 ], true ) +``` + +## GeoJSON Constructors + +The following helper functions are available to easily create valid GeoJSON +output. In all cases you can write equivalent JSON yourself, but these functions +will help you to make all your AQL queries shorter and easier to read. + +### GEO_LINESTRING() + +`GEO_LINESTRING(points) → geoJson` + +Construct a GeoJSON LineString. +Needs at least two longitude/latitude pairs. + +- **points** (array): an array of `[longitude, latitude]` pairs +- returns **geoJson** (object): a valid GeoJSON LineString + +```aql +--- +name: aqlGeoLineString_1 +description: '' +--- +RETURN GEO_LINESTRING([ + [35, 10], [45, 45] +]) +``` + +### GEO_MULTILINESTRING() + +`GEO_MULTILINESTRING(points) → geoJson` + +Construct a GeoJSON MultiLineString. +Needs at least two elements consisting valid LineStrings coordinate arrays. 
+ +- **points** (array): array of LineStrings +- returns **geoJson** (object): a valid GeoJSON MultiLineString + +```aql +--- +name: aqlGeoMultiLineString_1 +description: '' +--- +RETURN GEO_MULTILINESTRING([ + [[100.0, 0.0], [101.0, 1.0]], + [[102.0, 2.0], [101.0, 2.3]] +]) +``` + +### GEO_MULTIPOINT() + +`GEO_MULTIPOINT(points) → geoJson` + +Construct a GeoJSON LineString. Needs at least two longitude/latitude pairs. + +- **points** (array): an array of `[longitude, latitude]` pairs +- returns **geoJson** (object): a valid GeoJSON Point + +```aql +--- +name: aqlGeoMultiPoint_1 +description: '' +--- +RETURN GEO_MULTIPOINT([ + [35, 10], [45, 45] +]) +``` + +### GEO_POINT() + +`GEO_POINT(longitude, latitude) → geoJson` + +Construct a valid GeoJSON Point. + +- **longitude** (number): the longitude portion of the point +- **latitude** (number): the latitude portion of the point +- returns **geoJson** (object): a GeoJSON Point + +```aql +--- +name: aqlGeoPoint_1 +description: '' +--- +RETURN GEO_POINT(1.0, 2.0) +``` + +### GEO_POLYGON() + +`GEO_POLYGON(points) → geoJson` + +Construct a GeoJSON Polygon. Needs at least one array representing +a linear ring. Each linear ring consists of an array with at least four +longitude/latitude pairs. The first linear ring must be the outermost, while +any subsequent linear ring will be interpreted as holes. + +For details about the rules, see [GeoJSON polygons](#polygon). + +- **points** (array): an array of (arrays of) `[longitude, latitude]` pairs +- returns **geoJson** (object\|null): a valid GeoJSON Polygon + +A validation step is performed using the S2 geometry library. If the +validation is not successful, an AQL warning is issued and `null` is +returned. + +Simple Polygon: + +```aql +--- +name: aqlGeoPolygon_1 +description: '' +--- +RETURN GEO_POLYGON([ + [0.0, 0.0], [7.5, 2.5], [0.0, 5.0], [0.0, 0.0] +]) +``` + +Advanced Polygon with a hole inside: + +```aql +--- +name: aqlGeoPolygon_2 +description: '' +--- +RETURN GEO_POLYGON([ + [[35, 10], [45, 45], [15, 40], [10, 20], [35, 10]], + [[20, 30], [30, 20], [35, 35], [20, 30]] +]) +``` + +### GEO_MULTIPOLYGON() + +`GEO_MULTIPOLYGON(polygons) → geoJson` + +Construct a GeoJSON MultiPolygon. Needs at least two Polygons inside. +See [`GEO_POLYGON()`](#geo_polygon) and [GeoJSON MultiPolygon](#multipolygon) +for the rules of Polygon and MultiPolygon construction. + +- **polygons** (array): an array of arrays of arrays of `[longitude, latitude]` pairs +- returns **geoJson** (object\|null): a valid GeoJSON MultiPolygon + +A validation step is performed using the S2 geometry library, if the +validation is not successful, an AQL warning is issued and `null` is +returned. + +MultiPolygon comprised of a simple Polygon and a Polygon with hole: + +```aql +--- +name: aqlGeoMultiPolygon_1 +description: '' +--- +RETURN GEO_MULTIPOLYGON([ + [ + [[40, 40], [20, 45], [45, 30], [40, 40]] + ], + [ + [[20, 35], [10, 30], [10, 10], [30, 5], [45, 20], [20, 35]], + [[30, 20], [20, 15], [20, 25], [30, 20]] + ] +]) +``` + +## Geo Index Functions + +{{< warning >}} +The AQL functions `NEAR()`, `WITHIN()` and `WITHIN_RECTANGLE()` are +deprecated starting from version 3.4.0. +Please use the [Geo utility functions](#geo-utility-functions) instead. +{{< /warning >}} + +AQL offers the following functions to filter data based on +[geo indexes](../../indexes-and-search/indexing/working-with-indexes/geo-spatial-indexes.md). These functions require the collection +to have at least one geo index. 
If no geo index can be found, calling this +function will fail with an error at runtime. There is no error when explaining +the query however. + +### NEAR() + +{{< warning >}} +`NEAR()` is a deprecated AQL function from version 3.4.0 on. +Use [`DISTANCE()`](#distance) in a query like this instead: + +```aql +FOR doc IN coll + SORT DISTANCE(doc.latitude, doc.longitude, paramLatitude, paramLongitude) ASC + RETURN doc +``` +Assuming there exists a geo-type index on `latitude` and `longitude`, the +optimizer will recognize it and accelerate the query. +{{< /warning >}} + +`NEAR(coll, latitude, longitude, limit, distanceName) → docArray` + +Return at most *limit* documents from collection *coll* that are near +*latitude* and *longitude*. The result contains at most *limit* documents, +returned sorted by distance, with closest distances being returned first. +Optionally, the distances in meters between the specified coordinate pair +(*latitude* and *longitude*) and the stored coordinate pairs can be returned as +well. To make use of that, the desired attribute name for the distance result +has to be specified in the *distanceName* argument. The result documents will +contain the distance value in an attribute of that name. + +- **coll** (collection): a collection +- **latitude** (number): the latitude of the point to search +- **longitude** (number): the longitude of the point to search +- **limit** (number, *optional*): cap the result to at most this number of + documents. The default is 100. If more documents than *limit* are found, + it is undefined which ones will be returned. +- **distanceName** (string, *optional*): include the distance (in meters) + between the reference point and the stored point in the result, using the + attribute name *distanceName* +- returns **docArray** (array): an array of documents, sorted by distance + (shortest distance first) + +### WITHIN() + +{{< warning >}} +`WITHIN()` is a deprecated AQL function from version 3.4.0 on. +Use [`DISTANCE()`](#distance) in a query like this instead: + +```aql +FOR doc IN coll + LET d = DISTANCE(doc.latitude, doc.longitude, paramLatitude, paramLongitude) + FILTER d <= radius + SORT d ASC + RETURN doc +``` + +Assuming there exists a geo-type index on `latitude` and `longitude`, the +optimizer will recognize it and accelerate the query. +{{< /warning >}} + +`WITHIN(coll, latitude, longitude, radius, distanceName) → docArray` + +Return all documents from collection *coll* that are within a radius of *radius* +around the specified coordinate pair (*latitude* and *longitude*). The documents +returned are sorted by distance to the reference point, with the closest +distances being returned first. Optionally, the distance (in meters) between the +reference point and the stored point can be returned as well. To make +use of that, an attribute name for the distance result has to be specified in +the *distanceName* argument. The result documents will contain the distance +value in an attribute of that name. 
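+
+A sketch of such a legacy call, assuming a `places` collection with a geo index
+on `latitude` and `longitude` (the coordinates are the Brandenburg Gate ones
+used above; prefer the `DISTANCE()` pattern shown in the warning):
+
+```aql
+FOR doc IN WITHIN(places, 52.5163, 13.3777, 5000, "distance")
+  RETURN { name: doc.name, distance: doc.distance }
+```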
+ +- **coll** (collection): a collection +- **latitude** (number): the latitude of the point to search +- **longitude** (number): the longitude of the point to search +- **radius** (number): radius in meters +- **distanceName** (string, *optional*): include the distance (in meters) + between the reference point and stored point in the result, using the + attribute name *distanceName* +- returns **docArray** (array): an array of documents, sorted by distance + (shortest distance first) + +### WITHIN_RECTANGLE() + +{{< warning >}} +`WITHIN_RECTANGLE()` is a deprecated AQL function from version 3.4.0 on. Use +[`GEO_CONTAINS()`](#geo_contains) and a GeoJSON polygon instead - but note that +this uses geodesic lines from version 3.10.0 onward +(see [GeoJSON interpretation](#geojson-interpretation)): + +```aql +LET rect = GEO_POLYGON([ [ + [longitude1, latitude1], // bottom-left + [longitude2, latitude1], // bottom-right + [longitude2, latitude2], // top-right + [longitude1, latitude2], // top-left + [longitude1, latitude1], // bottom-left +] ]) +FOR doc IN coll + FILTER GEO_CONTAINS(rect, [doc.longitude, doc.latitude]) + RETURN doc +``` + +Assuming there exists a geo-type index on `latitude` and `longitude`, the +optimizer will recognize it and accelerate the query. +{{< /warning >}} + +`WITHIN_RECTANGLE(coll, latitude1, longitude1, latitude2, longitude2) → docArray` + +Return all documents from collection *coll* that are positioned inside the +bounding rectangle with the points (*latitude1*, *longitude1*) and (*latitude2*, +*longitude2*). There is no guaranteed order in which the documents are returned. + +- **coll** (collection): a collection +- **latitude1** (number): the latitude of the bottom-left point to search +- **longitude1** (number): the longitude of the bottom-left point to search +- **latitude2** (number): the latitude of the top-right point to search +- **longitude2** (number): the longitude of the top-right point to search +- returns **docArray** (array): an array of documents, in random order diff --git a/site/content/arangodb/oem/aql/functions/miscellaneous.md b/site/content/arangodb/oem/aql/functions/miscellaneous.md new file mode 100644 index 0000000000..fba18e52bf --- /dev/null +++ b/site/content/arangodb/oem/aql/functions/miscellaneous.md @@ -0,0 +1,803 @@ +--- +title: Miscellaneous functions in AQL +menuTitle: Miscellaneous +weight: 40 +description: >- + AQL functions that do not fall into other categories are listed here +--- +## Control flow functions + +### FIRST_DOCUMENT() + +`FIRST_DOCUMENT(alternative, ...) → doc` + +Return the first alternative that is a document, and *null* if none of the +alternatives is a document. + +- **alternative** (any, *repeatable*): input of arbitrary type +- returns **doc** (object\|null): document / object or null + +### FIRST_LIST() + +`FIRST_LIST(alternative, ...) → list` + +Return the first alternative that is an array, and *null* if none of the +alternatives is an array. + +- **alternative** (any, *repeatable*): input of arbitrary type +- returns **list** (array\|null): array / list or null + +### MIN_MATCH() + +`MIN_MATCH(expr1, ... exprN, minMatchCount) → fulfilled` + +Match documents where at least **minMatchCount** of the specified +AQL expressions are satisfied. + +There is a corresponding [`MIN_MATCH()` ArangoSearch function](arangosearch.md#min_match) +that can utilize View indexes. 
+ +- **expr** (expression, _repeatable_): any valid AQL expression +- **minMatchCount** (number): minimum number of expressions that should + be satisfied +- returns **fulfilled** (bool): whether at least **minMatchCount** of the + specified expressions are `true` + +You can use `MIN_MATCH()` to filter if two out of three conditions evaluate to +`true` for instance: + +```aql +LET members = [ + { name: "Carol", age: 41, active: true }, + { name: "Doug", age: 56, active: true }, +] +FOR doc IN members + FILTER MIN_MATCH(LENGTH(doc.name) == 5, doc.age >= 50, doc.active, 2) + RETURN doc +``` + +An equivalent filter expression without `MIN_MATCH()` would be more cumbersome: + +```aql + FILTER (LENGTH(doc.name) == 5 AND doc.age >= 50) + OR (doc.age >= 50 AND doc.active) + OR (doc.active AND LENGTH(doc.name) == 5) +``` + +### NOT_NULL() + +`NOT_NULL(alternative, ...) → value` + +Return the first element that is not *null*, and *null* if all alternatives +are *null* themselves. It is also known as `COALESCE()` in SQL. + +- **alternative** (any, *repeatable*): input of arbitrary type +- returns **value** (any): first non-null parameter, or *null* if all arguments + are *null* + +### Ternary operator + +For conditional evaluation, check out the +[ternary operator](../operators.md#ternary-operator). + +## Database functions + +### CHECK_DOCUMENT() + +`CHECK_DOCUMENT(document) → checkResult` + +Returns *true* if *document* is a valid document object, i.e. a document +without any duplicate attribute names. Will return *false* for any +non-objects/non-documents or documents with duplicate attribute names. + +{{< warning >}} +This is an internal function for validating database objects and +is not supposed to be useful for anything else. +{{< /warning >}} + +The primary use case for this function is to apply it on all +documents in a given collection as follows: + +```aql +FOR doc IN collection + FILTER !CHECK_DOCUMENT(doc) + RETURN JSON_STRINGIFY(doc) +``` + +This query will return all documents in the given collection with redundant +attribute names and export them. This output can be used for subsequent +cleanup operations. + +{{< info >}} +When using object literals in AQL, there will be an automatic +removal/cleanup of duplicate attribute names, so the function will be effective +only for **already stored** database documents. Therefore, +`RETURN CHECK_DOCUMENT( { a: 1, a: 2 } )` is expected to return `true`. +{{< /info >}} + +- **document** (object): an arbitrary document / object +- returns **checkResult** (bool): *true* for any valid objects/documents without + duplicate attribute names, and *false* for any non-objects/non-documents or + objects/documents with duplicate attribute names + +### COLLECTION_COUNT() + +`COLLECTION_COUNT(coll) → count` + +Determine the amount of documents in a collection. [`LENGTH()`](#length) +is preferred. + +### COLLECTIONS() + +`COLLECTIONS() → docArray` + +Return an array of collections. + +- returns **docArray** (array): each collection as a document with attributes + *name* and *_id* in an array + +### COUNT() + +This is an alias for [`LENGTH()`](#length). + +### CURRENT_DATABASE() + +`CURRENT_DATABASE() → databaseName` + +Returns the name of the current database. + +The current database is the database name that was specified in the URL path of the request (or defaults to _system database). + +- returns **databaseName** (string): the current database name + +### CURRENT_USER() + +`CURRENT_USER() → userName` + +Return the name of the current user. 
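+
+A minimal usage sketch (the returned name depends on the authenticated user;
+the value in the comment is only illustrative):
+
+```aql
+RETURN CURRENT_USER() // e.g. "root", or null if authentication is disabled
+```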
+ +The current user is the user account name that was specified in the +*Authorization* HTTP header of the request. It will only be populated if +authentication on the server is turned on, and if the query was executed inside +a request context. Otherwise, the return value of this function will be *null*. + +- returns **userName** (string\|null): the current user name, or *null* if + authentication is disabled + +### DECODE_REV() + +`DECODE_REV(revision) → details` + +Decompose the specified `revision` string into its components. +The resulting object has a `date` and a `count` attribute. +This function is supposed to be called with the `_rev` attribute value +of a database document as argument. + +- **revision** (string): revision ID string +- returns **details** (object\|null): object with two attributes + *date* (string in ISO 8601 format) and *count* (integer number), + or *null* + +If the input revision ID is not a string or cannot be processed, the function +issues a warning and returns *null*. + +Please note that the result structure may change in future versions of +ArangoDB in case the internal format of revision strings is modified. Please +also note that the *date* value in the current result provides the date and +time of when the document record was put together on the server, but not +necessarily the time of insertion into the underlying storage engine. Therefore +in case of concurrent document operations the exact document storage order +cannot be derived unambiguously from the revision value. It should thus be +treated as a rough estimate of when a document was created or last updated. + +```aql +DECODE_REV( "_YU0HOEG---" ) +// { "date" : "2019-03-11T16:15:05.314Z", "count" : 0 } +``` + +### DOCUMENT() + +Dynamically look up one or multiple documents from any collections, either using +a collection name and one or more document keys, or one or more document +identifiers. The collections do not need to be known at query compile time, they +can be computed at runtime. + +{{< info >}} +It is recommended to use subqueries with the [`FOR` operation](../high-level-operations/for.md) +and filters over `DOCUMENT()` whenever the collections are known in advance, +especially for [joins](../examples-and-query-patterns/joins.md), because they perform better, you +can add additional filters, and combine it with sorting to get an array of +documents in a guaranteed order. + +Queries that use the `DOCUMENT()` function cannot be +[**cached**](../execution-and-performance/caching-query-results.md), each lookup is executed as +a single operation, the lookups need to be executed on Coordinators for +sharded collections in cluster deployments, and only primary indexes and no +projections can be utilized. +{{< /info >}} + +`DOCUMENT(collection, id) → doc` + +Return the document identified by `id` (document key or identifier) from the +specified `collection`. + +If the document cannot be found, `null` will be returned. +If there is a mismatch between the `collection` passed and the collection in +the document identifier, then `null` will be returned, too. + +The `id` parameter can also be an array of document keys or identifiers. In this +case, the function will return an array of all documents that could be found. +The results are not guaranteed to be in the requested order. Documents that +could not be found are not indicated in the result (no `null` values) and do +also not raise warnings. 
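+
+If you need the documents in a guaranteed order, the recommended alternative is
+a `FOR` loop with a `FILTER` and `SORT`, sketched here with the collection and
+keys from the examples below:
+
+```aql
+FOR p IN persons
+  FILTER p._key IN [ "alice", "bob" ]
+  SORT p._key
+  RETURN p
+```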
+ +- **collection** (string): name of a collection +- **id** (string\|array): a document key, a document identifier, or an array of + document keys, identifiers, or both +- returns **doc** (document\|array\|null): the found document (or `null` if it + was not found), or an array of all found documents **in any order** + +**Examples** + +```aql +--- +name: FUNCTION_DOCUMENT_1 +description: '' +dataset: knows_graph +--- +RETURN DOCUMENT( persons, "persons/alice" ) +``` + +```aql +--- +name: FUNCTION_DOCUMENT_2 +description: '' +dataset: knows_graph +--- +RETURN DOCUMENT( persons, "alice" ) +``` + +```aql +--- +name: FUNCTION_DOCUMENT_3 +description: '' +dataset: knows_graph +--- +RETURN DOCUMENT( persons, [ "persons/alice", "persons/bob" ] ) +``` + +```aql +--- +name: FUNCTION_DOCUMENT_4 +description: '' +dataset: knows_graph +--- +RETURN DOCUMENT( persons, [ "alice", "bob" ] ) +``` + +```aql +--- +name: FUNCTION_DOCUMENT_5 +description: '' +dataset: knows_graph +bindVars: + { + "@coll": "persons", + "key": "alice" + } +--- +RETURN DOCUMENT( @@coll, @key ) +``` + +```aql +--- +name: FUNCTION_DOCUMENT_6 +description: '' +dataset: knows_graph +bindVars: + { + "@coll": "persons", + "keys": ["alice", "bob"] + } +--- +RETURN DOCUMENT( @@coll, @keys ) +``` + +--- + +`DOCUMENT(id) → doc` + +The function can also be used with a single `id` parameter as follows: + +- **id** (string\|array): a document identifier, or an array of identifiers +- returns **doc** (document\|array\|null): the found document (or `null` if it + was not found), or an array of the found documents **in any order** + +**Examples** + +```aql +--- +name: FUNCTION_DOCUMENT_7 +description: '' +dataset: knows_graph +--- +RETURN DOCUMENT("persons/alice") +``` + +```aql +--- +name: FUNCTION_DOCUMENT_8 +description: '' +dataset: knows_graph +--- +RETURN DOCUMENT( [ "persons/alice", "persons/bob" ] ) +``` + +```aql +--- +name: FUNCTION_DOCUMENT_9 +description: '' +dataset: knows_graph +bindVars: + { + "key": "persons/alice" + } +--- +RETURN DOCUMENT( @key ) +``` + +```aql +--- +name: FUNCTION_DOCUMENT_10 +description: '' +dataset: knows_graph +bindVars: + { + "keys": ["persons/alice", "persons/bob"] + } +--- +RETURN DOCUMENT( @keys ) +``` + +```aql +--- +name: FUNCTION_DOCUMENT_11 +description: '' +dataset: knows_graph +bindVars: + { + "key": "bob" + } +--- +RETURN DOCUMENT( CONCAT("persons/", @key) ) +``` + +### LENGTH() + +`LENGTH(coll) → documentCount` + +Determine the amount of documents in a collection. + +It calls [`COLLECTION_COUNT()`](#collection_count) internally. + +- **coll** (collection): a collection (not string) +- returns **documentCount** (number): the total amount of documents in *coll* + +`LENGTH()` can also determine the [number of elements](array.md#length) in an array, +the [number of attribute keys](document-object.md#length) of an object / document and +the [character length](string.md#length) of a string. + +### SHARD_ID() + +`SHARD_ID(collection, shardKeys) → shardId` + +Return the shard in a collection that contains the specified shard keys. + +- **collection** (string): a collection name +- **shardKeys** (object): a set of shard keys and values. Any missing shard key + is substituted with the `null` value. +- returns **shardId** (string): the responsible shard for the specified + shard keys in the given collection. On deployments other than clusters, + the collection name itself is returned. 
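+
+On a single server, for instance, a call like the following simply returns the
+collection name (a sketch; see the cluster example below for the sharded case):
+
+```aql
+RETURN SHARD_ID("observations", { "time": "2021-05-25 07:15:00" })
+// "observations" on a single server
+```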
+ +```aql +--- +name: shard_id1 +description: '' +type: cluster +dataset: observationsSampleDataset +--- +RETURN SHARD_ID("observations", { "time": "2021-05-25 07:15:00", "subject": "xh458", "val": 10 }) +``` + +## Hash functions + +### HASH() + +`HASH(value) → hashNumber` + +Calculate a hash value for *value*. + +- **value** (any): an element of arbitrary type +- returns **hashNumber** (number): a hash value of *value* + +*value* is not required to be a string, but can have any data type. The calculated +hash value will take the data type of *value* into account, so for example the +number *1* and the string *"1"* will have different hash values. For arrays the +hash values will be equal if the arrays contain exactly the same values +(including value types) in the same order. For objects the same hash values will +be created if the objects have exactly the same attribute names and values +(including value types). The order in which attributes appear inside objects +is not important for hashing. + +The hash value returned by this function is a number. The hash algorithm is not +guaranteed to remain the same in future versions of ArangoDB. The hash values +should therefore be used only for temporary calculations, e.g. to compare if two +documents are the same, or for grouping values in queries. + +### MINHASH() + +`MINHASH(values, numHashes) → hashes` + +Calculate MinHash signatures for the *values* using locality-sensitive hashing. +The result can be used to approximate the Jaccard similarity of sets. + +- **values** (array): an array with elements of arbitrary type to hash +- **numHashes** (number): the size of the MinHash signature. Must be + greater or equal to `1`. The signature size defines the probabilistic error + (`err = rsqrt(numHashes)`). For an error amount that does not exceed 5% + (`0.05`), use a size of `1 / (0.05 * 0.05) = 400`. +- returns **hashes** (array): an array of strings with the encoded hash values + +**Examples** + +```aql +--- +name: aqlMinHash +description: '' +--- +RETURN MINHASH(["foo", "bar", "baz"], 5) +``` + +### MINHASH_COUNT() + +`MINHASH_COUNT(error) → numHashes` + +Calculate the number of hashes (MinHash signature size) needed to not exceed the +specified error amount. + +- **error** (number): the probabilistic error you can tolerate in the range `[0, 1)` +- returns **numHashes** (number): the required number of hashes to not exceed + the specified error amount + +**Examples** + +```aql +--- +name: aqlMinHashCount +description: '' +--- +RETURN MINHASH_COUNT(0.05) +``` + +### MINHASH_ERROR() + +`MINHASH_ERROR(numHashes) → error` + +Calculate the error amount based on the number of hashes (MinHash signature size). + +- **numHashes** (number): the number of hashes you want to check +- returns **error** (number): the probabilistic error to expect with the specified + number of hashes + +**Examples** + +```aql +--- +name: aqlMinHashError +description: '' +--- +RETURN MINHASH_ERROR(400) +``` + +### String-based hashing + +See the following string functions: + +- [`CRC32()`](string.md#crc32) +- [`FNV64()`](string.md#fnv64) +- [`MD5()`](string.md#md5) +- [`SHA1()`](string.md#sha1) +- [`SHA512()`](string.md#sha512) + +## Function calling + +### APPLY() + +`APPLY(functionName, arguments) → retVal` + +Dynamically call the function *funcName* with the arguments specified. +Arguments are given as array and are passed as separate parameters to +the called function. + +Both built-in and user-defined functions can be called. 
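+
+For a user-defined function, pass its fully qualified name. The function in the
+following sketch is hypothetical and would have to be registered first:
+
+```aql
+APPLY( "MYFUNCTIONS::MULTIPLY", [ 6, 7 ] )
+// 42, assuming such a UDF exists and multiplies its arguments
+```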
+ +- **funcName** (string): a function name +- **arguments** (array, *optional*): an array with elements of arbitrary type +- returns **retVal** (any): the return value of the called function + +```aql +APPLY( "SUBSTRING", [ "this is a test", 0, 7 ] ) +// "this is" +``` + +### CALL() + +`CALL(funcName, arg1, arg2, ... argN) → retVal` + +Dynamically call the function *funcName* with the arguments specified. +Arguments are given as multiple parameters and passed as separate +parameters to the called function. + +Both built-in and user-defined functions can be called. + +- **funcName** (string): a function name +- **args** (any, *repeatable*): an arbitrary number of elements as + multiple arguments, can be omitted +- returns **retVal** (any): the return value of the called function + +```aql +CALL( "SUBSTRING", "this is a test", 0, 4 ) +// "this" +``` + +## Other functions + +### ASSERT() / WARN() + +`ASSERT(expr, message) → retVal`\ +`WARN(expr, message) → retVal` + +The two functions evaluate an expression. In case the expression evaluates to +*true* both functions will return *true*. If the expression evaluates to +*false* *ASSERT* will throw an error and *WARN* will issue a warning and return +*false*. This behavior allows the use of *ASSERT* and *WARN* in `FILTER` +conditions. + +- **expr** (expression): AQL expression to be evaluated +- **message** (string): message that will be used in exception or warning if expression evaluates to false +- returns **retVal** (bool): returns true if expression evaluates to true + +```aql +FOR i IN 1..3 FILTER ASSERT(i > 0, "i is not greater 0") RETURN i +FOR i IN 1..3 FILTER WARN(i < 2, "i is not smaller 2") RETURN i +``` + +### IN_RANGE() + +`IN_RANGE(value, low, high, includeLow, includeHigh) → included` + +Returns true if *value* is greater than (or equal to) *low* and less than +(or equal to) *high*. The values can be of different types. They are compared +as described in [Type and value order](../fundamentals/type-and-value-order.md) and +is thus identical to the comparison operators `<`, `<=`, `>` and `>=` in +behavior. + +- **value** (any): an element of arbitrary type +- **low** (any): minimum value of the desired range +- **high** (any): maximum value of the desired range +- **includeLow** (bool): whether the minimum value shall be included in + the range (left-closed interval) or not (left-open interval) +- **includeHigh** (bool): whether the maximum value shall be included in + the range (right-closed interval) or not (right-open interval) +- returns **included** (bool): whether *value* is in the range + +If *low* and *high* are the same, but *includeLow* and/or *includeHigh* is set +to `false`, then nothing will match. If *low* is greater than *high* nothing will +match either. + +{{< info >}} +The regular `IN_RANGE()` function cannot utilize indexes, unlike its +ArangoSearch counterpart which can use the View index. 
+{{< /info >}} + +```aql +--- +name: aqlMiscInRange_1 +description: '' +--- +LET value = 4 +RETURN IN_RANGE(value, 3, 5, true, true) +/* same as: + RETURN value >= 3 AND value <= 5 +*/ +``` + + + +```aql +--- +name: aqlMiscInRange_2 +description: '' +--- +FOR value IN 2..6 + RETURN { value, in_range: IN_RANGE(value, 3, 5, false, true) } + /* same as: + RETURN { value, in_range: value > 3 AND value <= 5 } + */ +``` + + + +```aql +--- +name: aqlMiscInRange_3 +description: '' +--- +LET coll = [ + { text: "fennel" }, + { text: "fox grape" }, + { text: "forest strawberry" }, + { text: "fungus" } +] +FOR doc IN coll + FILTER IN_RANGE(doc.text,"fo", "fp", true, false) // values with prefix "fo" + /* same as: + FILTER doc.text >= "fo" AND doc.text < "fp" + */ + RETURN doc +``` + +### PREGEL_RESULT() + +`PREGEL_RESULT(jobId, withId) → results` + +Allows to access results of a Pregel job that are only held in memory. +See [Pregel AQL integration](../../data-science/pregel/_index.md#aql-integration). + +- **jobId** (string): the `id` of a Pregel job +- **withId** (bool): if enabled, then the document `_id` is returned in + addition to the `_key` for each vertex +- returns **results** (array): an array of objects, one element per vertex, with + the attributes computed by the Pregel algorithm and the document key (and + optionally identifier) + +## Internal functions + +The following functions are used during development of ArangoDB as a database +system, primarily for unit testing. They are not intended to be used by end +users, especially not in production environments. + +### FAIL() + +`FAIL(reason)` + +Let a query fail on purpose. Can be used in a conditional branch, or to verify +if lazy evaluation / short circuiting is used for instance. + +- **reason** (string): an error message +- returns nothing, because the query is aborted + +```aql +RETURN 1 == 1 ? "okay" : FAIL("error") // "okay" +RETURN 1 == 1 || FAIL("error") ? true : false // true +RETURN 1 == 2 && FAIL("error") ? true : false // false +RETURN 1 == 1 && FAIL("error") ? true : false // aborted with error +``` + +### NOOPT() / NOEVAL() + +`NOOPT(value) → retVal` + +No-operation that prevents certain query compile-time and run-time optimizations. +Constant expressions can be forced to be evaluated at runtime with this. +This function is marked as non-deterministic so its argument withstands +query optimization. + +`NOEVAL(value) → retVal` + +Same as `NOOPT()`, except that it is marked as deterministic. + +There is no need to call these functions explicitly, they are mainly used for +internal testing. + +- **value** (any): a value of arbitrary type +- returns **retVal** (any): *value* + +```aql +// differences in execution plan (explain) +FOR i IN 1..3 RETURN (1 + 1) // const assignment +FOR i IN 1..3 RETURN NOOPT(1 + 1) // simple expression +FOR i IN 1..3 RETURN NOEVAL(1 + 1) // simple expression + +RETURN NOOPT( 123 ) // evaluates 123 at runtime +RETURN NOOPT( CONCAT("a", "b") ) // evaluates concatenation at runtime +``` + +### PASSTHRU() + +`PASSTHRU(value) → retVal` + +Simply returns its call argument unmodified. There is no need to call this function +explicitly, it is mainly used for internal testing. + +- **value** (any): a value of arbitrary type +- returns **retVal** (any): *value* + +### SCHEMA_GET() + +`SCHEMA_GET(collection) → schema` + +Return the schema definition as defined in the properties of the +specified collection. 
+ +- **collection** (string): name of a collection +- returns **schema** (object): schema definition object + +```aql +RETURN SCHEMA_GET("myColl") +``` + +### SCHEMA_VALIDATE() + +`SCHEMA_VALIDATE(doc, schema) → result` + +Test if the given document is valid according to the schema definition. + +- **doc** (doc): document +- **schema** (object): schema definition object +- returns **result** (object): an object with the following attributes: + - **valid** (bool): `true` if the document fulfills the schema's requirements, + otherwise it will be `false` and *errorMessage* will be set + - **errorMessage** (string): details about the validation failure + +If the input document **doc** is not an object, the function will return +a *null* value and register a warning in the query. + +Using an empty **schema** object is equivalent to specifying a **schema** +value of *null*, which will make all input objects successfully pass the +validation. + +### SLEEP() + +`SLEEP(seconds) → null` + +Wait for a certain amount of time before continuing the query. + +- **seconds** (number): amount of time to wait +- returns a *null* value + +```aql +SLEEP(1) // wait 1 second +SLEEP(0.02) // wait 20 milliseconds +``` + +### V8() + +`V8(expression) → retVal` + +No-operation that enforces the usage of the V8 JavaScript engine. There is +no need to call this function explicitly, it is mainly used for internal +testing. + +- **expression** (any): arbitrary expression +- returns **retVal** (any): the return value of the *expression* + +```aql +// differences in execution plan (explain) +FOR i IN 1..3 RETURN (1 + 1) // const assignment +FOR i IN 1..3 RETURN V8(1 + 1) // simple expression +``` + +### VERSION() + +`VERSION() → serverVersion` + +Returns the server version as a string. In a cluster, returns the version +of the Coordinator. + +- returns **serverVersion** (string): the server version string + +```aql +RETURN VERSION() // e.g. "3.10.0" +``` diff --git a/site/content/arangodb/oem/aql/functions/numeric.md b/site/content/arangodb/oem/aql/functions/numeric.md new file mode 100644 index 0000000000..401bae6b71 --- /dev/null +++ b/site/content/arangodb/oem/aql/functions/numeric.md @@ -0,0 +1,770 @@ +--- +title: Numeric functions in AQL +menuTitle: Numeric +weight: 45 +description: >- + AQL offers functions for numeric calculations +--- +## ABS() + +`ABS(value) → unsignedValue` + +Return the absolute part of *value*. + +- **value** (number): any number, positive or negative +- returns **unsignedValue** (number): the number without + or - sign + +```aql +ABS(-5) // 5 +ABS(+5) // 5 +ABS(3.5) // 3.5 +``` + +## ACOS() + +`ACOS(value) → num` + +Return the arccosine of *value*. + +- **value** (number): the input value +- returns **num** (number\|null): the arccosine of *value*, or *null* if *value* is + outside the valid range -1 and 1 (inclusive) + +```aql +ACOS(-1) // 3.141592653589793 +ACOS(0) // 1.5707963267948966 +ACOS(1) // 0 +ACOS(2) // null +``` + +## ASIN() + +`ASIN(value) → num` + +Return the arcsine of *value*. + +- **value** (number): the input value +- returns **num** (number\|null): the arcsine of *value*, or *null* if *value* is + outside the valid range -1 and 1 (inclusive) + +```aql +ASIN(1) // 1.5707963267948966 +ASIN(0) // 0 +ASIN(-1) // -1.5707963267948966 +ASIN(2) // null +``` + +## ATAN() + +`ATAN(value) → num` + +Return the arctangent of *value*. 
+ +- **value** (number): the input value +- returns **num** (number): the arctangent of *value* + +```aql +ATAN(-1) // -0.7853981633974483 +ATAN(0) // 0 +ATAN(10) // 1.4711276743037347 +``` + +## ATAN2() + +`ATAN2(y, x) → num` + +Return the arctangent of the quotient of *y* and *x*. + +```aql +ATAN2(0, 0) // 0 +ATAN2(1, 0) // 1.5707963267948966 +ATAN2(1, 1) // 0.7853981633974483 +ATAN2(-10, 20) // -0.4636476090008061 +``` + +## AVERAGE() + +`AVERAGE(numArray) → mean` + +Return the average (arithmetic mean) of the values in *array*. + +- **numArray** (array): an array of numbers, *null* values are ignored +- returns **mean** (number\|null): the average value of *numArray*. If the array is + empty or contains *null* values only, *null* will be returned. + +```aql +AVERAGE( [5, 2, 9, 2] ) // 4.5 +AVERAGE( [ -3, -5, 2 ] ) // -2 +AVERAGE( [ 999, 80, 4, 4, 4, 3, 3, 3 ] ) // 137.5 +``` + +## AVG() + +This is an alias for [`AVERAGE()`](#average). + +## CEIL() + +`CEIL(value) → roundedValue` + +Return the integer closest but not less than *value*. + +To round downward, see [`FLOOR()`](#floor).\ +To round to the nearest integer value, see [`ROUND()`](#round). + +- **value** (number): any number +- returns **roundedValue** (number): the value rounded to the ceiling + +```aql +CEIL(2.49) // 3 +CEIL(2.50) // 3 +CEIL(-2.50) // -2 +CEIL(-2.51) // -2 +``` + +## COS() + +`COS(value) → num` + +Return the cosine of *value*. + +- **value** (number): the input value +- returns **num** (number): the cosine of *value* + +```aql +COS(1) // 0.5403023058681398 +COS(0) // 1 +COS(-3.141592653589783) // -1 +COS(RADIANS(45)) // 0.7071067811865476 +``` + +## COSINE_SIMILARITY() + +Introduced in: v3.9.0 + +`COSINE_SIMILARITY(x, y) → num` + +Return the [cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) +between *x* and *y*. + +To calculate the distance, see [`L1_DISTANCE()`](#l1_distance) and +[`L2_DISTANCE()`](#l2_distance). + +- **x** (array): first input array +- **y** (array): second input array +- returns **num** (number\|array): the cosine similarity value. + If one of the inputs is a nested (2D) array, then an array is returned. + The length of each 2D array row should be equal to the length of second input + array in that case. + +In case of invalid input values the function returns **null** and produces a warning. + +```aql +COSINE_SIMILARITY([0,1], [1,0]) // 0 +COSINE_SIMILARITY([[0,1,0,1],[1,0,0,1],[1,1,1,0],[0,0,0,1]], [1,1,1,1]) // [0.707, 0.707, 0.866, 0.5] +COSINE_SIMILARITY([-1,0], [1,0]) // -1 +``` + +## DECAY_GAUSS() + +Introduced in: v3.9.0 + +`DECAY_GAUSS(value, origin, scale, offset, decay) → score` + +Calculate the score for one or multiple values with a **Gaussian function** that +decays depending on the distance of a numeric value from a user-given origin. 
+ +- **value** (number\|array): the input value or an array with input values +- **origin** (number): the point of origin used for calculating the distance +- **scale** (number): defines the distance from `origin` + `offset` at which + the computed score will equal the `decay` parameter +- **offset** (number): the decay function will be evaluated for distance values + greater than the defined offset +- **decay** (number): the decay parameter defines how input values are scored + at the distance given by the `scale` parameter +- returns **score** (number\|array): a single score or an array of scores + depending on the type of the input `value` + +```aql +DECAY_GAUSS(41, 40, 5, 5, 0.5) // 1 +DECAY_GAUSS([20, 41], 40, 5, 5, 0.5) // [0.0019531250000000017, 1.0] +DECAY_GAUSS(49.9889, 49.987, 0.001, 0.001, 0.2) // 0.2715403018822964 +``` + +## DECAY_EXP() + +Introduced in: v3.9.0 + +`DECAY_EXP(value, origin, scale, offset, decay) → num, array` + +Calculate the score for one or multiple values with an **exponential function** +that decays depending on the distance of a numeric value from a user-given origin. + +- **value** (number\|array): the input value or an array with input values +- **origin** (number): the point of origin used for calculating the distance +- **scale** (number): defines the distance from `origin` + `offset` at which + the computed score will equal the `decay` parameter +- **offset** (number): the decay function will be evaluated for distance values + greater than the defined offset +- **decay** (number): the decay parameter defines how input values are scored + at the distance given by the `scale` parameter +- returns **score** (number\|array): a single score or an array of scores + depending on the type of the input `value` + +```aql +DECAY_EXP(41, 40, 5, 5, 0.7) // 1 +DECAY_EXP(2, 0, 10, 0, 0.2) // 0.7247796636776955 +DECAY_EXP(49.9889, 50, 0.001, 0.001, 0.2) // 8.717720806626885e-08 +``` + +## DECAY_LINEAR() + +Introduced in: v3.9.0 + +`DECAY_LINEAR(value, origin, scale, offset, decay) → score` + +Calculate the score for one or multiple values with a **linear function** that +decays depending on the distance of a numeric value from a user-given origin. + +- **value** (number\|array): the input value or an array with input values +- **origin** (number): the point of origin used for calculating the distance +- **scale** (number): defines the distance from `origin` + `offset` at which + the computed score will equal the `decay` parameter +- **offset** (number): the decay function will be evaluated for distance values + greater than the defined offset +- **decay** (number): the decay parameter defines how input values are scored + at the distance given by the `scale` parameter +- returns **score** (number\|array): a single score or an array of scores + depending on the type of the input `value` + +```aql +DECAY_LINEAR(41, 40, 5, 5, 0.5) // 1 +DECAY_LINEAR(9.8, 0, 10, 0, 0.2) // 0.21599999999999994 +DECAY_LINEAR(5..7, 0, 10, 0, 0.2) // [0.6, 0.52, 0.44] +``` + +## DEGREES() + +`DEGREES(rad) → num` + +Return the angle converted from radians to degrees. + +- **rad** (number): the input value +- returns **num** (number): the angle in degrees + +```aql +DEGREES(0.7853981633974483) // 45 +DEGREES(0) // 0 +DEGREES(3.141592653589793) // 180 +``` + +## EXP() + +`EXP(value) → num` + +Return Euler's constant (2.71828...) raised to the power of *value*. 
+ +- **value** (number): the input value +- returns **num** (number): Euler's constant raised to the power of *value* + +```aql +EXP(1) // 2.718281828459045 +EXP(10) // 22026.46579480671 +EXP(0) // 1 +``` + +## EXP2() + +`EXP2(value) → num` + +Return 2 raised to the power of *value*. + +- **value** (number): the input value +- returns **num** (number): 2 raised to the power of *value* + +```aql +EXP2(16) // 65536 +EXP2(1) // 2 +EXP2(0) // 1 +``` + +## FLOOR() + +`FLOOR(value) → roundedValue` + +Return the integer closest but not greater than *value*. + +To round upward, see [`CEIL()`](#ceil).\ +To round to the nearest integer value, see [`ROUND()`](#round). + +- **value** (number): any number +- returns **roundedValue** (number): the value rounded downward + +```aql +FLOOR(2.49) // 2 +FLOOR(2.50) // 2 +FLOOR(-2.50) // -3 +FLOOR(-2.51) // -3 +``` + +## LOG() + +`LOG(value) → num` + +Return the natural logarithm of *value*. The base is Euler's +constant (2.71828...). + +- **value** (number): the input value +- returns **num** (number\|null): the natural logarithm of *value*, or *null* if *value* is + equal or less than 0 + +```aql +LOG(2.718281828459045) // 1 +LOG(10) // 2.302585092994046 +LOG(0) // null +``` + +## LOG2() + +`LOG2(value) → num` + +Return the base 2 logarithm of *value*. + +- **value** (number): the input value +- returns **num** (number\|null): the base 2 logarithm of *value*, or *null* if *value* is + equal or less than 0 + +```aql +LOG2(1024) // 10 +LOG2(8) // 3 +LOG2(0) // null +``` + +## LOG10() + +`LOG10(value) → num` + +Return the base 10 logarithm of *value*. + +- **value** (number): the input value +- returns **num** (number): the base 10 logarithm of *value*, or *null* if *value* is + equal or less than 0 + +```aql +LOG10(10000) // 4 +LOG10(10) // 1 +LOG10(0) // null +``` + +## L1_DISTANCE() + +Introduced in: v3.9.0 + +`L1_DISTANCE(x, y) → num` + +Return the [Manhattan distance](https://en.wikipedia.org/wiki/Taxicab_geometry) +between *x* and *y*. + +To calculate the similarity, see [`COSINE_SIMILARITY()`](#cosine_similarity). + +- **x** (array): first input array +- **y** (array): second input array +- returns **num** (number\|array): the L1 distance value. + If one of the inputs is a nested (2D) array, then an array is returned. + The length of each inner array should be equal to the length of second input + array in that case. + +In case of invalid input values the function returns **null** and produces a warning. + +```aql +L1_DISTANCE([-1,-1], [2,2]) // 6 +L1_DISTANCE([[1,2,3],[-1,-2,-3],[3,4,5],[-5,2,1]], [1,1,1]) // [3,9,9,7] +L1_DISTANCE([1.5], [3]) // 1.5 +``` + +## L2_DISTANCE() + +Introduced in: v3.9.0 + +`L2_DISTANCE(x,y) → num` + +Return the [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance) +between *x* and *y*. + +To calculate the similarity, see [`COSINE_SIMILARITY()`](#cosine_similarity). + +- **x** (array): first input array +- **y** (array): second input array +- returns **num** (number\|array): the L2 distance value. + If one of the inputs is a nested (2D) array, then an array is returned. + The length of each inner array should be equal to the length of second input + array in that case. + +In case of invalid input values the function returns **null** and produces a warning. 
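+
+For a quick cross-check, the Euclidean distance between two flat arrays can
+also be computed manually with `POW()`, `SUM()`, and `SQRT()`. The following
+is a minimal sketch for illustration only, not how the function is implemented
+internally:
+
+```aql
+LET x = [1, 1]
+LET y = [5, 2]
+LET squaredDiffs = (
+  FOR i IN 0..(LENGTH(x) - 1)
+    RETURN POW(x[i] - y[i], 2)
+)
+RETURN SQRT(SUM(squaredDiffs)) // same as L2_DISTANCE([1, 1], [5, 2])
+```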
+ +```aql +L2_DISTANCE([1,1], [5,2]) // 4.1231056256176606 +L2_DISTANCE([[1,2,3], [4,5,6], [7,8,9]], [3,2,1]) // [2.8284271247461903, 5.916079783099616, 10.770329614269007] +L2_DISTANCE([0,1], [1,0]) // 1.4142135623730951 +``` + +## MAX() + +`MAX(anyArray) → max` + +Return the greatest element of *anyArray*. The array is not limited to numbers. +Also see [type and value order](../fundamentals/type-and-value-order.md). + +- **anyArray** (array): an array of numbers, *null* values are ignored +- returns **max** (any\|null): the element with the greatest value. If the array is + empty or contains *null* values only, the function will return *null*. + +```aql +MAX( [5, 9, -2, null, 1] ) // 9 +MAX( [ null, null ] ) // null +``` + +## MEDIAN() + +`MEDIAN(numArray) → median` + +Return the median value of the values in *array*. + +The array is sorted and the element in the middle is returned. If the array has an +even length of elements, the two center-most elements are interpolated by calculating +the average value (arithmetic mean). + +- **numArray** (array): an array of numbers, *null* values are ignored +- returns **median** (number\|null): the median of *numArray*. If the array is + empty or contains *null* values only, the function will return *null*. + +```aql +MEDIAN( [ 1, 2, 3] ) // 2 +MEDIAN( [ 1, 2, 3, 4 ] ) // 2.5 +MEDIAN( [ 4, 2, 3, 1 ] ) // 2.5 +MEDIAN( [ 999, 80, 4, 4, 4, 3, 3, 3 ] ) // 4 +``` + +## MIN() + +`MIN(anyArray) → min` + +Return the smallest element of *anyArray*. The array is not limited to numbers. +Also see [type and value order](../fundamentals/type-and-value-order.md). + +- **anyArray** (array): an array of numbers, *null* values are ignored +- returns **min** (any\|null): the element with the smallest value. If the array is + empty or contains *null* values only, the function will return *null*. + +```aql +MIN( [5, 9, -2, null, 1] ) // -2 +MIN( [ null, null ] ) // null +``` + +## PERCENTILE() + +`PERCENTILE(numArray, n, method) → percentile` + +Return the *n*th percentile of the values in *numArray*. + +- **numArray** (array): an array of numbers, *null* values are ignored +- **n** (number): must be between 0 (excluded) and 100 (included) +- **method** (string, *optional*): "rank" (default) or "interpolation" +- returns **percentile** (number\|null): the *n*th percentile, or *null* if the + array is empty or only *null* values are contained in it or the percentile + cannot be calculated + +```aql +PERCENTILE( [1, 2, 3, 4], 50 ) // 2 +PERCENTILE( [1, 2, 3, 4], 50, "rank" ) // 2 +PERCENTILE( [1, 2, 3, 4], 50, "interpolation" ) // 2.5 +``` + +## PI() + +`PI() → pi` + +Return pi. + +- returns **pi** (number): the first few significant digits of pi (3.141592653589793) + +```aql +PI() // 3.141592653589793 +``` + +## POW() + +`POW(base, exp) → num` + +Return the *base* to the exponent *exp*. + +- **base** (number): the base value +- **exp** (number): the exponent value +- returns **num** (number): the exponentiated value + +```aql +POW( 2, 4 ) // 16 +POW( 5, -1 ) // 0.2 +POW( 5, 0 ) // 1 +``` + +## PRODUCT() + +`PRODUCT(numArray) → product` + +Return the product of the values in *array*. + +- **numArray** (array): an array of numbers, *null* values are ignored +- returns **product** (number): the product of all values in *numArray*. If the array + is empty or only *null* values are contained in the array, *1* will be returned. 
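+
+Since AQL ranges behave like arrays of integers, `PRODUCT()` also lends itself
+to small calculations such as factorials. A brief illustration:
+
+```aql
+RETURN PRODUCT(1..5) // 5! = 120
+```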
+ +```aql +PRODUCT( [1, 2, 3, 4] ) // 24 +PRODUCT( [null, -5, 6] ) // -30 +PRODUCT( [ ] ) // 1 +``` + +## RADIANS() + +`RADIANS(deg) → num` + +Return the angle converted from degrees to radians. + +- **deg** (number): the input value +- returns **num** (number): the angle in radians + +```aql +RADIANS(180) // 3.141592653589793 +RADIANS(90) // 1.5707963267948966 +RADIANS(0) // 0 +``` + +## RAND() + +`RAND() → randomNumber` + +Return a pseudo-random number between 0 and 1. + +- returns **randomNumber** (number): a number greater than 0 and less than 1 + +```aql +RAND() // 0.3503170117504508 +RAND() // 0.6138226173882478 +``` + +Complex example: + +```aql +LET coinFlips = ( + FOR i IN 1..100000 + RETURN RAND() > 0.5 ? "heads" : "tails" +) +RETURN MERGE( + FOR flip IN coinFlips + COLLECT f = flip WITH COUNT INTO count + RETURN { [f]: count } +) +``` + +Result: + +```json +[ + { + "heads": 49902, + "tails": 50098 + } +] +``` + +## RANGE() + +`RANGE(start, stop, step) → numArray` + +Return an array of numbers in the specified range, optionally with increments +other than 1. The *start* and *stop* arguments are truncated to integers +unless a *step* argument is provided. + +Also see the [range operator](../operators.md#range-operator) for ranges +with integer bounds and a step size of 1. + +- **start** (number): the value to start the range at (inclusive) +- **stop** (number): the value to end the range with (inclusive) +- **step** (number, *optional*): how much to increment in every step, + the default is *1.0* +- returns **numArray** (array): all numbers in the range as array + +```aql +RANGE(1, 4) // [ 1, 2, 3, 4 ] +RANGE(1, 4, 2) // [ 1, 3 ] +RANGE(1, 4, 3) // [ 1, 4 ] +RANGE(1.5, 2.5) // [ 1, 2 ] +RANGE(1.5, 2.5, 1) // [ 1.5, 2.5 ] +RANGE(1.5, 2.5, 0.5) // [ 1.5, 2, 2.5 ] +RANGE(-0.75, 1.1, 0.5) // [ -0.75, -0.25, 0.25, 0.75 ] +``` + +## ROUND() + +`ROUND(value) → roundedValue` + +Return the integer closest to *value*. + +- **value** (number): any number +- returns **roundedValue** (number): the value rounded to the closest integer + +```aql +ROUND(2.49) // 2 +ROUND(2.50) // 3 +ROUND(-2.50) // -2 +ROUND(-2.51) // -3 +``` + +Rounding towards zero, also known as `trunc()` in C/C++, can be achieved with +a combination of the [ternary operator](../operators.md#ternary-operator), +[`CEIL()`](#ceil) and [`FLOOR()`](#floor): + +```aql +value >= 0 ? FLOOR(value) : CEIL(value) +``` + +## SIN() + +`SIN(value) → num` + +Return the sine of *value*. + +- **value** (number): the input value +- returns **num** (number): the sine of *value* + +```aql +SIN(3.141592653589783 / 2) // 1 +SIN(0) // 0 +SIN(-3.141592653589783 / 2) // -1 +SIN(RADIANS(270)) // -1 +``` + +## SQRT() + +`SQRT(value) → squareRoot` + +Return the square root of *value*. + +- **value** (number): a number +- returns **squareRoot** (number): the square root of *value* + +```aql +SQRT(9) // 3 +SQRT(2) // 1.4142135623730951 +``` + +Other roots can be calculated with [`POW()`](#pow) like `POW(value, 1/n)`: + +```aql +// 4th root of 8*8*8*8 = 4096 +POW(4096, 1/4) // 8 + +// cube root of 3*3*3 = 27 +POW(27, 1/3) // 3 + +// square root of 3*3 = 9 +POW(9, 1/2) // 3 +``` + +## STDDEV_POPULATION() + +`STDDEV_POPULATION(numArray) → num` + +Return the population standard deviation of the values in *array*. + +- **numArray** (array): an array of numbers, *null* values are ignored +- returns **num** (number\|null): the population standard deviation of *numArray*. 
+ If the array is empty or only *null* values are contained in the array, + *null* will be returned. + +```aql +STDDEV_POPULATION( [ 1, 3, 6, 5, 2 ] ) // 1.854723699099141 +``` + +## STDDEV_SAMPLE() + +`STDDEV_SAMPLE(numArray) → num` + +Return the sample standard deviation of the values in *array*. + +- **numArray** (array): an array of numbers, *null* values are ignored +- returns **num** (number\|null): the sample standard deviation of *numArray*. + If the array is empty or only *null* values are contained in the array, + *null* will be returned. + +```aql +STDDEV_SAMPLE( [ 1, 3, 6, 5, 2 ] ) // 2.0736441353327724 +``` + +## STDDEV() + +This is an alias for [`STDDEV_POPULATION()`](#stddev_population). + +## SUM() + +`SUM(numArray) → sum` + +Return the sum of the values in *array*. + +- **numArray** (array): an array of numbers, *null* values are ignored +- returns **sum** (number): the total of all values in *numArray*. If the array + is empty or only *null* values are contained in the array, *0* will be returned. + +```aql +SUM( [1, 2, 3, 4] ) // 10 +SUM( [null, -5, 6] ) // 1 +SUM( [ ] ) // 0 +``` + +## TAN() + +`TAN(value) → num` + +Return the tangent of *value*. + +- **value** (number): the input value +- returns **num** (number): the tangent of *value* + +```aql +TAN(10) // 0.6483608274590866 +TAN(5) // -3.380515006246586 +TAN(0) // 0 +``` + +## VARIANCE_POPULATION() + +`VARIANCE_POPULATION(numArray) → num` + +Return the population variance of the values in *array*. + +- **numArray** (array): an array of numbers, *null* values are ignored +- returns **num** (number\|null): the population variance of *numArray*. + If the array is empty or only *null* values are contained in the array, + *null* will be returned. + +```aql +VARIANCE_POPULATION( [ 1, 3, 6, 5, 2 ] ) // 3.4400000000000004 +``` + +## VARIANCE_SAMPLE() + +`VARIANCE_SAMPLE(array) → num` + +Return the sample variance of the values in *array*. + +- **numArray** (array): an array of numbers, *null* values are ignored +- returns **num** (number\|null): the sample variance of *numArray*. + If the array is empty or only *null* values are contained in the array, + *null* will be returned. + +```aql +VARIANCE_SAMPLE( [ 1, 3, 6, 5, 2 ] ) // 4.300000000000001 +``` + +## VARIANCE() + +This is an alias for [`VARIANCE_POPULATION()`](#variance_population). diff --git a/site/content/arangodb/oem/aql/functions/string.md b/site/content/arangodb/oem/aql/functions/string.md new file mode 100644 index 0000000000..56313056d7 --- /dev/null +++ b/site/content/arangodb/oem/aql/functions/string.md @@ -0,0 +1,2070 @@ +--- +title: String functions in AQL +menuTitle: String +weight: 50 +description: >- + AQL offers functions for string processing +--- +## CHAR_LENGTH() + +`CHAR_LENGTH(str) → length` + +Return the number of characters in `str` (not byte length). + +| Input | Length | +|--------|--------| +| String | Number of Unicode characters | +| Number | Number of Unicode characters that represent the number | +| Array | Number of Unicode characters from the resulting stringification | +| Object | Number of Unicode characters from the resulting stringification | +| true | 4 | +| false | 5 | +| null | 0 | + +- **str** (string): a string. If a number is passed, it will be casted to string first. 
+- returns **length** (number): the character length of `str` (not byte length) + +**Examples** + +```aql +--- +name: aqlCharLength_1 +description: '' +--- +RETURN CHAR_LENGTH("foo") +``` + +```aql +--- +name: aqlCharLength_2 +description: '' +--- +LET value = {foo: "bar"} +RETURN { + str: JSON_STRINGIFY(value), + len: CHAR_LENGTH(value) +} +``` + +## CONCAT() + +`CONCAT(value1, value2, ... valueN) → str` + +Concatenate the values passed as `value1` to `valueN`. + +- **values** (any, *repeatable*): elements of arbitrary type (at least 1) +- returns **str** (string): a concatenation of the elements. `null` values + are ignored. Array and object values are JSON-encoded in their entirety. + +**Examples** + +```aql +--- +name: aqlConcatStrings_1 +description: '' +--- +RETURN CONCAT("foo", "bar", "baz") +``` + +```aql +--- +name: aqlConcatNumbers_1 +description: '' +--- +RETURN CONCAT(1, 2, 3) +``` + +```aql +--- +name: aqlConcatPrimitiveTypes_1 +description: '' +--- +RETURN CONCAT(null, false, 0, true, "") +``` + +```aql +--- +name: aqlConcatCompoundTypes_1 +description: '' +--- +RETURN CONCAT([5, 6], {foo: "bar"}) +``` + +--- + +`CONCAT(anyArray) → str` + +If a single array is passed to `CONCAT()`, its members are concatenated. + +- **anyArray** (array): array with elements of arbitrary type +- returns **str** (string): a concatenation of the array elements. `null` values + are ignored. Array and object values are JSON-encoded in their entirety. + +```aql +--- +name: aqlConcatStrings_2 +description: '' +--- +RETURN CONCAT( [ "foo", "bar", "baz" ] ) +``` + +```aql +--- +name: aqlConcatNumbers_2 +description: '' +--- +RETURN CONCAT( [1, 2, 3] ) +``` + +```aql +--- +name: aqlConcatPrimitiveTypes_2 +description: '' +--- +RETURN CONCAT( [null, false, 0, true, ""] ) +``` + +```aql +--- +name: aqlConcatCompoundTypes_2 +description: '' +--- +RETURN CONCAT( [[5, 6], {foo: "bar"}] ) +``` + +## CONCAT_SEPARATOR() + +`CONCAT_SEPARATOR(separator, value1, value2, ... valueN) → joinedString` + +Concatenate the strings passed as arguments `value1` to `valueN` using the +*separator* string. + +- **separator** (string): an arbitrary separator string +- **values** (string\|array, *repeatable*): strings or arrays of strings as multiple + arguments (at least 1) +- returns **joinedString** (string): a concatenated string of the elements, using + `separator` as separator string. `null` values are ignored. Array and object + values are JSON-encoded in their entirety. + +**Examples** + +```aql +--- +name: aqlConcatSeparatorStrings_1 +description: '' +--- +RETURN CONCAT_SEPARATOR(", ", "foo", "bar", "baz") +``` + +```aql +--- +name: aqlConcatSeparatorNumbers_1 +description: '' +--- +RETURN CONCAT_SEPARATOR(", ", 1, 2, 3) +``` + +```aql +--- +name: aqlConcatSeparatorPrimitiveTypes_1 +description: '' +--- +RETURN CONCAT_SEPARATOR(", ", null, false, 0, true, "") +``` + +```aql +--- +name: aqlConcatSeparatorCompoundTypes_1 +description: '' +--- +RETURN CONCAT_SEPARATOR(", ", [5, 6], {foo: "bar"}) +``` + +--- + +`CONCAT_SEPARATOR(separator, anyArray) → joinedString` + +If a single array is passed as second argument to `CONCAT_SEPARATOR()`, its +members are concatenated. + +- **separator** (string): an arbitrary separator string +- **anyArray** (array): array with elements of arbitrary type +- returns **joinedString** (string): a concatenated string of the elements, using + `separator` as separator string. `null` values are ignored. Array and object + values are JSON-encoded in their entirety. 
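+
+The array variant is convenient for joining values produced by a subquery,
+for example the values of an attribute across documents. A minimal sketch,
+assuming a hypothetical `users` collection with a `name` attribute:
+
+```aql
+RETURN CONCAT_SEPARATOR(", ", (
+  FOR user IN users
+    RETURN user.name
+))
+```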
+ +```aql +--- +name: aqlConcatSeparatorStrings_2 +description: '' +--- +RETURN CONCAT_SEPARATOR(", ", ["foo", "bar", "baz"]) +``` + +```aql +--- +name: aqlConcatSeparatorNumbers_2 +description: '' +--- +RETURN CONCAT_SEPARATOR(", ", [1, 2, 3]) +``` + +```aql +--- +name: aqlConcatSeparatorPrimitiveTypes_2 +description: '' +--- +RETURN CONCAT_SEPARATOR(", ", [null, false, 0, true, ""]) +``` + +```aql +--- +name: aqlConcatSeparatorCompoundTypes_2 +description: '' +--- +RETURN CONCAT_SEPARATOR(", ", [[5, 6], {foo: "bar"}]) +``` + +## CONTAINS() + +`CONTAINS(text, search, returnIndex) → match` + +Check whether the string `search` is contained in the string `text`. +The string matching performed by `CONTAINS()` is case-sensitive. + +To determine if or at which position a value is included in an **array**, see the +[`POSITION()` array function](array.md#position). + +- **text** (string): the haystack +- **search** (string): the needle +- **returnIndex** (bool, *optional*): if set to `true`, the character position + of the match is returned instead of a boolean. The default is `false`. +- returns **match** (bool\|number): by default, `true` is returned if `search` + is contained in `text`, and `false` otherwise. With `returnIndex` set to `true`, + the position of the first occurrence of `search` within `text` is returned + (starting at offset 0), or `-1` if it is not contained. + +**Examples** + +```aql +--- +name: aqlContainsMatch +description: '' +--- +RETURN CONTAINS("foobarbaz", "bar") +``` + +```aql +--- +name: aqlContains +description: '' +--- +RETURN CONTAINS("foobarbaz", "horse") +``` + +```aql +--- +name: aqlContainsMatchIndex +description: '' +--- +RETURN CONTAINS("foobarbaz", "bar", true) +``` + +```aql +--- +name: aqlContainsNoMatchIndex +description: '' +--- +RETURN CONTAINS("foobarbaz", "horse", true) +``` + +## COUNT() + +This is an alias for [`LENGTH()`](#length). + +## CRC32() + +`CRC32(text) → hash` + +Calculate the CRC32 checksum for `text` and return it in a hexadecimal +string representation. The polynomial used is `0x1EDC6F41`. The initial +value used is `0xFFFFFFFF`, and the final XOR value is also `0xFFFFFFFF`. + +- **text** (string): a string +- returns **hash** (string): CRC32 checksum as hex string + +**Examples** + +```aql +--- +name: aqlCrc32 +description: '' +--- +RETURN CRC32("foobar") +``` + +## ENCODE_URI_COMPONENT() + +`ENCODE_URI_COMPONENT(value) → encodedString` + +Return the URI component-encoded string of `value`. + +- **value** (string): a string +- returns **encodedString** (string): the URI component-encoded `value` + +**Examples** + +```aql +--- +name: aqlEncodeUriComponent +description: '' +--- +RETURN ENCODE_URI_COMPONENT("fünf %") +``` + +## FIND_FIRST() + +`FIND_FIRST(text, search, start, end) → position` + +Return the position of the first occurrence of the string `search` inside the +string `text`. Positions start at 0. + +- **text** (string): the haystack +- **search** (string): the needle +- **start** (number, *optional*): limit the search to a subset of the text, + beginning at `start` +- **end** (number, *optional*): limit the search to a subset of the text, + ending at `end` +- returns **position** (number): the character position of the match. If `search` + is not contained in `text`, -1 is returned. If `search` is empty, `start` is returned. 
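+
+A common pattern is to combine `FIND_FIRST()` with [`LEFT()`](#left) or
+[`SUBSTRING()`](#substring) to cut a string at the first occurrence of a
+separator. A minimal sketch with a made-up input value:
+
+```aql
+LET address = "user@example.com"
+LET pos = FIND_FIRST(address, "@")
+RETURN pos == -1 ? address : LEFT(address, pos) // "user"
+```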
+ +**Examples** + +```aql +--- +name: aqlFindFirst_1 +description: '' +--- +RETURN FIND_FIRST("foobarbaz", "ba") +``` + +```aql +--- +name: aqlFindFirst_2 +description: '' +--- +RETURN FIND_FIRST("foobarbaz", "ba", 4) +``` + +```aql +--- +name: aqlFindFirst_3 +description: '' +--- +RETURN FIND_FIRST("foobarbaz", "ba", 0, 3) +``` + +## FIND_LAST() + +`FIND_LAST(text, search, start, end) → position` + +Return the position of the last occurrence of the string `search` inside the +string `text`. Positions start at 0. + +- **text** (string): the haystack +- **search** (string): the needle +- **start** (number, *optional*): limit the search to a subset of the text, + beginning at *start* +- **end** (number, *optional*): limit the search to a subset of the text, + ending at *end* +- returns **position** (number): the character position of the match. If `search` + is not contained in `text`, -1 is returned. + If `search` is empty, the string length is returned, or `end` + 1. + +**Examples** + +```aql +--- +name: aqlFindLast_1 +description: '' +--- +RETURN FIND_LAST("foobarbaz", "ba") +``` + +```aql +--- +name: aqlFindLast_2 +description: '' +--- +RETURN FIND_LAST("foobarbaz", "ba", 7) +``` + +```aql +--- +name: aqlFindLast_3 +description: '' +--- +RETURN FIND_LAST("foobarbaz", "ba", 0, 4) +``` + +## FNV64() + +`FNV64(text) → hash` + +Calculate the FNV-1A 64 bit hash for `text` and return it in a hexadecimal +string representation. + +- **text** (string): a string +- returns **hash** (string): FNV-1A hash as hex string + +**Examples** + +```aql +--- +name: aqlFnv64 +description: '' +--- +RETURN FNV64("foobar") +``` + +## IPV4_FROM_NUMBER() + +`IPV4_FROM_NUMBER(numericAddress) → stringAddress` + +Converts a numeric IPv4 address value into its string representation. + +- **numericAddress** (number): a numeric representation of an IPv4 address, for + example produced by [`IPV4_TO_NUMBER()`](#ipv4_to_number). The number must be + an unsigned integer between 0 and 4294967295 (both inclusive). +- returns **stringAddress** (string): the string representation of the IPv4 + address. If the input `numberAddress` is not a valid representation of an + IPv4 address, the function returns `null` and produces a warning. + +**Examples** + +```aql +--- +name: aqlIPv4FromNumber_1 +description: '' +--- +RETURN IPV4_FROM_NUMBER(0) +``` + +```aql +--- +name: aqlIPv4FromNumber_2 +description: '' +--- +RETURN IPV4_FROM_NUMBER(134744072) +``` + +```aql +--- +name: aqlIPv4FromNumber_3 +description: '' +--- +RETURN IPV4_FROM_NUMBER(2130706433) +``` + +```aql +--- +name: aqlIPv4FromNumber_4 +description: '' +--- +RETURN IPV4_FROM_NUMBER(3232235521) +``` + +```aql +--- +name: aqlIPv4FromNumber_5 +description: '' +--- +RETURN IPV4_FROM_NUMBER(-23) // invalid, produces a warning +``` + +## IPV4_TO_NUMBER() + +`IPV4_TO_NUMBER(stringAddress) → numericAddress` + +Converts an IPv4 address string into its numeric representation. + +- **stringAddress** (string): a string representing an IPv4 address +- returns **numericAddress** (number): the numeric representation of the IPv4 + address, as an unsigned integer. If the input `stringAddress` is not a valid + representation of an IPv4 address, the function returns `null` and produces + a warning. 
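+
+Because the numeric form preserves the ordering of addresses, it can be
+combined with the [`IN_RANGE()` function](miscellaneous.md#in_range) to test
+whether an address falls into a given range. A small sketch, using made-up
+addresses:
+
+```aql
+// Does the address lie between 192.168.0.0 and 192.168.0.255 (inclusive)?
+RETURN IN_RANGE(
+  IPV4_TO_NUMBER("192.168.0.42"),
+  IPV4_TO_NUMBER("192.168.0.0"),
+  IPV4_TO_NUMBER("192.168.0.255"),
+  true, true
+)
+```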
+ +**Examples** + +```aql +--- +name: aqlIPv4ToNumber_1 +description: '' +--- +RETURN IPV4_TO_NUMBER("0.0.0.0") +``` + +```aql +--- +name: aqlIPv4ToNumber_2 +description: '' +--- +RETURN IPV4_TO_NUMBER("8.8.8.8") +``` + +```aql +--- +name: aqlIPv4ToNumber_3 +description: '' +--- +RETURN IPV4_TO_NUMBER("127.0.0.1") +``` + +```aql +--- +name: aqlIPv4ToNumber_4 +description: '' +--- +RETURN IPV4_TO_NUMBER("192.168.0.1") +``` + +```aql +--- +name: aqlIPv4ToNumber_5 +description: '' +--- +RETURN IPV4_TO_NUMBER("milk") // invalid, produces a warning +``` + +## IS_IPV4() + +`IS_IPV4(value) → bool` + +Check if an arbitrary string is suitable for interpretation as an IPv4 address. + +- **value** (string): an arbitrary string +- returns **bool** (bool): `true` if `value` is a string that can be interpreted + as an IPv4 address. To be considered valid, the string must contain of 4 octets + of decimal numbers with 1 to 3 digits length each, allowing the values 0 to 255. + The octets must be separated by periods and must not have padding zeroes. + +**Examples** + +```aql +--- +name: aqlIsIPv4_1 +description: '' +--- +RETURN IS_IPV4("127.0.0.1") +``` + +```aql +--- +name: aqlIsIPv4_2 +description: '' +--- +RETURN IS_IPV4("8.8.8.8") +``` + +```aql +--- +name: aqlIsIPv4_3 +description: '' +--- +RETURN IS_IPV4("008.008.008.008") +``` + +```aql +--- +name: aqlIsIPv4_4 +description: '' +--- +RETURN IS_IPV4("12345.2.3.4") +``` + +```aql +--- +name: aqlIsIPv4_5 +description: '' +--- +RETURN IS_IPV4("12.34") +``` + +```aql +--- +name: aqlIsIPv4_6 +description: '' +--- +RETURN IS_IPV4(8888) +``` + +## JSON_PARSE() + +`JSON_PARSE(text) → value` + +Return an AQL value described by the JSON-encoded input string. + +- **text** (string): the string to parse as JSON +- returns **value** (any): the value corresponding to the given JSON text. + For input values that are no valid JSON strings, the function will return `null`. + +**Examples** + +```aql +--- +name: aqlJsonParse_1 +description: '' +--- +RETURN JSON_PARSE("123") +``` + +```aql +--- +name: aqlJsonParse_2 +description: '' +--- +RETURN JSON_PARSE("[ true, false, null, -0.5 ]") +``` + +```aql +--- +name: aqlJsonParse_3 +description: '' +--- +RETURN JSON_PARSE('{"a": 1}') +``` + +```aql +--- +name: aqlJsonParse_4 +description: '' +--- +RETURN JSON_PARSE('"abc"') +``` + +```aql +--- +name: aqlJsonParse_5 +description: '' +--- +RETURN JSON_PARSE("abc") // invalid JSON +``` + +## JSON_STRINGIFY() + +`JSON_STRINGIFY(value) → text` + +Return a JSON string representation of the input value. + +- **value** (any): the value to convert to a JSON string +- returns **text** (string): the JSON string representing `value`. + For input values that cannot be converted to JSON, the function + will return `null`. + +**Examples** + +```aql +--- +name: aqlJsonStringify_1 +description: '' +--- +RETURN JSON_STRINGIFY(true) +``` + +```aql +--- +name: aqlJsonStringify_2 +description: '' +--- +RETURN JSON_STRINGIFY("abc") +``` + +```aql +--- +name: aqlJsonStringify_3 +description: '' +--- +RETURN JSON_STRINGIFY( [1, {'2': .5}] ) +``` + +## LEFT() + +`LEFT(value, n) → substring` + +Return the `n` leftmost characters of the string `value`. + +To return the rightmost characters, see [`RIGHT()`](#right).\ +To take a part from an arbitrary position off the string, +see [`SUBSTRING()`](#substring). 
+ +- **value** (string): a string +- **n** (number): how many characters to return +- returns **substring** (string): at most `n` characters of `value`, + starting on the left-hand side of the string + +**Examples** + +```aql +--- +name: aqlLeft_1 +description: '' +--- +RETURN LEFT("foobar", 3) +``` + +```aql +--- +name: aqlLeft_2 +description: '' +--- +RETURN LEFT("foobar", 10) +``` + +## LENGTH() + +`LENGTH(str) → length` + +Determine the character length of a string. + +- **str** (string): a string. If a number is passed, it will be casted to string first. +- returns **length** (number): the character length of `str` (not byte length) + +`LENGTH()` can also determine the [number of elements](array.md#length) in an array, +the [number of attribute keys](document-object.md#length) of an object / document and +the [amount of documents](miscellaneous.md#length) in a collection. + +**Examples** + +```aql +--- +name: aqlLengthString_1 +description: '' +--- +RETURN LENGTH("foobar") +``` + +```aql +--- +name: aqlLengthString_2 +description: '' +--- +RETURN LENGTH("电脑坏了") +``` + +## LEVENSHTEIN_DISTANCE() + +`LEVENSHTEIN_DISTANCE(value1, value2) → distance` + +Calculate the [Damerau-Levenshtein distance](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance) +between two strings. + +- **value1** (string): a string +- **value2** (string): a string +- returns **distance** (number): calculated Damerau-Levenshtein distance + between the input strings `value1` and `value2` + +**Examples** + +```aql +--- +name: aqlLevenshteinDistance_1 +description: '' +--- +RETURN LEVENSHTEIN_DISTANCE("foobar", "bar") +``` + +```aql +--- +name: aqlLevenshteinDistance_2 +description: '' +--- +RETURN LEVENSHTEIN_DISTANCE(" ", "") +``` + +```aql +--- +name: aqlLevenshteinDistance_3 +description: '' +--- +RETURN LEVENSHTEIN_DISTANCE("The quick brown fox jumps over the lazy dog", "The quick black dog jumps over the brown fox") +``` + +```aql +--- +name: aqlLevenshteinDistance_4 +description: '' +--- +RETURN LEVENSHTEIN_DISTANCE("der mötör trötet", "der trötet") +``` + +## LIKE() + +`LIKE(text, search, caseInsensitive) → bool` + +Check whether the pattern `search` is contained in the string `text`, +using wildcard matching. + +- `_`: A single arbitrary character +- `%`: Zero, one or many arbitrary characters +- `\\_`: A literal underscore +- `\\%`: A literal percent sign + +{{< info >}} +Literal backlashes require different amounts of escaping depending on the +context: +- `\` in bind variables (_Table_ view mode) in the web interface (automatically + escaped to `\\` unless the value is wrapped in double quotes and already + escaped properly) +- `\\` in bind variables (_JSON_ view mode) and queries in the web interface +- `\\` in bind variables in arangosh +- `\\\\` in queries in arangosh +- Double the amount compared to arangosh in shells that use backslashes for +escaping (`\\\\` in bind variables and `\\\\\\\\` in queries) +{{< /info >}} + +The `LIKE()` function cannot be accelerated by any sort of index. However, +the [ArangoSearch `LIKE()` function](arangosearch.md#like) that +is used in the context of a `SEARCH` operation is backed by View indexes. + +- **text** (string): the string to search in +- **search** (string): a search pattern that can contain the wildcard characters + `%` (meaning any sequence of characters, including none) and `_` (any single + character). Literal `%` and `_` must be escaped with backslashes. + *search* cannot be a variable or a document attribute. 
The actual value must + be present at query parse time already. +- **caseInsensitive** (bool, *optional*): if set to `true`, the matching will be + case-insensitive. The default is `false`. +- returns **bool** (bool): `true` if the pattern is contained in `text`, + and `false` otherwise + +**Examples** + +```aql +--- +name: aqlLikeString_1 +description: '' +--- +RETURN [ + LIKE("cart", "ca_t"), + LIKE("carrot", "ca_t"), + LIKE("carrot", "ca%t") +] +``` + +```aql +--- +name: aqlLikeString_2 +description: '' +--- +RETURN [ + LIKE("foo bar baz", "bar"), + LIKE("foo bar baz", "%bar%"), + LIKE("bar", "%bar%") +] +``` + +```aql +--- +name: aqlLikeString_3 +description: '' +--- +RETURN [ + LIKE("FoO bAr BaZ", "fOo%bAz"), + LIKE("FoO bAr BaZ", "fOo%bAz", true) +] +``` + +## LOWER() + +`LOWER(value) → lowerCaseString` + +Convert upper-case letters in `value` to their lower-case counterparts. +All other characters are returned unchanged. + +- **value** (string): a string +- returns **lowerCaseString** (string): `value` with upper-case characters converted + to lower-case characters + +**Examples** + +```aql +--- +name: aqlLower +description: '' +--- +RETURN LOWER("AVOcado") +``` + +## LTRIM() + +`LTRIM(value, chars) → strippedString` + +Return the string `value` with whitespace stripped from the start only. + +To strip from the end only, see [`RTRIM()`](#rtrim).\ +To strip both sides, see [`TRIM()`](#trim). + +- **value** (string): a string +- **chars** (string, *optional*): override the characters that should + be removed from the string. It defaults to `\r\n \t` (i.e. `0x0d`, `0x0a`, + `0x20` and `0x09`). +- returns **strippedString** (string): `value` without `chars` at the + left-hand side + +```aql +--- +name: aqlLtrim_1 +description: '' +--- +RETURN LTRIM("foo bar") +``` + +```aql +--- +name: aqlLtrim_2 +description: '' +--- +RETURN LTRIM(" foo bar ") +``` + +```aql +--- +name: aqlLtrim_3 +description: '' +--- +RETURN LTRIM("--==[foo-bar]==--", "-=[]") +``` + +## MD5() + +`MD5(text) → hash` + +Calculate the MD5 checksum for `text` and return it in a hexadecimal +string representation. + +- **text** (string): a string +- returns **hash** (string): MD5 checksum as hex string + +**Examples** + +```aql +--- +name: aqlMd5 +description: '' +--- +RETURN MD5("foobar") +``` + +## NGRAM_POSITIONAL_SIMILARITY() + +`NGRAM_POSITIONAL_SIMILARITY(input, target, ngramSize) → similarity` + +Calculates the [_n_-gram similarity](https://webdocs.cs.ualberta.ca/~kondrak/papers/spire05.pdf) +between `input` and `target` using _n_-grams with minimum and maximum length of +`ngramSize`. + +The similarity is calculated by counting how long the longest sequence of +matching _n_-grams is, divided by the **longer argument's** total _n_-gram count. +Partially matching _n_-grams are counted, whereas +[`NGRAM_SIMILARITY()`](#ngram_similarity) counts only fully matching _n_-grams. + +The _n_-grams for both input and target are calculated on the fly, +not involving Analyzers. 
+ +- **input** (string): source text to be tokenized into _n_-grams +- **target** (string): target text to be tokenized into _n_-grams +- **ngramSize** (number): minimum as well as maximum _n_-gram length +- returns **similarity** (number): value between `0.0` and `1.0` + +**Examples** + +```aql +--- +name: aqlNgramPositionalSimilarity +description: '' +--- +RETURN [ + NGRAM_POSITIONAL_SIMILARITY("quick fox", "quick foxx", 2), + NGRAM_POSITIONAL_SIMILARITY("quick fox", "quick foxx", 3), + NGRAM_POSITIONAL_SIMILARITY("quick fox", "quirky fox", 2), + NGRAM_POSITIONAL_SIMILARITY("quick fox", "quirky fox", 3) +] +``` + +## NGRAM_SIMILARITY() + +`NGRAM_SIMILARITY(input, target, ngramSize) → similarity` + +Calculates [_n_-gram similarity](https://webdocs.cs.ualberta.ca/~kondrak/papers/spire05.pdf) +between `input` and `target` using _n_-grams with minimum and maximum length of +`ngramSize`. + +The similarity is calculated by counting how long the longest sequence of +matching _n_-grams is, divided by **target's** total _n_-gram count. +Only fully matching _n_-grams are counted, whereas +[`NGRAM_POSITIONAL_SIMILARITY()`](#ngram_positional_similarity) counts partially +matching _n_-grams too. This behavior matches the similarity measure used in +[`NGRAM_MATCH()`](arangosearch.md#ngram_match). + +The _n_-grams for both input and target are calculated on the fly, not involving +Analyzers. + +- **input** (string): source text to be tokenized into _n_-grams +- **target** (string): target text to be tokenized into _n_-grams +- **ngramSize** (number): minimum as well as maximum _n_-gram length +- returns **similarity** (number): value between `0.0` and `1.0` + +**Examples** + +```aql +--- +name: aqlNgramSimilarity +description: '' +--- +RETURN [ + NGRAM_SIMILARITY("quick fox", "quick foxx", 2), + NGRAM_SIMILARITY("quick fox", "quick foxx", 3), + NGRAM_SIMILARITY("quick fox", "quirky fox", 2), + NGRAM_SIMILARITY("quick fox", "quirky fox", 3) +] +``` + +## RANDOM_TOKEN() + +`RANDOM_TOKEN(length) → randomString` + +Generate a pseudo-random token string with the specified length. +The algorithm for token generation should be treated as opaque. + +- **length** (number): desired string length for the token. It must be greater + or equal to 0 and at most 65536. A `length` of 0 returns an empty string. +- returns **randomString** (string): a generated token consisting of lowercase + letters, uppercase letters and numbers + +**Examples** + +```aql +--- +name: aqlRandomToken +description: '' +--- +RETURN [ + RANDOM_TOKEN(8), + RANDOM_TOKEN(8) +] +``` + +## REGEX_MATCHES() + +`REGEX_MATCHES(text, regex, caseInsensitive) → stringArray` + +Return the matches in the given string `text`, using the `regex`. + +- **text** (string): the string to search in +- **regex** (string): a [regular expression](#regular-expression-syntax) + to use for matching the `text` +- **caseInsensitive** (bool, *optional*): if set to `true`, the matching will be + case-insensitive. The default is `false`. 
+- returns **stringArray** (array): an array of strings containing the matches, + or `null` and a warning if the expression is invalid + +**Examples** + +```aql +--- +name: aqlRegexMatches_1 +description: '' +--- +RETURN REGEX_MATCHES("My-us3r_n4m3", "^[a-z0-9_-]{3,16}$", true) +``` + +```aql +--- +name: aqlRegexMatches_2 +description: '' +--- +RETURN REGEX_MATCHES("#4d82h4", "^#?([a-f0-9]{6}|[a-f0-9]{3})$", true) +``` + +```aql +--- +name: aqlRegexMatches_3 +description: '' +--- +RETURN REGEX_MATCHES("john@doe.com", "^([a-z0-9_\\\\.-]+)@([\\\\da-z-]+)\\\\.([a-z\\\\.]{2,6})$", false) +``` + +## REGEX_SPLIT() + +`REGEX_SPLIT(text, splitExpression, caseInsensitive, limit) → stringArray` + +Split the given string `text` into a list of strings at positions where +`splitExpression` matches. + +- **text** (string): the string to split +- **splitExpression** (string): a [regular expression](#regular-expression-syntax) + to use for splitting the `text`. You can define a capturing group to keep matches +- **caseInsensitive** (bool, *optional*): if set to `true`, the matching will be + case-insensitive. The default is `false`. +- **limit** (number, *optional*): limit the number of split values in the result. + If no `limit` is given, the number of splits returned is not bounded. +- returns **stringArray** (array): an array of strings, or `null` and a warning + if the expression is invalid + +**Examples** + +```aql +--- +name: aqlRegexSplit_1 +description: '' +--- +RETURN REGEX_SPLIT("This is a line.\\n This is yet another line\\r\\n This again is a line.\\r Mac line ", "\\\\.?\\r\\n|\\r|\\n") +``` + +```aql +--- +name: aqlRegexSplit_2 +description: '' +--- +RETURN REGEX_SPLIT("hypertext language, programming", "[\\\\s, ]+") +``` + +```aql +--- +name: aqlRegexSplit_3 +description: '' +--- +RETURN [ + REGEX_SPLIT("Capture the article", "(the)"), + REGEX_SPLIT("Don't capture the article", "the") +] +``` + +```aql +--- +name: aqlRegexSplit_4 +description: '' +--- +RETURN REGEX_SPLIT("cA,Bc,A,BcA,BcA,Bc", "a,b", true, 3) +``` + +## REGEX_TEST() + +`REGEX_TEST(text, search, caseInsensitive) → bool` + +Check whether the pattern `search` is contained in the string `text`, +using regular expression matching. + +- **text** (string): the string to search in +- **search** (string): a [regular expression](#regular-expression-syntax) + search pattern +- **caseInsensitive** (bool, *optional*): if set to `true`, the matching will be + case-insensitive. The default is `false`. +- returns **bool** (bool): `true` if the pattern is contained in `text`, + and `false` otherwise, or `null` and a warning if the expression is invalid + +**Examples** + +```aql +--- +name: aqlRegexTest_1 +description: '' +--- +RETURN REGEX_TEST("the quick brown fox", "the.*fox") +``` + +```aql +--- +name: aqlRegexTest_2 +description: '' +--- +RETURN REGEX_TEST("the quick brown fox", "^(a|the)\\\\s+(quick|slow).*f.x$") +``` + +```aql +--- +name: aqlRegexTest_3 +description: '' +--- +RETURN REGEX_TEST("the\\nquick\\nbrown\\nfox", "^the(\\n[a-w]+)+\\nfox$") +``` + +## REGEX_REPLACE() + +`REGEX_REPLACE(text, search, replacement, caseInsensitive) → string` + +Replace the pattern `search` with the string `replacement` in the string +`text`, using regular expression matching. 
+ +- **text** (string): the string to search in +- **search** (string): a [regular expression](#regular-expression-syntax) + search pattern +- **replacement** (string): the string to replace the `search` pattern with +- **caseInsensitive** (bool, *optional*): if set to `true`, the matching will be + case-insensitive. The default is `false`. +- returns **string** (string): the string `text` with the `search` regex + pattern replaced with the `replacement` string wherever the pattern exists + in `text`, or `null` and a warning if the expression is invalid + +**Examples** + +```aql +--- +name: aqlRegexReplace_1 +description: '' +--- +RETURN REGEX_REPLACE("the quick brown fox", "the.*fox", "jumped over") +``` + +```aql +--- +name: aqlRegexReplace_2 +description: '' +--- +RETURN REGEX_REPLACE("An Avocado", "a", "_") +``` + +```aql +--- +name: aqlRegexReplace_3 +description: '' +--- +RETURN REGEX_REPLACE("An Avocado", "a", "_", true) +``` + +## REVERSE() + +`REVERSE(value) → reversedString` + +Return the reverse of the string `value`. + +- **value** (string): a string +- returns **reversedString** (string): a new string with the characters in + reverse order + +**Examples** + +```aql +--- +name: aqlReverse_1 +description: '' +--- +RETURN REVERSE("foobar") +``` + +```aql +--- +name: aqlReverse_2 +description: '' +--- +RETURN REVERSE("电脑坏了") +``` + +## RIGHT() + +`RIGHT(value, length) → substring` + +Return the `length` rightmost characters of the string `value`. + +To return the leftmost characters, see [`LEFT()`](#left).\ +To take a part from an arbitrary position off the string, +see [`SUBSTRING()`](#substring). + +- **value** (string): a string +- **length** (number): how many characters to return +- returns **substring** (string): at most `length` characters of `value`, + starting on the right-hand side of the string + +**Examples** + +```aql +--- +name: aqlRight_1 +description: '' +--- +RETURN RIGHT("foobar", 3) +``` + +```aql +--- +name: aqlRight_2 +description: '' +--- +RETURN RIGHT("foobar", 10) +``` + +## RTRIM() + +`RTRIM(value, chars) → strippedString` + +Return the string `value` with whitespace stripped from the end only. + +To strip from the start only, see [`LTRIM()`](#ltrim).\ +To strip both sides, see [`TRIM()`](#trim). + +- **value** (string): a string +- **chars** (string, *optional*): override the characters that should + be removed from the string. It defaults to `\r\n \t` (i.e. `0x0d`, `0x0a`, + `0x20` and `0x09`). +- returns **strippedString** (string): `value` without `chars` at the + right-hand side + +**Examples** + +```aql +--- +name: aqlRtrim_1 +description: '' +--- +RETURN RTRIM("foo bar") +``` + +```aql +--- +name: aqlRtrim_2 +description: '' +--- +RETURN RTRIM(" foo bar ") +``` + +```aql +--- +name: aqlRtrim_3 +description: '' +--- +RETURN RTRIM("--==[foo-bar]==--", "-=[]") +``` + +## SHA1() + +`SHA1(text) → hash` + +Calculate the SHA1 checksum for `text` and returns it in a hexadecimal +string representation. + +- **text** (string): a string +- returns **hash** (string): SHA1 checksum as hex string + +**Examples** + +```aql +--- +name: aqlSha1 +description: '' +--- +RETURN SHA1("foobar") +``` + +## SHA256() + +`SHA256(text) → hash` + +Calculate the SHA256 checksum for `text` and return it in a hexadecimal +string representation. 
+ +- **text** (string): a string +- returns **hash** (string): SHA256 checksum as hex string + +**Examples** + +```aql +--- +name: aqlSha256 +description: '' +--- +RETURN SHA256("foobar") +``` + +## SHA512() + +`SHA512(text) → hash` + +Calculate the SHA512 checksum for `text` and return it in a hexadecimal +string representation. + +- **text** (string): a string +- returns **hash** (string): SHA512 checksum as hex string + +**Examples** + +```aql +--- +name: aqlSha512 +description: '' +--- +RETURN SHA512("foobar") +``` + +## SOUNDEX() + +`SOUNDEX(value) → soundexString` + +Return the [Soundex](https://en.wikipedia.org/wiki/Soundex) +fingerprint of `value`. + +- **value** (string): a string +- returns **soundexString** (string): a Soundex fingerprint of `value` + +**Examples** + +```aql +--- +name: aqlSoundex +description: '' +--- +RETURN [ + SOUNDEX("example"), + SOUNDEX("ekzampul"), + SOUNDEX("soundex"), + SOUNDEX("sounteks") +] +``` + +## SPLIT() + +`SPLIT(value, separator, limit) → strArray` + +Split the given string `value` into a list of strings, using the `separator`. + +To split a document identifier (`_id`) into the collection name and document key +(`_key`), you should use the more optimized +[`PARSE_IDENTIFIER()` function](document-object.md#parse_identifier). + +- **value** (string): a string +- **separator** (string): either a string or a list of strings. If `separator` is + an empty string, `value` will be split into a list of characters. If no `separator` + is specified, `value` will be returned as array. +- **limit** (number, *optional*): limit the number of split values in the result. + If no `limit` is given, the number of splits returned is not bounded. +- returns **strArray** (array): an array of strings + +**Examples** + +```aql +--- +name: aqlSplit_1 +description: '' +--- +RETURN SPLIT( "foo-bar-baz", "-" ) +``` + +```aql +--- +name: aqlSplit_2 +description: '' +--- +RETURN SPLIT( "foo-bar-baz", "-", 1 ) +``` + +```aql +--- +name: aqlSplit_3 +description: '' +--- +RETURN SPLIT( "foo, bar & baz", [ ", ", " & " ] ) +``` + +## STARTS_WITH() + +`STARTS_WITH(text, prefix) → startsWith` + +Check whether the given string starts with `prefix`. + +There is a corresponding [`STARTS_WITH()` ArangoSearch function](arangosearch.md#starts_with) +that can utilize View indexes. + +- **text** (string): a string to compare against +- **prefix** (string): a string to test for at the start of the text +- returns **startsWith** (bool): whether the text starts with the given prefix + +**Examples** + +```aql +--- +name: aqlStartsWith_1 +description: '' +--- +RETURN STARTS_WITH("foobar", "foo") +``` + +```aql +--- +name: aqlStartsWith_2 +description: '' +--- +RETURN STARTS_WITH("foobar", "baz") +``` + +--- + +`STARTS_WITH(text, prefixes, minMatchCount) → startsWith` + +Check if the given string starts with one of the `prefixes`. + +- **text** (string): a string to compare against +- **prefixes** (array): an array of strings to test for at the start of the text +- **minMatchCount** (number, _optional_): minimum number of prefixes that + should be satisfied. 
The default is `1` and it is the only meaningful value + unless `STARTS_WITH()` is used in the context of a `SEARCH` expression where + an attribute can have multiple values at the same time +- returns **startsWith** (bool): whether the text starts with at least + *minMatchCount* of the given prefixes + +**Examples** + +```aql +--- +name: aqlStartsWith_3 +description: '' +--- +RETURN STARTS_WITH("foobar", ["bar", "foo"]) +``` + +```aql +--- +name: aqlStartsWith_4 +description: '' +--- +RETURN STARTS_WITH("foobar", ["bar", "baz"]) +``` + +## SUBSTITUTE() + +`SUBSTITUTE(value, search, replace, limit) → substitutedString` + +Replace search values in the string `value`. + +- **value** (string): a string +- **search** (string\|array): if `search` is a string, all occurrences of + `search` will be replaced in `value`. If `search` is an array of strings, + each occurrence of a value contained in `search` will be replaced by the + corresponding array element in `replace`. If `replace` has less list items + than `search`, occurrences of unmapped `search` items will be replaced by an + empty string. +- **replace** (string\|array, *optional*): a replacement string, or an array of + strings to replace the corresponding elements of `search` with. Can have less + elements than `search` or be left out to remove matches. If `search` is an array + but `replace` is a string, then all matches will be replaced with `replace`. +- **limit** (number, *optional*): cap the number of replacements to this value +- returns **substitutedString** (string): a new string with matches replaced + (or removed) + +**Examples** + +```aql +--- +name: aqlSubstitute_1 +description: '' +--- +RETURN SUBSTITUTE( "the quick brown foxx", "quick", "lazy" ) +``` + +```aql +--- +name: aqlSubstitute_2 +description: '' +--- +RETURN SUBSTITUTE( "the quick brown foxx", [ "quick", "foxx" ], [ "slow", "dog" ] ) +``` + +```aql +--- +name: aqlSubstitute_3 +description: '' +--- +RETURN SUBSTITUTE( "the quick brown foxx", [ "the", "foxx" ], [ "that", "dog" ], 1 ) +``` + +```aql +--- +name: aqlSubstitute_4 +description: '' +--- +RETURN SUBSTITUTE( "the quick brown foxx", [ "the", "quick", "foxx" ], [ "A", "VOID!" ] ) +``` + +```aql +--- +name: aqlSubstitute_5 +description: '' +--- +RETURN SUBSTITUTE( "the quick brown foxx", [ "quick", "foxx" ], "xx" ) +``` + +--- + +`SUBSTITUTE(value, mapping, limit) → substitutedString` + +Alternatively, `search` and `replace` can be specified in a combined value. + +- **value** (string): a string +- **mapping** (object): a lookup map with search strings as keys and replacement + strings as values. Empty strings and `null` as values remove matches. + Note that there is no defined order in which the mapping is processed. In case + of overlapping searches and substitutions, one time the first entry may win, + another time the second. 
If you need to ensure a specific order then choose + the array-based variant of this function +- **limit** (number, *optional*): cap the number of replacements to this value +- returns **substitutedString** (string): a new string with matches replaced + (or removed) + +**Examples** + +```aql +--- +name: aqlSubstitute_6 +description: '' +--- +RETURN SUBSTITUTE("the quick brown foxx", { + "quick": "small", + "brown": "slow", + "foxx": "ant" +}) +``` + +```aql +--- +name: aqlSubstitute_7 +description: '' +--- +RETURN SUBSTITUTE("the quick brown foxx", { + "quick": "", + "brown": null, + "foxx": "ant" +}) +``` + +```aql +--- +name: aqlSubstitute_8 +description: '' +--- +RETURN SUBSTITUTE("the quick brown foxx", { + "quick": "small", + "brown": "slow", + "foxx": "ant" +}, 2) +``` + +## SUBSTRING() + +`SUBSTRING(value, offset, length) → substring` + +Return a substring of `value`. + +To return the rightmost characters, see [`RIGHT()`](#right).\ +To return the leftmost characters, see [`LEFT()`](#left). + +- **value** (string): a string +- **offset** (number): start at this character of the string. Offsets start at 0. + Negative offsets start from the end of the string. The last character has an + index of -1 +- **length** (number, *optional*): take this many characters. Omit the parameter + to get the substring from `offset` to the end of the string +- returns **substring** (string): a substring of `value` + +**Examples** + +Get a substring starting at the 6th character and until the end of the string: + +```aql +--- +name: aqlSubstring_1 +description: '' +--- +RETURN SUBSTRING("Holy Guacamole!", 5) +``` + +Get a 4 characters long substring, starting at the 11th character: + +```aql +--- +name: aqlSubstring_2 +description: '' +--- +RETURN SUBSTRING("Holy Guacamole!", 10, 4) +``` + +Get a 4 characters long substring, starting at the 5th from last character: + +```aql +--- +name: aqlSubstring_3 +description: '' +--- +RETURN SUBSTRING("Holy Guacamole!", -5, 4) +``` + +## SUBSTRING_BYTES() + +`SUBSTRING_BYTES(value, offset, length) → substring` + +Return a substring of `value`, using an `offset` and `length` in bytes instead +of in number of characters. + +This function is intended to be used together with the +[`OFFSET_INFO()` function](arangosearch.md#offset_info) for +[search highlighting](../../indexes-and-search/arangosearch/search-highlighting.md). + +- **value** (string): a string +- **offset** (number): start at this byte of the UTF-8 encoded string. + Offsets start at 0. Negative offsets start from the end of the string. + The last byte has an index of -1. The offset needs to coincide with the + beginning of a character's byte sequence +- **length** (number, *optional*): take this many bytes. Omit the parameter to + get the substring from `offset` to the end of the string. The end byte + (`offset` + `length`) needs to coincide with the end of a character's + byte sequence +- returns **substring** (string\|null): a substring of `value`, or `null` and + produces a warning if the start or end byte is in the middle of a character's + byte sequence + +**Examples** + +Get a substring starting at the 11th byte and until the end of the string. 
+Note that the heart emoji is comprised of two characters, the Black Heart Symbol +and the Variation Selector-16, each encoded using 3 bytes in UTF-8: + +```aql +--- +name: aqlSubstringBytes_1 +description: '' +--- +RETURN SUBSTRING_BYTES("We ❤️ avocado!", 10) +``` + +Get a 3 bytes long substring starting at the 3rd byte, extracting the +Black Heart Symbol: + +```aql +--- +name: aqlSubstringBytes_2 +description: '' +--- +RETURN SUBSTRING_BYTES("We ❤️ avocado!", 3, 3) +``` + +Get a 6 bytes long substring starting at the 15th byte from last, extracting the +heart emoji: + +```aql +--- +name: aqlSubstringBytes_3 +description: '' +--- +RETURN SUBSTRING_BYTES("We ❤️ avocado!", -15, 6) +``` + +Try to get a 4 bytes long substring starting at the 15th byte from last, +resulting in a `null` value and a warning because the substring contains an +incomplete UTF-8 byte sequence: + +```aql +--- +name: aqlSubstringBytes_4 +description: '' +--- +RETURN SUBSTRING_BYTES("We ❤️ avocado!", -15, 4) +``` + +## TOKENS() + +`TOKENS(input, analyzer) → tokenArray` + +Split the `input` string(s) with the help of the specified `analyzer` into an +array. The resulting array can be used in `FILTER` or `SEARCH` statements with +the `IN` operator, but also be assigned to variables and returned. This can be +used to better understand how a specific Analyzer processes an input value. + +It has a regular return value unlike all other ArangoSearch AQL functions and +is thus not limited to `SEARCH` operations. It is independent of Views. +A wrapping `ANALYZER()` call in a search expression does not affect the +`analyzer` argument nor allow you to omit it. + +- **input** (string\|array): text to tokenize. Accepts recursive arrays of + strings. +- **analyzer** (string): name of an [Analyzer](../../indexes-and-search/analyzers.md). +- returns **tokenArray** (array): array of strings with zero or more elements, + each element being a token. + +**Examples** + +Example query showcasing the `"text_de"` Analyzer (tokenization with stemming, +case conversion and accent removal for German text): + +```aql +--- +name: aqlTokens_1 +description: '' +--- +RETURN TOKENS("Lörem ipsüm, DOLOR SIT Ämet.", "text_de") +``` + +To search a View for documents where the `text` attribute contains certain +words/tokens in any order, you can use the function like this: + +```aql +FOR doc IN viewName + SEARCH ANALYZER(doc.text IN TOKENS("dolor amet lorem", "text_en"), "text_en") + RETURN doc +``` + +It will match `{ "text": "Lorem ipsum, dolor sit amet." }` for instance. If you +want to search for tokens in a particular order, use +[`PHRASE()`](arangosearch.md#phrase) instead. 
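+
+For comparison, a sketch of the ordered variant with `PHRASE()`, reusing the
+hypothetical `viewName` View and `text` attribute from the example above. It
+only matches if the tokens occur adjacent to each other and in the given order:
+
+```aql
+FOR doc IN viewName
+  SEARCH PHRASE(doc.text, "dolor sit amet", "text_en")
+  RETURN doc
+```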
+ +If an array of strings is passed as first argument, then each string is +tokenized individually and an array with the same nesting as the input array +is returned: + +```aql +--- +name: aqlTokens_2 +description: '' +--- +RETURN TOKENS("quick brown fox", "text_en") +``` + +```aql +--- +name: aqlTokens_3 +description: '' +--- +RETURN TOKENS(["quick brown", "fox"], "text_en") +``` + +```aql +--- +name: aqlTokens_4 +description: '' +--- +RETURN TOKENS(["quick brown", ["fox"]], "text_en") +``` + +In most cases you will want to flatten the resulting array for further usage, +because nested arrays are not accepted in `SEARCH` statements such as +` ALL IN doc.`: + +```aql +LET tokens = TOKENS(["quick brown", ["fox"]], "text_en") // [ ["quick", "brown"], [["fox"]] ] +LET tokens_flat = FLATTEN(tokens, 2) // [ "quick", "brown", "fox" ] +FOR doc IN myView SEARCH ANALYZER(tokens_flat ALL IN doc.title, "text_en") RETURN doc +``` + +## TO_BASE64() + +`TO_BASE64(value) → encodedString` + +Return the Base64 representation of `value`. + +- **value** (string): a string +- returns **encodedString** (string): a Base64 representation of `value` + +**Examples** + +```aql +--- +name: aqlToBase64 +description: '' +--- +RETURN [ + TO_BASE64("ABC."), + TO_BASE64("123456") +] +``` + +## TO_HEX() + +`TO_HEX(value) → hexString` + +Return the hexadecimal representation of `value`. + +- **value** (string): a string +- returns **hexString** (string): a hexadecimal representation of `value` + +**Examples** + +```aql +--- +name: aqlToHex +description: '' +--- +RETURN [ + TO_HEX("ABC."), + TO_HEX("ü") +] +``` + +## TRIM() + +`TRIM(value, type) → strippedString` + +Return the string `value` with whitespace stripped from the start and/or end. + +The optional `type` parameter specifies from which parts of the string the +whitespace is stripped. [`LTRIM()`](#ltrim) and [`RTRIM()`](#rtrim) are preferred +however. + +- **value** (string): a string +- **type** (number, *optional*): strip whitespace from the + - `0` – start and end of the string (default) + - `1` – start of the string only + - `2` – end of the string only + +--- + +`TRIM(value, chars) → strippedString` + +Return the string `value` with whitespace stripped from the start and end. + +- **value** (string): a string +- **chars** (string, *optional*): override the characters that should + be removed from the string. It defaults to `\r\n \t` (i.e. `0x0d`, `0x0a`, + `0x20` and `0x09`). +- returns **strippedString** (string): `value` without `chars` on both sides + +**Examples** + +```aql +--- +name: aqlTrim_1 +description: '' +--- +RETURN TRIM("foo bar") +``` + +```aql +--- +name: aqlTrim_2 +description: '' +--- +RETURN TRIM(" foo bar ") +``` + +```aql +--- +name: aqlTrim_3 +description: '' +--- +RETURN TRIM("--==[foo-bar]==--", "-=[]") +``` + +```aql +--- +name: aqlTrim_4 +description: '' +--- +RETURN TRIM(" foobar\\t \\r\\n ") +``` + +```aql +--- +name: aqlTrim_5 +description: '' +--- +RETURN TRIM(";foo;bar;baz, ", ",; ") +``` + +## UPPER() + +`UPPER(value) → upperCaseString` + +Convert lower-case letters in `value` to their upper-case counterparts. +All other characters are returned unchanged. + +- **value** (string): a string +- returns **upperCaseString** (string): `value` with lower-case characters converted + to upper-case characters + +**Examples** + +```aql +--- +name: aqlUpper +description: '' +--- +RETURN UPPER("AVOcado") +``` + +## UUID() + +`UUID() → UUIDString` + +Return a universally unique identifier value. 
+ +- returns **UUIDString** (string): a universally unique identifier + +**Examples** + +```aql +--- +name: aqlUuid +description: '' +--- +FOR i IN 1..3 + RETURN UUID() +``` + +## Regular Expression Syntax + +A regular expression may consist of literal characters and the following +characters and sequences: + +- `.` – the dot matches any single character except line terminators. + To include line terminators, use `[\s\S]` instead to simulate `.` with *DOTALL* flag. +- `\d` – matches a single digit, equivalent to `[0-9]` +- `\s` – matches a single whitespace character +- `\S` – matches a single non-whitespace character +- `\b` – matches a word boundary. This match is zero-length +- `\B` – Negation of `\b`. The match is zero-length +- `[xyz]` – set of characters. Matches any of the enclosed characters + (here: *x*, *y*, or *z*) +- `[^xyz]` – negated set of characters. Matches any other character than the + enclosed ones (i.e. anything but *x*, *y*, or *z* in this case) +- `[x-z]` – range of characters. Matches any of the characters in the + specified range, e.g. `[0-9A-F]` to match any character in + *0123456789ABCDEF* +- `[^x-z]` – negated range of characters. Matches any other character than the + ones specified in the range +- `(xyz)` – defines and matches a pattern group. Also defines a capturing group. +- `(?:xyz)` – defines and matches a pattern group without capturing the match +- `(xy|z)` – matches either *xy* or *z* +- `^` – matches the beginning of the string (e.g. `^xyz`) +- `$` – matches the end of the string (e.g. `xyz$`) + +To literally match one of the characters that have a special meaning in regular +expressions (`.`, `*`, `?`, `[`, `]`, `(`, `)`, `{`, `}`, `^`, `$`, and `\`) +you may need to escape the character with a backslash, which typically requires +escaping itself. The backslash of shorthand character classes like `\d`, `\s`, +and `\b` counts as literal backslash. The backslash of JSON escape sequences +like `\t` (tabulation), `\r` (carriage return), and `\n` (line feed) does not, +however. + +{{< info >}} +Literal backlashes require different amounts of escaping depending on the +context: +- `\` in bind variables (_Table_ view mode) in the web interface (automatically + escaped to `\\` unless the value is wrapped in double quotes and already + escaped properly) +- `\\` in bind variables (_JSON_ view mode) and queries in the web interface +- `\\` in bind variables in arangosh +- `\\\\` in queries in arangosh +- Double the amount compared to arangosh in shells that use backslashes for +escaping (`\\\\` in bind variables and `\\\\\\\\` in queries) +{{< /info >}} + +Characters and sequences may optionally be repeated using the following +quantifiers: + +- `x?` – matches one or zero occurrences of *x* +- `x*` – matches zero or more occurrences of *x* (greedy) +- `x+` – matches one or more occurrences of *x* (greedy) +- `x*?` – matches zero or more occurrences of *x* (non-greedy) +- `x+?` – matches one or more occurrences of *x* (non-greedy) +- `x{y}` – matches exactly *y* occurrences of *x* +- `x{y,z}` – matches between *y* and *z* occurrences of *x* +- `x{y,}` – matches at least *y* occurrences of *x* + +Note that `xyz+` matches *xyzzz*, but if you want to match *xyzxyz* instead, +you need to define a pattern group by wrapping the sub-expression in parentheses +and place the quantifier right behind it, like `(xyz)+`. 
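+
+To make the last point concrete, here is a small sketch using the
+`REGEX_TEST()` function (described further up on this page). The `^` and `$`
+anchors are added so that the quantifier has to cover the whole string:
+
+```aql
+RETURN [
+  REGEX_TEST("xyzzz", "^xyz+$"),    // true, only the z is repeated
+  REGEX_TEST("xyzxyz", "^xyz+$"),   // false, xyz as a whole is not repeated
+  REGEX_TEST("xyzxyz", "^(xyz)+$")  // true, the pattern group is repeated
+]
+```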
diff --git a/site/content/arangodb/oem/aql/functions/type-check-and-cast.md b/site/content/arangodb/oem/aql/functions/type-check-and-cast.md new file mode 100644 index 0000000000..81b3bb9870 --- /dev/null +++ b/site/content/arangodb/oem/aql/functions/type-check-and-cast.md @@ -0,0 +1,279 @@ +--- +title: Type check and cast functions in AQL +menuTitle: Type check & cast +weight: 55 +description: >- + AQL provides functions for checking data types and converting between + different types +--- +Some operators expect their operands to have a certain data type. For example, +logical operators expect their operands to be boolean values, and the arithmetic +operators expect their operands to be numeric values. If an operation is performed +with operands of other types, an automatic conversion to the expected types is +tried. This is called implicit type casting. It helps to avoid query +aborts. + +Type casts can also be performed upon request by invoking a type cast function. +This is called explicit type casting. AQL offers several functions for this. +Each of the these functions takes an operand of any data type and returns a result +value with the type corresponding to the function name. For example, `TO_NUMBER()` +returns a numeric value. + +## Type casting functions + +### TO_BOOL() + +`TO_BOOL(value) → bool` + +Take an input *value* of any type and convert it into the appropriate +boolean value. + +- **value** (any): input of arbitrary type +- returns **bool** (boolean): + - *null* is converted to *false* + - Numbers are converted to *true*, except for 0, which is converted to *false* + - Strings are converted to *true* if they are non-empty, and to *false* otherwise + - Arrays are always converted to *true* (even if empty) + - Objects / documents are always converted to *true* + +It's also possible to use double negation to cast to boolean: + +```aql +!!1 // true +!!0 // false +!!-0.0 // false +not not 1 // true +!!"non-empty string" // true +!!"" // false +``` + +`TO_BOOL()` is preferred however, because it states the intention clearer. + +### TO_NUMBER() + +`TO_NUMBER(value) → number` + +Take an input *value* of any type and convert it into a numeric value. + +- **value** (any): input of arbitrary type +- returns **number** (number): + - *null* and *false* are converted to the value *0* + - *true* is converted to *1* + - Numbers keep their original value + - Strings are converted to their numeric equivalent if the string contains a + valid representation of a number. Whitespace at the start and end of the string + is allowed. String values that do not contain any valid representation of a number + will be converted to the number *0*. + - An empty array is converted to *0*, an array with one member is converted into the + result of `TO_NUMBER()` for its sole member. An array with two or more members is + converted to the number *0*. + - An object / document is converted to the number *0*. + - A unary plus will also cast to a number, but `TO_NUMBER()` is the preferred way: + ```aql + +'5' // 5 + +[8] // 8 + +[8,9] // 0 + +{} // 0 + ``` + - A unary minus works likewise, except that a numeric value is also negated: + ```aql + -'5' // -5 + -[8] // -8 + -[8,9] // 0 + -{} // 0 + ``` + +### TO_STRING() + +`TO_STRING(value) → str` + +Take an input *value* of any type and convert it into a string value. 
+ +- **value** (any): input of arbitrary type +- returns **str** (string): + - *null* is converted to an empty string `""` + - *false* is converted to the string *"false"*, *true* to the string *"true"* + - Numbers are converted to their string representations. This can also be a + scientific notation (e.g. "2e-7") + - Arrays and objects / documents are converted to string representations, + which means JSON-encoded strings with no additional whitespace + +```aql +TO_STRING(null) // "" +TO_STRING(true) // "true" +TO_STRING(false) // "false" +TO_STRING(123) // "123" +TO_STRING(+1.23) // "1.23" +TO_STRING(-1.23) // "-1.23" +TO_STRING(0.0000002) // "2e-7" +TO_STRING( [1, 2, 3] ) // "[1,2,3]" +TO_STRING( { foo: "bar", baz: null } ) // "{\"foo\":\"bar\",\"baz\":null}" +``` + +### TO_ARRAY() + +`TO_ARRAY(value) → array` + +Take an input *value* of any type and convert it into an array value. + +- **value** (any): input of arbitrary type +- returns **array** (array): + - *null* is converted to an empty array + - Boolean values, numbers and strings are converted to an array containing + the original value as its single element + - Arrays keep their original value + - Objects / documents are converted to an array containing their attribute + **values** as array elements, just like [`VALUES()`](document-object.md#values) + +```aql +TO_ARRAY(null) // [] +TO_ARRAY(false) // [false] +TO_ARRAY(true) // [true] +TO_ARRAY(5) // [5] +TO_ARRAY("foo") // ["foo"] +TO_ARRAY([1, 2, "foo"]) // [1, 2, "foo"] +TO_ARRAY({foo: 1, bar: 2, baz: [3, 4, 5]}) // [1, 2, [3, 4, 5]] +``` + +### TO_LIST() + +`TO_LIST(value) → array` + +This is an alias for [`TO_ARRAY()`](#to_array). + +## Type check functions + +AQL also offers functions to check the data type of a value at runtime. The +following type check functions are available. Each of these functions takes an +argument of any data type and returns true if the value has the type that is +checked for, and false otherwise. + +### IS_NULL() + +`IS_NULL(value) → bool` + +Check whether *value* is *null*. Identical to `value == null`. + +To test if an attribute exists, see [`HAS()`](document-object.md#has) instead. 
+ +- **value** (any): value to test +- returns **bool** (boolean): *true* if *value* is `null`, + *false* otherwise + +### IS_BOOL() + +`IS_BOOL(value) → bool` + +Check whether *value* is a *boolean* value + +- **value** (any): value to test +- returns **bool** (boolean): *true* if *value* is `true` or `false`, + *false* otherwise + +### IS_NUMBER() + +`IS_NUMBER(value) → bool` + +Check whether *value* is a number + +- **value** (any): value to test +- returns **bool** (boolean): *true* if *value* is a number, + *false* otherwise + +### IS_STRING() + +`IS_STRING(value) → bool` + +Check whether *value* is a string + +- **value** (any): value to test +- returns **bool** (boolean): *true* if *value* is a string, + *false* otherwise + +### IS_ARRAY() + +`IS_ARRAY(value) → bool` + +Check whether *value* is an array / list + +- **value** (any): value to test +- returns **bool** (boolean): *true* if *value* is an array / list, + *false* otherwise + +### IS_LIST() + +`IS_LIST(value) → bool` + +This is an alias for [`IS_ARRAY()`](#is_array) + +### IS_OBJECT() + +`IS_OBJECT(value) → bool` + +Check whether *value* is an object / document + +- **value** (any): value to test +- returns **bool** (boolean): *true* if *value* is an object / document, + *false* otherwise + +### IS_DOCUMENT() + +`IS_DOCUMENT(value) → bool` + +This is an alias for [`IS_OBJECT()`](#is_object) + +### IS_DATESTRING() + +`IS_DATESTRING(str) → bool` + +Check whether *value* is a string that can be used in a date function. +This includes partial dates such as *"2015"* or *"2015-10"* and strings +containing properly formatted but invalid dates such as *"2015-02-31"*. + +- **str** (string): date string to test +- returns **bool** (boolean): *true* if *str* is a correctly formatted date string, + *false* otherwise including all non-string values, even if some of them may be usable + in date functions (numeric timestamps) + +### IS_IPV4() + +See [String Functions](string.md#is_ipv4). + +### IS_KEY() + +`IS_KEY(str) → bool` + +Check whether *value* is a string that can be used as a +document key, i.e. as the value of the *_key* attribute. +See [Document keys](../../concepts/data-structure/documents/_index.md#document-keys). + +- **str** (string): document key to test +- returns **bool** (boolean): whether *str* can be used as document key + +### TYPENAME() + +`TYPENAME(value) → typeName` + +Return the data type name of *value*. 
+ +- **value** (any): input of arbitrary type +- returns **typeName** (string): data type name of *value* + (`"null"`, `"bool"`, `"number"`, `"string"`, `"array"` or `"object"`) + +Example Value | Data Type Name +---------------:|--------------- +`null` | `"null"` +`true` | `"bool"` +`false` | `"bool"` +`123` | `"number"` +`-4.56` | `"number"` +`0` | `"number"` +`"foobar"` | `"string"` +`"123"` | `"string"` +`""` | `"string"` +`[ 1, 2, 3 ]` | `"array"` +`["foo",true]` | `"array"` +`[ ]` | `"array"` +`{"foo":"bar"}` | `"object"` +`{"foo": null}` | `"object"` +`{ }` | `"object"` diff --git a/site/content/arangodb/oem/aql/fundamentals/_index.md b/site/content/arangodb/oem/aql/fundamentals/_index.md new file mode 100644 index 0000000000..1d1089e0a4 --- /dev/null +++ b/site/content/arangodb/oem/aql/fundamentals/_index.md @@ -0,0 +1,8 @@ +--- +title: AQL Fundamentals +menuTitle: Fundamentals +weight: 10 +description: >- + Learn about the core aspects of ArangoDB's query language, like the structure + of queries, the available data types, as well as result and error handling +--- diff --git a/site/content/arangodb/oem/aql/fundamentals/accessing-data-from-collections.md b/site/content/arangodb/oem/aql/fundamentals/accessing-data-from-collections.md new file mode 100644 index 0000000000..a757e89208 --- /dev/null +++ b/site/content/arangodb/oem/aql/fundamentals/accessing-data-from-collections.md @@ -0,0 +1,78 @@ +--- +title: Accessing data from collections with AQL +menuTitle: Accessing data from collections +weight: 25 +description: >- + You can access collection data by looping over a collection and reading + document attributes, with non-existing attributes returning a `null` value +--- +A collection can be thought of as an array of documents. To access the documents, +use a [`FOR` operation](../high-level-operations/for.md) to iterate over a +collection using its name, like `FOR doc IN collection ...`. + +Note that when iterating over a collection, the order of documents is undefined. +To establish an explicit and deterministic order for the documents, use a +[`SORT` operation](../high-level-operations/sort.md) in addition. + +Data in collections is stored in documents, which are JSON objects. Each document +potentially has different attributes than other documents. This is true even for +documents of the same collection. + +It is therefore quite normal to encounter documents that do not have some or all +of the attributes that are queried in an AQL query. In this case, the +non-existing attributes in the document are treated as if they would exist +with a value of `null`. This means that comparing a document attribute to +`null` returns `true` if the document has the particular attribute and the +attribute has a value of `null`, or that the document does not have the +particular attribute at all. + +For example, the following query returns all documents from the collection +`users` that have a value of `null` in the attribute `name`, plus all documents +from `users` that do not have the `name` attribute at all: + +```aql +FOR u IN users + FILTER u.name == null + RETURN u +``` + +Furthermore, `null` is less than any other value (excluding `null` itself). That +means documents with non-existing attributes may be included in the result +when comparing attribute values with the less than or less equal operators. 
+ +For example, the following query returns all documents from the collection +`users` that have an attribute `age` with a value less than `39`, but also all +documents from the collection that do not have the attribute `age` at all. + +```aql +FOR u IN users + FILTER u.age < 39 + RETURN u +``` + +This behavior should always be taken into account when writing queries. + +To distinguish between an explicit `null` value and the implicit `null` value +you get if you access a non-existent attribute, you can use the +[`HAS()` function](../functions/document-object.md#has). The following query +only returns documents that have a `name` attribute with a `null` value: + +```aql +FOR u IN users + FILTER u.name == null AND HAS(u, "name") + RETURN u +``` + +To exclude implicit as well as explicit `null` values in a query that uses +`<` or `<=` comparison operators to limit the upper bound, you can add a check +for the lower bound: + +```aql +FOR u IN users + FILTER u.age > null AND u.age < 39 + // or potentially + //FILTER u.age >= 0 AND u.age < 39 + // which can be replaced with + //FILTER RANGE(u.age, 0, 39, true, false) + RETURN u +``` diff --git a/site/content/arangodb/oem/aql/fundamentals/bind-parameters.md b/site/content/arangodb/oem/aql/fundamentals/bind-parameters.md new file mode 100644 index 0000000000..4bb29ea3fb --- /dev/null +++ b/site/content/arangodb/oem/aql/fundamentals/bind-parameters.md @@ -0,0 +1,169 @@ +--- +title: Bind parameters in AQL +menuTitle: Bind Parameters +weight: 15 +description: >- + Bind parameters allow you to separate the query logic from literal values used + in the query and safely use user-provided input for these placeholders +--- +It is good practice to separate the query text from the literal values because +it prevents (malicious) injection of keywords and other collection names into an +existing query. This injection would be dangerous because it may change the +meaning of an existing query. + +Using bind parameters, the meaning of an existing query cannot be changed. +Bind parameters can be used everywhere in a query where literals can be used. +This lets you turn literals into a sort of variables to reuse the same query +with different parameterization. + +## Syntax + +The general syntax for bind parameters is `@name` where `@` signifies that this +is a value bind parameter and *name* is the actual parameter name. It can be +used to substitute values in a query. + +```aql +RETURN @value +``` + +For collections, there is a slightly different syntax `@@coll` where `@@` +signifies that it is a collection bind parameter and *coll* is the parameter +name. + +```aql +FOR doc IN @@coll + RETURN doc +``` + +Keywords and other language constructs cannot be replaced by bind values, such +as `FOR`, `FILTER`, `IN`, `INBOUND` or function calls. + +Bind parameter names must start with any of the letters *a* to *z* (upper or +lower case) or a digit (*0* to *9*), and can be followed by any letter, digit +or the underscore symbol. + +They must not be quoted in the query code: + +```aql +FILTER u.name == "@name" // wrong +FILTER u.name == @name // correct +``` + +```aql +FOR doc IN "@@collection" // wrong +FOR doc IN @@collection // correct +``` + +If you need to do string processing (concatenation, etc.) 
in the query, you +need to use [string functions](../functions/string.md) to do so: + +```aql +FOR u IN users + FILTER u.id == CONCAT('prefix', @id, 'suffix') && u.name == @name + RETURN u +``` + +## Usage + +### General + +The bind parameter values need to be passed along with the query when it is +executed, but not as part of the query text itself. In the web interface, +there is a pane next to the query editor where the bind parameters can be +entered. For below query, two input fields will show up to enter values for +the parameters `id` and `name`. + +```aql +FOR u IN users + FILTER u.id == @id && u.name == @name + RETURN u +``` + +When using `db._query()` (in arangosh for instance), then an +object of key-value pairs can be passed for the parameters. Such an object +can also be passed to the HTTP API endpoint `_api/cursor`, as attribute +value for the key `bindVars`: + +```json +{ + "query": "FOR u IN users FILTER u.id == @id && u.name == @name RETURN u", + "bindVars": { + "id": 123, + "name": "John Smith" + } +} +``` + +Bind parameters that are declared in the query must also be passed a parameter +value, or the query will fail. Specifying parameters that are not declared in +the query will result in an error too. + +Specific information about parameters binding can also be found in: + +- [AQL with Web Interface](../how-to-invoke-aql/with-the-web-interface.md) +- [AQL with _arangosh_](../how-to-invoke-aql/with-arangosh.md) +- [HTTP interface for AQL queries](../../develop/http-api/queries/aql-queries.md) + +### Nested attributes + +Bind parameters can be used for both, the dot notation as well as the square +bracket notation for sub-attribute access. They can also be chained: + +```aql +LET doc = { foo: { bar: "baz" } } + +RETURN doc.@attr.@subattr +// or +RETURN doc[@attr][@subattr] +``` + +```json +{ + "attr": "foo", + "subattr": "bar" +} +``` + +Both variants in above example return `[ "baz" ]` as query result. + +The whole attribute path, for highly nested data in particular, can also be +specified using the dot notation and a single bind parameter, by passing an +array of strings as parameter value. The elements of the array represent the +attribute keys of the path: + +```aql +LET doc = { a: { b: { c: 1 } } } +RETURN doc.@attr +``` + +```json +{ "attr": [ "a", "b", "c" ] } +``` + +The example query returns `[ 1 ]` as result. Note that `{ "attr": "a.b.c" }` +would return the value of an attribute called `a.b.c`, not the value of +attribute `c` with the parents `a` and `b` as `[ "a", "b", "c" ]` would. + +### Collection bind parameters + +A special type of bind parameter exists for injecting collection names. This +type of bind parameter has a name prefixed with an additional `@` symbol, so +`@@name` in the query. 
+
+```aql
+FOR u IN @@collection
+  FILTER u.active == true
+  RETURN u
+```
+
+The second `@` will be part of the bind parameter name, which is important to
+remember when specifying the `bindVars` (note the leading `@`):
+
+```json
+{
+  "query": "FOR u IN @@collection FILTER u.active == true RETURN u",
+  "bindVars": {
+    "@collection": "users"
+  }
+}
+```
diff --git a/site/content/arangodb/oem/aql/fundamentals/data-types.md b/site/content/arangodb/oem/aql/fundamentals/data-types.md
new file mode 100644
index 0000000000..51719ddb7c
--- /dev/null
+++ b/site/content/arangodb/oem/aql/fundamentals/data-types.md
@@ -0,0 +1,290 @@
+---
+title: Data types in AQL
+menuTitle: Data types
+weight: 10
+description: >-
+  AQL supports both _primitive_ data types consisting of exactly one value and
+  _compound_ data types comprised of multiple values
+---
+The following types are available:
+
+| Data type | Description |
+|------------:|-------------|
+| **null** | An empty value, also: the absence of a value
+| **boolean** | Boolean truth value with possible values *false* and *true*
+| **number** | Signed (real) number
+| **string** | UTF-8 encoded text value
+| **array** / list | Sequence of values, referred to by their positions
+| **object** / document | Sequence of values, referred to by their names
+
+## Primitive types
+
+### Null value
+
+A `null` value can be used to represent an empty or absent value.
+It is different from a numerical value of zero (`null != 0`) and other
+*falsy* values (`false` or a zero-length string `""`).
+It is also known as *nil* or *None* in other languages.
+
+The system may return `null` in the absence of value, for example
+if you call a [function](../functions/_index.md) with unsupported values
+as arguments or if you try to [access an attribute](accessing-data-from-collections.md)
+which does not exist.
+
+### Boolean data type
+
+The Boolean data type has two possible values, `true` and `false`.
+They represent the two truth values in logic and mathematics.
+
+### Numeric literals
+
+Numeric literals can be integers or real values (floating-point numbers).
+They can optionally be signed with the `+` or `-` symbols.
+A decimal point `.` is used as separator for the optional fractional part.
+The scientific notation (*E-notation*) is also supported.
+
+```
+  1
+ +1
+ 42
+ -1
+-42
+ 1.23
+-99.99
+ 0.5
+ .5
+ -4.87e103
+ -4.87E103
+```
+
+The following notations are invalid and will throw a syntax error:
+
+```
+ 1.
++01.23
+00.23
+00
+```
+
+All numeric values are treated as 64-bit signed integer or 64-bit
+double-precision floating point values internally. The internal floating-point
+format used is IEEE 754.
+
+{{< warning >}}
+When exposing any numeric integer values to JavaScript via
+[user-defined AQL functions](../user-defined-functions.md), numbers that exceed 32 bit
+precision are converted to floating-point values, so large integers can lose
+some bits of precision. The same is true when converting AQL numeric results to
+JavaScript (e.g. returning them to Foxx).
+{{< /warning >}}
+
+Numeric integer literals can also be expressed as binary
+(base 2) or hexadecimal (base 16) number literals.
+
+- The prefix for binary integer literals is `0b`, e.g. `0b10101110`.
+- The prefix for hexadecimal integer literals is `0x`, e.g. `0xabcdef02`.
+
+Binary and hexadecimal integer literals can only be used for unsigned integers.
+The maximum supported value for binary and hexadecimal numeric literals is
+2<sup>32</sup> - 1, i.e.
`0b11111111111111111111111111111111` (binary) or +`0xffffffff` (hexadecimal). + +### String literals + +String literals must be enclosed in single or double quotes. If the used quote +character is to be used itself within the string literal, it must be escaped +using the backslash symbol. A literal backslash also needs to be escaped with +a backslash. + +```aql +"yikes!" +"don't know" +"this is a \"quoted\" word" +"this is a longer string." +"the path separator on Windows is \\" + +'yikes!' +'don\'t know' +'this is a "quoted" word' +'this is a longer string.' +'the path separator on Windows is \\' +``` + +All string literals must be UTF-8 encoded. It is currently not possible to use +arbitrary binary data if it is not UTF-8 encoded. A workaround to use binary +data is to encode the data using [Base64](https://en.wikipedia.org/wiki/Base64) +or other algorithms on the application +side before storing, and decoding it on application side after retrieval. + +## Compound types + +AQL supports two compound types: + +- **array**: A composition of unnamed values, each accessible + by their positions. Sometimes called *list*. +- **object**: A composition of named values, each accessible + by their names. A *document* is an object at the top level. + +### Arrays / Lists + +The first supported compound type is the array type. Arrays are effectively +sequences of (unnamed / anonymous) values. Individual array elements can be +accessed by their positions. The order of elements in an array is important. + +An *array declaration* starts with a left square bracket `[` and ends with +a right square bracket `]`. The declaration contains zero, one or more +*expression*s, separated from each other with the comma `,` symbol. +Whitespace around elements is ignored in the declaration, thus line breaks, +tab stops and blanks can be used for formatting. + +In the easiest case, an array is empty and thus looks like: + +```json +[ ] +``` + +Array elements can be any legal *expression* values. Nesting of arrays is +supported. + +```json +[ true ] +[ 1, 2, 3 ] +[ -99, "yikes!", [ false, ["no"], [] ], 1 ] +[ [ "fox", "marshal" ] ] +``` + +A trailing comma after the last element is allowed: + +```aql +[ + 1, + 2, + 3, // trailing comma +] +``` + +Individual array values can later be accessed by their positions using the `[]` +accessor: + +```aql +u.friends[0] // access 1st array element +u.friends[-1] // access last array element +``` + +For more details about this array operator, see +[Indexed value access](../operators.md#indexed-value-access). + +### Objects / Documents + +The other supported compound type is the object (or document) type. Objects are a +composition of zero to many attributes. Each attribute is a name/value pair. +Object attributes can be accessed individually by their names. This data type is +also known as dictionary, map, associative array and other names. + +Object declarations start with a left curly bracket `{` and end with a +right curly bracket `}`. An object contains zero to many attribute declarations, +separated from each other with the `,` symbol. Whitespace around elements is ignored +in the declaration, thus line breaks, tab stops and blanks can be used for formatting. + +In the simplest case, an object is empty. Its declaration would then be: + +```json +{ } +``` + +Each attribute in an object is a name/value pair. Name and value of an +attribute are separated using the colon `:` symbol. The name is always a string, +whereas the value can be of any type including sub-objects. 
+ +The attribute name is mandatory - there can't be anonymous values in an object. +It can be specified as a quoted or unquoted string: + +```aql +{ name: … } // unquoted +{ 'name': … } // quoted (apostrophe / "single quote mark") +{ "name": … } // quoted (quotation mark / "double quote mark") +``` + +It must be quoted if it contains whitespace, escape sequences or characters +other than ASCII letters (`a`-`z`, `A`-`Z`), digits (`0`-`9`), +underscores (`_`) and dollar signs (`$`). The first character has to be a +letter, underscore or dollar sign. + +If a [keyword](syntax.md#keywords) is used as an attribute name, +then the attribute name must be quoted: + +```aql +{ return: … } // error, return is a keyword! +{ 'return': … } // quoted string literal (single quote marks) +{ "return": … } // quoted string literal (double quote marks) +{ `return`: … } // quoted name (backticks) +{ ´return´: … } // quoted name (forward ticks) +``` + +A trailing comma after the last element is allowed: + +```aql +{ + "a": 1, + "b": 2, + "c": 3, // trailing comma +} +``` + +Attribute names can be computed using dynamic expressions, too. +To disambiguate regular attribute names from attribute name expressions, +computed attribute names must be enclosed in square brackets `[ … ]`: + +```aql +{ [ CONCAT("test/", "bar") ] : "someValue" } +``` + +There is also shorthand notation for attributes which is handy for +returning existing variables easily: + +```aql +LET name = "Peter" +LET age = 42 +RETURN { name, age } +``` + +The above is the shorthand equivalent for the generic form: + +```aql +LET name = "Peter" +LET age = 42 +RETURN { name: name, age: age } +``` + +Any valid expression can be used as an attribute value. That also means nested +objects can be used as attribute values: + +```aql +{ name : "Peter" } +{ "name" : "Vanessa", "age" : 15 } +{ "name" : "John", likes : [ "Swimming", "Skiing" ], "address" : { "street" : "Cucumber lane", "zip" : "94242" } } +``` + +Individual object attributes can later be accessed by their names using the +dot `.` accessor: + +```aql +u.address.city.name +u.friends[0].name.first +``` + +Attributes can also be accessed using the square bracket `[]` accessor. +In contrast to the dot accessor, the square brackets allow for expressions. +Note that the accessor for array elements also uses square brackets: + +```aql +u["address"]["city"]["name"] +u["friends"][0]["name"]["first"] + +LET attr1 = "friends" +LET attr2 = "name" +u[attr1][0][attr2][ CONCAT("fir", "st") ] +``` + +For more details about these object operators, see +[Attribute access](../operators.md#attribute-access). diff --git a/site/content/arangodb/oem/aql/fundamentals/limitations.md b/site/content/arangodb/oem/aql/fundamentals/limitations.md new file mode 100644 index 0000000000..35c488c31d --- /dev/null +++ b/site/content/arangodb/oem/aql/fundamentals/limitations.md @@ -0,0 +1,104 @@ +--- +title: Known limitations for AQL queries +menuTitle: Limitations +weight: 45 +description: >- + AQL has restrictions with regards to the complexity of queries and the data + they operate on, as well as design limitations to be aware of +--- +## Complexity limitations + +The following hard-coded limitations exist for AQL queries: + +- An AQL query cannot use more than _1000_ result registers. + One result register is needed for every named query variable and for + internal/anonymous query variables, e.g. for intermediate results. + Subqueries also require result registers. 
+- An AQL query cannot have more than _4000_ execution nodes in its initial + query execution plan. This number includes all execution nodes of the + initial execution plan, even if some of them could be + optimized away later by the query optimizer during plan optimization. +- An AQL query cannot use more than _2048_ collections/shards. + {{< tip >}} + From version 3.10.7 onward, this limit is configurable via the + `--query.max-collections-per-query` startup option. + {{< /tip >}} +- Expressions in AQL queries cannot have a nesting of more than _500_ levels. + As an example, the expression `1 + 2 + 3 + 4` is 3 levels deep + (because it is interpreted and executed as `1 + (2 + (3 + 4))`). +- When reading any data from JSON or VelocyPack input or when serializing + any data to JSON or VelocyPack, there is a maximum recursion depth for + nested arrays and objects, which is slightly below 200. Arrays or objects + with higher nesting than this cause `Too deep nesting in Array/Object` + exceptions. + +Please note that even queries that are still below these limits may not +yield good performance, especially when they have to put together data from lots +of different collections. Please also consider that large queries (in terms of +intermediate result size or final result size) can use considerable amounts of +memory and may hit the configurable memory limits for AQL queries. + +## Design limitations + +The following design limitations are known for AQL queries: + +- Subqueries that are used inside expressions are pulled out of these + expressions and executed beforehand. That means that subqueries do not + participate in lazy evaluation of operands, for example in the + [ternary operator](../operators.md#ternary-operator). Also see + [evaluation of subqueries](subqueries.md#evaluation-of-subqueries). +- It is not possible to use a collection in a read operation after + it was used for a write operation in the same AQL query. +- In the cluster, all collections that are accessed **dynamically** by + [traversals working with collection sets](../graph-queries/traversals.md#working-with-collection-sets) + (instead of named graphs) must be stated in the query's initial + [`WITH` statement](../high-level-operations/with.md). To make the `WITH` statement + required in single server as well (e.g. for testing a migration to cluster), + please start the server with the option `--query.require-with`. + +## Storage engine properties + +{{< info >}} +The following restrictions and limitations do not apply to JavaScript Transactions +and Stream Transactions, including AQL queries that run inside such transactions. +Their intended use case is for smaller transactions with full transactional +guarantees. So the following only applies to standalone AQL queries. +{{< /info >}} + +Data of ongoing transactions is stored in RAM. Transactions that get too big +(in terms of number of operations involved or the total size of data created or +modified by the transaction) are committed automatically. Effectively, this +means that big user transactions are split into multiple smaller RocksDB +transactions that are committed individually. The entire user transaction does +not necessarily have ACID properties in this case. + +The following startup options can be used to control the RAM usage and automatic +intermediate commits for the RocksDB engine: + +- `--rocksdb.max-transaction-size` + + Transaction size limit (in bytes). 
Transactions store all keys and values in + RAM, so large transactions run the risk of causing out-of-memory situations. + This setting allows you to ensure that does not happen by limiting the size of + any individual transaction. Transactions whose operations would consume more + RAM than this threshold value will abort automatically with error 32 ("resource + limit exceeded"). + +- `--rocksdb.intermediate-commit-size` + + If the size of all operations in a transaction reaches this threshold, the transaction + is committed automatically and a new transaction is started. The value is specified in bytes. + +- `--rocksdb.intermediate-commit-count` + + If the number of operations in a transaction reaches this value, the transaction is + committed automatically and a new transaction is started. + +The above values can also be adjusted per query, for example, by setting the +following attributes in the call to `db._query()` in the JavaScript API: + +- `maxTransactionSize`: transaction size limit in bytes +- `intermediateCommitSize`: maximum total size of operations after which an intermediate + commit is performed automatically +- `intermediateCommitCount`: maximum number of operations after which an intermediate + commit is performed automatically diff --git a/site/content/arangodb/oem/aql/fundamentals/query-errors.md b/site/content/arangodb/oem/aql/fundamentals/query-errors.md new file mode 100644 index 0000000000..d1b7f507fa --- /dev/null +++ b/site/content/arangodb/oem/aql/fundamentals/query-errors.md @@ -0,0 +1,41 @@ +--- +title: AQL query errors +menuTitle: Query Errors +weight: 40 +description: >- + Errors can occur for queries at compile time, like for syntax errors and + missing collections, but warnings and errors can also occur during query + execution +--- +Issuing an invalid query to the server results in a parse error if the query +is syntactically invalid. ArangoDB detects such errors during query +inspection and aborts further processing. The error number and an error +message are returned so that you can fix the errors. + +If a query passes the parsing stage, all collections explicitly referenced in +the query are known. If any of these collections doesn't exist, the query execution +is aborted and an appropriate error message is returned. + +Under some circumstances, executing a query may also produce errors or warnings +at runtime. This cannot be predicted from inspecting the query text alone. +This is because query operations can be data-dependent or are only evaluated +during the query execution, like looking up documents dynamically or using +document attributes that not all documents of the collection have. This can +subsequently lead to errors or warnings if these cases are not accounted for. + +Some examples of runtime errors: + +- **Division by zero**: Raised when an attempt is made to use the value + `0` as the divisor in an arithmetic division or modulus operation +- **Invalid operands for arithmetic operations**: Raised when an attempt + is made to use any non-numeric values as operands in arithmetic operations. + This includes unary (unary minus, unary plus) and binary operations (plus, + minus, multiplication, division, and modulus) +- **Array expected in query**: Raised when a non-array operand is used for an + operation that expects an array argument operand. 
This can happen if you
+  try to iterate over an attribute with a `FOR` operation, expecting it to be an
+  array, but if the attribute doesn't exist, then it has a value of `null` which
+  cannot be looped over.
+
+See the [Error codes and meanings](../../develop/error-codes.md)
+for a complete list of ArangoDB errors.
diff --git a/site/content/arangodb/oem/aql/fundamentals/query-results.md b/site/content/arangodb/oem/aql/fundamentals/query-results.md
new file mode 100644
index 0000000000..bbee073926
--- /dev/null
+++ b/site/content/arangodb/oem/aql/fundamentals/query-results.md
@@ -0,0 +1,114 @@
+---
+title: AQL query results
+menuTitle: Query Results
+weight: 35
+description: >-
+  The result set of an AQL query is always an array of values, even if it
+  returns a single element only
+---
+AQL queries and also [subqueries](subqueries.md) each produce an array with zero
+or more elements.
+
+An empty array typically means that no (matching) data was found to act upon, or
+that a write query didn't specify anything to return.
+
+```aql
+FOR doc IN emptyCollection
+  RETURN doc // no documents
+```
+
+```aql
+FOR u IN users
+  FILTER u.age == -1 // no matches
+  RETURN u
+```
+
+```aql
+UPDATE { id: 2, active: true } IN users
+// no RETURN operation
+```
+
+The result set of the above examples is empty:
+
+```json
+[ ]
+```
+
+If there is a single result, you get an array with one element back, not the
+result value only.
+
+```aql
+FOR u IN users
+  LIMIT 1
+  RETURN u.name
+```
+
+```json
+[ "John" ]
+```
+
+If there are multiple results, you get an array with many elements back.
+
+```aql
+FOR u IN users
+  RETURN u.name
+```
+
+```json
+[
+  "John",
+  "Vanessa",
+  "Amy"
+]
+```
+
+The individual values in the result array of a query may or may not have a
+homogeneous structure, depending on what is actually queried.
+
+For example, the individual documents of a collection can use different sets of
+attribute names. When returning data from a collection with inhomogeneous
+documents without modification, the result values have an inhomogeneous structure,
+too. Each result value itself is a document:
+
+```aql
+FOR u IN users
+  RETURN u
+```
+
+```json
+[
+  { "id": 1, "name": "John", "active": false },
+  { "age": 32, "id": 2, "name": "Vanessa" },
+  { "friends": [ "John", "Vanessa" ], "id": 3, "name": "Amy" }
+]
+```
+
+However, if a fixed set of attributes from the collection is queried, then the
+query result values have a homogeneous structure. Each result value is
+still (a projection of) a document:
+
+```aql
+FOR u IN users
+  RETURN { "id": u.id, "name": u.name }
+```
+
+```json
+[
+  { "id": 1, "name": "John" },
+  { "id": 2, "name": "Vanessa" },
+  { "id": 3, "name": "Amy" }
+]
+```
+
+It is also possible to query scalar values only. In this case, the result set
+is an array of scalars, and each result value is a scalar value:
+
+```aql
+FOR u IN users
+  RETURN u.id
+```
+
+```json
+[ 1, 2, 3 ]
+```
diff --git a/site/content/arangodb/oem/aql/fundamentals/subqueries.md b/site/content/arangodb/oem/aql/fundamentals/subqueries.md
new file mode 100644
index 0000000000..2efef66361
--- /dev/null
+++ b/site/content/arangodb/oem/aql/fundamentals/subqueries.md
@@ -0,0 +1,188 @@
+---
+title: Combining queries with subqueries in AQL
+menuTitle: Subqueries
+weight: 30
+description: >-
+  Subqueries let you form complex requests and allow you to process more data
+  with a single query
+---
+## How to use subqueries
+
+Wherever an expression is allowed in AQL, a subquery can be placed. A subquery
+is a query part that can introduce its own local variables without affecting
+variables and values in its outer scope(s).
+
+It is required that subqueries be put inside parentheses `(` and `)` to
+explicitly mark their start and end points:
+
+```aql
+FOR p IN persons
+  LET recommendations = ( // subquery start
+    FOR r IN recommendations
+      FILTER p.id == r.personId
+      SORT p.rank DESC
+      LIMIT 10
+      RETURN r
+  ) // subquery end
+  RETURN { person : p, recommendations : recommendations }
+```
+
+A subquery's result can be assigned to a variable with
+[`LET`](../high-level-operations/let.md) as shown above, so that it can be referenced
+multiple times or just to improve the query readability.
+
+Function calls also use parentheses and AQL allows you to omit an extra pair if
+you want to use a subquery as sole argument for a function, e.g.
+`MAX(<subquery>)` instead of `MAX((<subquery>))`:
+
+```aql
+FOR p IN persons
+  COLLECT city = p.city INTO g
+  RETURN {
+    city : city,
+    numPersons : LENGTH(g),
+    maxRating: MAX( // subquery start
+      FOR r IN g
+        RETURN r.p.rating
+    ) // subquery end
+  }
+```
+
+The extra wrapping is required if there is more than one function argument,
+however, e.g. `NOT_NULL((RETURN "ok"), "fallback")`.
+
+Subqueries may also include other subqueries.
+
+## Subquery results and unwinding
+
+Subqueries always return a result **array**, even if there is only
+a single return value:
+
+```aql
+RETURN ( RETURN 1 )
+```
+
+```json
+[ [ 1 ] ]
+```
+
+To avoid such a nested data structure, [`FIRST()`](../functions/array.md#first)
+can be used for example:
+
+```aql
+RETURN FIRST( RETURN 1 )
+```
+
+```json
+[ 1 ]
+```
+
+To unwind the result array of a subquery so that each element is returned as
+top-level element in the overall query result, you can use a `FOR` loop:
+
+```aql
+FOR elem IN (RETURN 1..3) // [1,2,3]
+  RETURN elem
+```
+
+```json
+[
+  1,
+  2,
+  3
+]
+```
+
+Without unwinding, the query would be `RETURN (RETURN 1..3)` and the result
+a nested array `[ [ 1, 2, 3 ] ]` with a single top-level element.
+
+## Evaluation of subqueries
+
+Subqueries that are used inside expressions are pulled out of these
+expressions and executed beforehand. That means that subqueries do not
+participate in lazy evaluation of operands, for example in the
+[ternary operator](../operators.md#ternary-operator).
+
+Consider the following query:
+
+```aql
+RETURN RAND() > 0.5 ? (RETURN 1) : 0
+```
+
+It gets transformed into something more like this, with the calculation of the
+subquery happening before the evaluation of the condition:
+
+```aql
+LET temp1 = (RETURN 1)
+LET temp2 = RAND() > 0.5 ? temp1 : 0
+RETURN temp2
+```
+
+The subquery is executed regardless of the condition. In other words, there is
+no short-circuiting that would avoid the subquery from running in the case that
+the condition evaluates to `false`. You may need to take this into account to
+avoid query errors like
+
+> Query: AQL: collection or array expected as operand to FOR loop; you provided
+> a value of type 'null' (while executing)
+
+```aql
+LET maybe = DOCUMENT("coll/does_not_exist")
+LET dependent = maybe ? (
+  FOR attr IN ATTRIBUTES(maybe)
+    RETURN attr
+) : "document not found"
+RETURN dependent
+```
+
+The problem is that the subquery is executed under all circumstances, despite
+the check whether `DOCUMENT()` found a document or not. It does not take into
+account that `maybe` can be `null`, which cannot be iterated over with `FOR`.
+A possible solution is to fall back to an empty array in the subquery to +effectively prevent the loop body from being run: + +```aql +LET maybe = DOCUMENT("coll/does_not_exist") +LET dependent = maybe ? ( + FOR attr IN NOT_NULL(ATTRIBUTES(maybe || {}), []) + RETURN attr +) : "document not found" +RETURN dependent +``` + +The additional fallback `maybe || {}` prevents a query warning + +> invalid argument type in call to function 'ATTRIBUTES()' + +that originates from a `null` value getting passed to the `ATTRIBUTES()` +function that expects an object. + +Similarly, when you use subqueries as sub-expressions that are combined with +logical `AND` or `OR`, the subqueries are always executed: + +```aql +RETURN false AND (RETURN ASSERT(false, "executed")) +``` + +```aql +RETURN true OR (RETURN ASSERT(false, "executed")) +``` + +If the first operand of a logical `AND` is `false`, the overall result is +`false` regardless of the second operand. If the first operand of a logical `OR` +is `true`, the overall result is `true` regardless of the second operand. +However, the subqueries are run nonetheless, causing both example queries to fail. + +You can prevent the subqueries from executing by prepending a `FILTER` operation +with the value of the logical operator's first operand and negating it in case +of an `OR`: + +```aql +LET cond = false +RETURN cond AND (FILTER cond RETURN ASSERT(false, "executed")) +``` + +```aql +LET cond = true +RETURN cond OR (FILTER !cond RETURN ASSERT(false, "executed")) +``` diff --git a/site/content/arangodb/oem/aql/fundamentals/syntax.md b/site/content/arangodb/oem/aql/fundamentals/syntax.md new file mode 100644 index 0000000000..f07b1ee2ad --- /dev/null +++ b/site/content/arangodb/oem/aql/fundamentals/syntax.md @@ -0,0 +1,347 @@ +--- +title: AQL Syntax +menuTitle: Syntax +weight: 5 +description: >- + Query types, whitespace, comments, keywords, and names in the AQL language + explained +--- +## Query types + +An AQL query must either return a result (indicated by usage of the `RETURN` +keyword) or execute a data-modification operation (indicated by usage +of one of the keywords `INSERT`, `UPDATE`, `REPLACE`, `REMOVE` or `UPSERT`). The AQL +parser will return an error if it detects more than one data-modification +operation in the same query or if it cannot figure out if the query is meant +to be a data retrieval or a modification operation. + +AQL only allows **one** query in a single query string; thus semicolons to +indicate the end of one query and separate multiple queries (as seen in SQL) are +not allowed. + +## Whitespace + +Whitespace (blanks, carriage returns, line feeds, and tab stops) can be used +in the query text to increase its readability. Tokens have to be separated by +any number of whitespace. Whitespace within strings or names must be enclosed +in quotes in order to be preserved. + +## Comments + +Comments can be embedded at any position in a query. The text contained in the +comment is ignored by the AQL parser. + +Multi-line comments cannot be nested, which means subsequent comment starts within +comments are ignored, comment ends will end the comment. + +AQL supports two types of comments: + +- Single line comments: These start with a double forward slash and end at + the end of the line, or the end of the query string (whichever is first). +- Multi line comments: These start with a forward slash and asterisk, and + end with an asterisk and a following forward slash. They can span as many + lines as necessary. 
+ +```aql +/* this is a comment */ RETURN 1 +/* these */ RETURN /* are */ 1 /* multiple */ + /* comments */ 1 +/* this is + a multi line + comment */ +// a single line comment +``` + +## Keywords + +On the top level, AQL offers the following +[high-level operations](../high-level-operations/_index.md): + +| Operation | Description +|:----------|:----------- +| `FOR` | Array iteration +| `RETURN` | Results projection +| `FILTER` | Non-View results filtering +| `SEARCH` | View results filtering +| `SORT` | Result sorting +| `LIMIT` | Result slicing +| `LET` | Variable assignment +| `COLLECT` | Result grouping +| `WINDOW` | Aggregations over related rows +| `INSERT` | Insertion of new documents +| `UPDATE` | (Partial) update of existing documents +| `REPLACE` | Replacement of existing documents +| `REMOVE` | Removal of existing documents +| `UPSERT` | Insertion of new or update of existing documents +| `WITH` | Collection declaration + +Each of the above operations can be initiated in a query by using a keyword of +the same name. An AQL query can (and typically does) consist of multiple of the +above operations. + +An example AQL query may look like this: + +```aql +FOR u IN users + FILTER u.type == "newbie" && u.active == true + RETURN u.name +``` + +In this example query, the terms `FOR`, `FILTER`, and `RETURN` initiate the +higher-level operation according to their name. These terms are also keywords, +meaning that they have a special meaning in the language. + +For example, the query parser will use the keywords to find out which high-level +operations to execute. That also means keywords can only be used at certain +locations in a query. This also makes all keywords **reserved words** that must +not be used for other purposes than they are intended for. + +For example, it is not possible to use a keyword as literal unquoted string +(identifier) for a collection or attribute name. If a collection or attribute +needs to have the same name as a keyword, then the collection or attribute name +needs to be quoted in the query (also see [Names](#names)). + +Keywords are case-insensitive, meaning they can be specified in lower, upper, or +mixed case in queries. In this documentation, all keywords are written in upper +case to make them distinguishable from other query parts. + +There are a few more keywords in addition to the higher-level operation keywords. +Additional keywords may be added in future versions of ArangoDB. +The complete list of keywords is currently: + +- `AGGREGATE` +- `ALL` +- `ALL_SHORTEST_PATHS` +- `AND` +- `ANY` +- `ASC` +- `COLLECT` +- `DESC` +- `DISTINCT` +- `FALSE` +- `FILTER` +- `FOR` +- `GRAPH` +- `IN` +- `INBOUND` +- `INSERT` +- `INTO` +- `K_PATHS` +- `K_SHORTEST_PATHS` +- `LET` +- `LIKE` +- `LIMIT` +- `NONE` +- `NOT` +- `NULL` +- `OR` +- `OUTBOUND` +- `REMOVE` +- `REPLACE` +- `RETURN` +- `SHORTEST_PATH` +- `SORT` +- `TRUE` +- `UPDATE` +- `UPSERT` +- `WINDOW` +- `WITH` +{.columns-3} + +On top of that, there are a few words used in language constructs which are not +reserved keywords. You can use them as collection or attribute names +without having to quote them. 
The query parser can identify them as keyword-like +based on the context: + +- `KEEP` – + [COLLECT](../high-level-operations/collect.md#discarding-obsolete-variables) + operation variant +- `COUNT` – + [COLLECT](../high-level-operations/collect.md#group-length-calculation) + operation variant (`WITH COUNT INTO`) +- `OPTIONS` – + [FOR](../high-level-operations/for.md#options) / + [SEARCH](../high-level-operations/search.md#search-options) / + [COLLECT](../high-level-operations/collect.md#collect-options) / + [INSERT](../high-level-operations/insert.md#query-options) / + [UPDATE](../high-level-operations/update.md#query-options) / + [REPLACE](../high-level-operations/replace.md#query-options) / + [UPSERT](../high-level-operations/upsert.md#query-options) / + [REMOVE](../high-level-operations/remove.md#query-options) operation / + [Graph Traversal](../graph-queries/traversals.md) / + [Shortest Path](../graph-queries/shortest-path.md#path-search-options) / + [k Shortest Paths](../graph-queries/k-shortest-paths.md#path-search-options) / +- `PRUNE` – + [Graph Traversal](../graph-queries/traversals.md#pruning) (`FOR` operation variant) +- `SEARCH` – + [SEARCH](../high-level-operations/search.md) operation +- `TO` – + [Shortest Path](../graph-queries/shortest-path.md) / + [All Shortest Paths](../graph-queries/all-shortest-paths.md) / + [k Shortest Paths](../graph-queries/k-shortest-paths.md) / + [k Paths](../graph-queries/k-paths.md) + +Last but not least, there are special variables which are available in certain +contexts. Unlike keywords, they are **case-sensitive**: + +- `CURRENT` – + available in + [array inline expressions](../operators.md#inline-expressions) and the + [question mark operator](../operators.md#question-mark-operator) +- `NEW` – + available after + [INSERT](../high-level-operations/insert.md#returning-the-inserted-documents) / + [UPDATE](../high-level-operations/update.md#returning-the-modified-documents) / + [REPLACE](../high-level-operations/replace.md#returning-the-modified-documents) / + [UPSERT](../high-level-operations/upsert.md#returning-documents) + operation +- `OLD` – + available after + [UPDATE](../high-level-operations/update.md#returning-the-modified-documents) / + [REPLACE](../high-level-operations/replace.md#returning-the-modified-documents) / + [UPSERT](../high-level-operations/upsert.md#returning-documents) / + [REMOVE](../high-level-operations/remove.md#returning-the-removed-documents) + operation + +If you define a variable with the same name in the same scope, then its value +will be and remain at what you set it to. Hence you need to avoid these names +for your own variables if you want to access the special variable values. + +## Names + +In general, names are used to identify the following things in AQL queries: +- collections +- attributes +- variables +- functions + +Names in AQL are always case-sensitive. +The maximum supported length for collection/View names is 256 bytes. +Variable names can be longer, but are discouraged. + +Keywords should not be used as names. If you want to use a reserved keyword as +name anyway, the name must be enclosed in backticks or forward ticks. This is referred to as _quoting_. + +```aql +FOR doc IN `filter` + RETURN doc.`sort` +``` + +Due to the backticks, `filter` and `sort` are interpreted as names and not as +keywords here. 
+ +You can also use forward ticks: + +```aql +FOR f IN ´filter´ + RETURN f.´sort´ +``` + +Instead of ticks, you may use the bracket notation for the attribute access: + +```aql +FOR f IN `filter` + RETURN f["sort"] +``` + +`sort` is a string literal in quote marks in this alternative and does thus not +conflict with the reserved keyword. + +Quoting with ticks is also required if certain characters such as +hyphen minus (`-`) are contained in a name, namely if they are used for +[operators](../operators.md) in AQL: + +```aql +LET `my-var` = 42 +``` + +### Collection names + +You can typically use collection names in queries as they are. If a collection +happens to have the same name as a keyword, the name must be enclosed in +backticks or forward ticks. + +Quoting with ticks is also required if special characters such as +hyphen minus (`-`) are contained in a collection name: + +```aql +FOR doc IN `my-coll` + RETURN doc +``` + +The collection `my-coll` has a dash in its name, but `-` is an arithmetic +operator for subtraction in AQL. The backticks quote the collection name to +refer to the collection correctly. + +If you use extended collection and View names +([`--database.extended-names` startup option](../../components/arangodb-server/options.md#--databaseextended-names)), +they may contain spaces, or non-ASCII characters such as Japanese or Arabic +letters, emojis, letters with accentuation, and other UTF-8 characters. +Quoting is required in these cases, too: + +```aql +FOR doc IN ´🥑~колекція =)´ + RETURN doc +``` + +The collection name contains characters that are allowed using the extended +naming constraints and is quoted with forward ticks. + +Note that quoting the name with `"` or `'` is not possible for collections as +they cannot be string literals in quote marks. + +For information about the naming constraints for collections, see +[Collection names](../../concepts/data-structure/collections.md#collection-names). + +### Attribute names + +When referring to attributes of documents from a collection, the fully qualified +attribute name must be used. This is because multiple collections with ambiguous +attribute names may be used in a query. To avoid any ambiguity, it is not +allowed to refer to an unqualified attribute name. + +Also see the naming restrictions for +[Attribute names](../../concepts/data-structure/documents/_index.md#attribute-names). + +```aql +FOR u IN users + FOR f IN friends + FILTER u.active == true && f.active == true && u.id == f.userId + RETURN u.name +``` + +In the above example, the attribute names `active`, `name`, `id`, and `userId` +are qualified using the collection names they belong to (`u` and `f` +respectively). + +### Variable names + +AQL allows you to assign values to additional variables in a query. +All variables that are assigned a value must have a name that is unique within +the context of the query. + +```aql +FOR u IN users + LET friends = u.friends + RETURN { "name" : u.name, "friends" : friends } +``` + +In the above query, `users` is a collection name, and both `u` and `friends` are +variable names. This is because the `FOR` and `LET` operations need target +variables to store their intermediate results. 
+ +Variable names should be different from the names of any collection name used in +the same query to avoid shadowing, which can render a collection with the same +name inaccessible in the query after the variable assignment: + +```aql +LET users = [] +FOR u IN users // iterates over the "users" variable, not the "users" collection + RETURN u +``` + +Allowed characters in variable names are the letters `a` to `z` (both in lower +and upper case), the numbers `0` to `9`, the underscore (`_`) symbol and the +dollar (`$`) sign. A variable name must not start with a number. If a variable +name starts with one or multiple underscore characters, the underscore(s) must +be followed by least one letter (a-z or A-Z). The dollar sign can only be used +as the very first character in a variable name and must be followed by a letter. diff --git a/site/content/arangodb/oem/aql/fundamentals/type-and-value-order.md b/site/content/arangodb/oem/aql/fundamentals/type-and-value-order.md new file mode 100644 index 0000000000..bfe5a3baee --- /dev/null +++ b/site/content/arangodb/oem/aql/fundamentals/type-and-value-order.md @@ -0,0 +1,137 @@ +--- +title: Type and value order in AQL +menuTitle: Type and value order +weight: 20 +description: >- + AQL uses a set of rules for equality checks and comparisons that takes both + the data types and the actual values into account +--- +When checking for equality or inequality, or when determining the sort order of +values, AQL uses a deterministic algorithm for the comparison. + +The compared operands are first compared by their data types, and only by their +data values if the operands have the same data types. + +The following type order is used when comparing data types: + +``` +null < bool < number < string < array (or list) < object (or document) +``` + +This means `null` is the smallest type in AQL and *object* is the type with +the highest order. If the compared operands have a different type, then the +comparison result is determined and the comparison is finished. + +For example, the boolean `true` value is always less than any numeric or +string value, any array (even an empty array), and any object. Additionally, any +string value (even an empty string) is always greater than any numeric +value and a boolean value (`true` and `false`). + +```aql +null < false +null < true +null < 0 +null < '' +null < ' ' +null < '0' +null < 'abc' +null < [ ] +null < { } + +false < true +false < 0 +false < '' +false < ' ' +false < '0' +false < 'abc' +false < [ ] +false < { } + +true < 0 +true < '' +true < ' ' +true < '0' +true < 'abc' +true < [ ] +true < { } + +0 < '' +0 < ' ' +0 < '0' +0 < 'abc' +0 < [ ] +0 < { } + +'' < ' ' +'' < '0' +'' < 'abc' +'' < [ ] +'' < { } + +[ ] < { } +``` + +If the two compared operands have the same data types, then the operands values +are compared. For the primitive types (null, boolean, number, and string), the +result is defined as follows: + +- **null**: `null` is equal to `null` +- **boolean**: `false` is less than `true` +- **number**: numeric values are ordered by their cardinal value +- **string**: string values are ordered using a localized comparison, using the configured + [server language](../../components/arangodb-server/options.md#--default-language) + for sorting according to the alphabetical order rules of that language + +Note: unlike in SQL, `null` can be compared to any value, including `null` +itself, without the result being converted into `null` automatically. 
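+
+For example, you can verify a few of these rules directly with a query that
+evaluates mixed-type comparisons (a small illustration that only relies on the
+type order described above):
+
+```aql
+RETURN [ null < false, true < 0, 0 < 'abc', 'abc' < [ ], [ ] < { } ]
+```
+
+```json
+[ [ true, true, true, true, true ] ]
+```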
+ +For compound types (array and object), the following special rules are applied: + +Two **array** values are compared by comparing their individual elements position by +position, starting at the first element. For each position, the element types +are compared first. If the types are not equal, the comparison result is +determined, and the comparison is finished. If the types are equal, then the +values of the two elements are compared. If one of the arrays is finished and +the other array still has an element at a compared position, then `null` is +used as the element value of the fully traversed array. + +If an array element is itself a compound value (an array or an object), then the +comparison algorithm checks the element's sub-values recursively. The element's +sub-elements are compared recursively. + +```aql +[ ] < [ 0 ] +[ 1 ] < [ 2 ] +[ 1, 2 ] < [ 2 ] +[ 99, 99 ] < [ 100 ] +[ false ] < [ true ] +[ false, 1 ] < [ false, '' ] +``` + +Two **object** operands are compared by checking attribute names and value. The +attribute names are compared first. Before attribute names are compared, a +combined array of all attribute names from both operands is created and sorted +lexicographically. This means that the order in which attributes are declared +in an object is not relevant when comparing two objects. + +The combined and sorted array of attribute names is then traversed, and the +respective attributes from the two compared operands are then looked up. If one +of the objects does not have an attribute with the sought name, its attribute +value is considered to be `null`. Finally, the attribute value of both +objects is compared using the aforementioned data type and value comparison. +The comparisons are performed for all object attributes until there is an +unambiguous comparison result. If an unambiguous comparison result is found, the +comparison is finished. If there is no unambiguous comparison result, the two +compared objects are considered equal. + +```aql +{ } == { "a" : null } + +{ } < { "a" : 1 } +{ "a" : 1 } < { "a" : 2 } +{ "b" : 1 } < { "a" : 0 } +{ "a" : { "c" : true } } < { "a" : { "c" : 0 } } +{ "a" : { "c" : true, "a" : 0 } } < { "a" : { "c" : false, "a" : 1 } } + +{ "a" : 1, "b" : 2 } == { "b" : 2, "a" : 1 } +``` diff --git a/site/content/arangodb/oem/aql/graph-queries/_index.md b/site/content/arangodb/oem/aql/graph-queries/_index.md new file mode 100644 index 0000000000..99bf3dd6e7 --- /dev/null +++ b/site/content/arangodb/oem/aql/graph-queries/_index.md @@ -0,0 +1,47 @@ +--- +title: Graph queries in AQL +menuTitle: Graph queries +weight: 35 +description: >- + You can perform graph traversals and path searches on named graphs as well as + collection sets with AQL +--- +There are multiple ways to work with [graphs in ArangoDB](../../graphs/_index.md), +as well as different ways to query your graphs using AQL. + +The two options in managing graphs are to either use + +- named graphs where ArangoDB manages the collections involved in one graph, or +- graph functions on a combination of document and edge collections. + +Named graphs can be defined through the [graph-module](../../graphs/general-graphs/_index.md) +or via the [web interface](../../components/web-interface/_index.md). +The definition contains the name of the graph, and the vertex and edge collections +involved. Since the management functions are layered on top of simple sets of +document and edge collections, you can also use regular AQL functions to work with them. 
+ +Both variants (named graphs and loosely coupled collection sets a.k.a. anonymous graphs) +are supported by the AQL language constructs for graph querying. These constructs +make full use of optimizations and therefore best performance is to be expected: + +- [AQL Traversals](traversals.md) to follow edges connected to a start vertex, + up to a variable depth. It can be combined with AQL filter conditions. + +- [AQL Shortest Path](shortest-path.md) to find one shortest path + between two given documents. + +- [AQL All Shortest Paths](all-shortest-paths.md) to find all shortest + paths between two given documents. + +- [AQL k Shortest Paths](k-shortest-paths.md) to find the first *k* + paths in order of length (or weight) between two given documents. + +- [AQL k Paths](k-paths.md) to find all paths between two given documents. + +These types of queries are only useful if you use edge collections and/or graphs in +your data model. + +{{< info >}} +New to graphs? [Take our free graph course for freshers](https://www.arangodb.com/arangodb-graph-course/) +and get from zero knowledge to advanced query techniques. +{{< /info >}} diff --git a/site/content/arangodb/oem/aql/graph-queries/all-shortest-paths.md b/site/content/arangodb/oem/aql/graph-queries/all-shortest-paths.md new file mode 100644 index 0000000000..571a6857d3 --- /dev/null +++ b/site/content/arangodb/oem/aql/graph-queries/all-shortest-paths.md @@ -0,0 +1,197 @@ +--- +title: All Shortest Paths in AQL +menuTitle: All Shortest Paths +weight: 20 +description: >- + Find all paths of shortest length between two vertices +--- +## General query idea + +This type of query finds all paths of shortest length between two given +documents (*startVertex* and *targetVertex*) in your graph. + +Every returned path is a JSON object with two attributes: + +- An array containing the `vertices` on the path. +- An array containing the `edges` on the path. + +**Example** + +A visual representation of the example graph: + +![Train Connection Map](../../../../images/train_map.png) + +Each ellipse stands for a train station with the name of the city written inside +of it. They are the vertices of the graph. Arrows represent train connections +between cities and are the edges of the graph. + +Assuming that you want to go from **Carlisle** to **London** by train, the +expected two shortest paths are: + +1. Carlisle – Birmingham – London +2. Carlisle – York – London + +Another path that connects Carlisle and London is +Carlisle – Glasgow – Edinburgh – York – London, but it has two more stops and +is therefore not a path of the shortest length. + +## Syntax + +The syntax for All Shortest Paths queries is similar to the one for +[Shortest Path](shortest-path.md) and there are also two options to +either use a named graph or a set of edge collections. It only emits a path +variable however, whereas `SHORTEST_PATH` emits a vertex and an edge variable. + +### Working with named graphs + +```aql +FOR path + IN OUTBOUND|INBOUND|ANY ALL_SHORTEST_PATHS + startVertex TO targetVertex + GRAPH graphName +``` + +- `FOR`: Emits the variable **path** which contains one shortest path as an + object, with the `vertices` and `edges` of the path. +- `IN` `OUTBOUND|INBOUND|ANY`: Defines in which direction + edges are followed (outgoing, incoming, or both) +- `ALL_SHORTEST_PATHS`: The keyword to compute All Shortest Paths +- **startVertex** `TO` **targetVertex** (both string\|object): The two vertices between + which the paths are computed. 
This can be specified in the form of
+  an ID string or in the form of a document with the attribute `_id`. All other
+  values result in a warning and an empty result. If one of the specified
+  documents does not exist, the result is empty as well and there is no warning.
+- `GRAPH` **graphName** (string): The name identifying the named graph. Its vertex and
+  edge collections are looked up for the path search.
+
+{{< info >}}
+All Shortest Paths traversals do not support edge weights.
+{{< /info >}}
+
+### Working with collection sets
+
+```aql
+FOR path
+  IN OUTBOUND|INBOUND|ANY ALL_SHORTEST_PATHS
+  startVertex TO targetVertex
+  edgeCollection1, ..., edgeCollectionN
+```
+
+Instead of `GRAPH graphName` you can specify a list of edge collections.
+The involved vertex collections are determined by the edges of the given
+edge collections.
+
+### Traversing in mixed directions
+
+For All Shortest Paths with a list of edge collections, you can optionally specify the
+direction for some of the edge collections. Say, for example, you have three edge
+collections *edges1*, *edges2* and *edges3*, where in *edges2* the direction
+has no relevance, but in *edges1* and *edges3* the direction should be taken into
+account. In this case you can use `OUTBOUND` as a general search direction and `ANY`
+specifically for *edges2* as follows:
+
+```aql
+FOR path IN OUTBOUND ALL_SHORTEST_PATHS
+  startVertex TO targetVertex
+  edges1, ANY edges2, edges3
+```
+
+All collections in the list that do not specify their own direction use the
+direction defined after `IN` (here: `OUTBOUND`). This allows using a different
+direction for each collection in your path search.
+
+## Examples
+
+Load an example graph to get a named graph that reflects some possible
+train connections in Europe and North America:
+
+![Train Connection Map](../../../../images/train_map.png)
+
+```js
+---
+name: GRAPHASP_01_create_graph
+description: ''
+---
+~addIgnoreCollection("places");
+~addIgnoreCollection("connections");
+var examples = require("@arangodb/graph-examples/example-graph");
+var graph = examples.loadGraph("kShortestPathsGraph");
+db.places.toArray();
+db.connections.toArray();
+```
+
+Suppose you want to query a route from **Carlisle** to **London**, and
+compare the outputs of `SHORTEST_PATH`, `K_SHORTEST_PATHS`, and `ALL_SHORTEST_PATHS`.
+Note that `SHORTEST_PATH` returns any of the shortest paths, whereas
+`ALL_SHORTEST_PATHS` returns all of them. `K_SHORTEST_PATHS` returns the
+shortest paths first but continues with longer paths, until it has found all
+routes or reaches the defined limit (the number of paths).
+ +Using `SHORTEST_PATH` to get one shortest path: + +```aql +--- +name: GRAPHASP_01_Carlisle_to_London +description: '' +dataset: kShortestPathsGraph +--- +FOR v, e IN OUTBOUND SHORTEST_PATH 'places/Carlisle' TO 'places/London' +GRAPH 'kShortestPathsGraph' + RETURN { place: v.label } +``` + +Using `ALL_SHORTEST_PATHS` to get both shortest paths: + +```aql +--- +name: GRAPHASP_02_Carlisle_to_London +description: '' +dataset: kShortestPathsGraph +--- +FOR p IN OUTBOUND ALL_SHORTEST_PATHS 'places/Carlisle' TO 'places/London' +GRAPH 'kShortestPathsGraph' + RETURN { places: p.vertices[*].label } +``` + +Using `K_SHORTEST_PATHS` without a limit to get all paths in order of +increasing length: + +```aql +--- +name: GRAPHASP_03_Carlisle_to_London +description: '' +dataset: kShortestPathsGraph +--- +FOR p IN OUTBOUND K_SHORTEST_PATHS 'places/Carlisle' TO 'places/London' +GRAPH 'kShortestPathsGraph' + RETURN { places: p.vertices[*].label } +``` + +If you ask for routes that don't exist, you get an empty result +(from **Carlisle** to **Toronto**): + +```aql +--- +name: GRAPHASP_04_Carlisle_to_Toronto +description: '' +dataset: kShortestPathsGraph +--- +FOR p IN OUTBOUND ALL_SHORTEST_PATHS 'places/Carlisle' TO 'places/Toronto' +GRAPH 'kShortestPathsGraph' + RETURN { + places: p.vertices[*].label + } +``` + +And finally clean up by removing the named graph: + +```js +--- +name: GRAPHASP_99_drop_graph +description: '' +--- +var examples = require("@arangodb/graph-examples/example-graph"); +examples.dropGraph("kShortestPathsGraph"); +~removeIgnoreCollection("places"); +~removeIgnoreCollection("connections"); +``` diff --git a/site/content/arangodb/oem/aql/graph-queries/k-paths.md b/site/content/arangodb/oem/aql/graph-queries/k-paths.md new file mode 100644 index 0000000000..e4da13c5e3 --- /dev/null +++ b/site/content/arangodb/oem/aql/graph-queries/k-paths.md @@ -0,0 +1,232 @@ +--- +title: k Paths in AQL +menuTitle: k Paths +weight: 30 +description: >- + Find all paths between two vertices with a fixed range of path lengths +--- +## General query idea + +This type of query finds all paths between two given documents +(*startVertex* and *targetVertex*) in your graph. The paths are restricted +by a minimum and maximum length that you specify. + +Every such path is returned as a JSON object with two components: + +- an array containing the `vertices` on the path +- an array containing the `edges` on the path + +**Example** + +Here is an example graph to explain how the k Paths algorithm works: + +![Train Connection Map](../../../../images/train_map.png) + +Each ellipse stands for a train station with the name of the city written inside +of it. They are the vertices of the graph. Arrows represent train connections +between cities and are the edges of the graph. The numbers near the arrows +describe how long it takes to get from one station to another. They are used +as edge weights. + +Assume that you want to go from **Aberdeen** to **London** by train. + +You have a couple of alternatives: + +a) Straight way + + 1. Aberdeen + 2. Leuchars + 3. Edinburgh + 4. York + 5. London + +b) Detour at York + + 1. Aberdeen + 2. Leuchars + 3. Edinburgh + 4. York + 5. **Carlisle** + 6. **Birmingham** + 7. London + +c) Detour at Edinburgh + + 1. Aberdeen + 2. Leuchars + 3. Edinburgh + 4. **Glasgow** + 5. **Carlisle** + 6. **Birmingham** + 7. London + +d) Detour at Edinburgh to York + + 1. Aberdeen + 2. Leuchars + 3. Edinburgh + 4. **Glasgow** + 5. **Carlisle** + 6. York + 7. 
London + +Note that only paths that do not contain the same vertex twice are consider to +be valid. The following alternative would visit Aberdeen twice and is **not** +returned by the k Paths algorithm: + +1. Aberdeen +2. **Inverness** +3. **Aberdeen** +4. Leuchars +5. Edinburgh +6. York +7. London + +## Example Use Cases + +The use-cases for k Paths are about the same as for unweighted k Shortest Paths. +The main difference is that k Shortest Paths enumerates all paths with +**increasing length**. It stops as soon as a given number of paths is reached. +k Paths enumerates all paths within a given **range of path lengths** instead, +and is thereby upper-bounded. + +The k Paths traversal can be used as foundation for several other algorithms: + +- **Transportation** of any kind (e.g. road traffic, network package routing) +- **Flow problems**: You need to transfer items from A to B, which alternatives + do you have? What is their capacity? + +## Syntax + +The syntax for k Paths queries is similar to the one for +[K Shortest Path](k-shortest-paths.md) with the addition to define the +minimum and maximum length of the path. + +{{< warning >}} +It is highly recommended that you use a reasonable maximum path length or a +**LIMIT** statement, as k Paths is a potentially expensive operation. It can +return a large number of paths for large connected graphs. +{{< /warning >}} + +### Working with named graphs + +```aql +FOR path + IN MIN..MAX OUTBOUND|INBOUND|ANY K_PATHS + startVertex TO targetVertex + GRAPH graphName +``` + +- `FOR`: Emits the variable **path** which contains one path as an object + containing `vertices` and `edges` of the path. +- `IN` `MIN..MAX`: The minimal and maximal depth for the traversal: + - **min** (number, *optional*): Paths returned by this query + have at least a length of this many edges. + If not specified, it defaults to `1`. The minimal possible value is `0`. + - **max** (number, *optional*): Paths returned by this query + have at most a length of this many edges. + If omitted, it defaults to the value of `min`. Thus, only the vertices and + edges in the range of `min` are returned. You cannot specify `max` without `min`. +- `OUTBOUND|INBOUND|ANY`: Defines in which direction + edges are followed (outgoing, incoming, or both). +- `K_PATHS`: The keyword to compute all paths with the specified lengths. +- **startVertex** `TO` **targetVertex** (both string\|object): The two vertices + between which the paths are computed. This can be specified in the form of + a document identifier string or in the form of an object with the `_id` + attribute. All other values lead to a warning and an empty result. This is + also the case if one of the specified documents does not exist. +- `GRAPH` **graphName** (string): The name identifying the named graph. + Its vertex and edge collections are looked up for the path search. + +### Working with collection sets + +```aql +FOR path + IN MIN..MAX OUTBOUND|INBOUND|ANY K_PATHS + startVertex TO targetVertex + edgeCollection1, ..., edgeCollectionN + [OPTIONS options] +``` + +Instead of `GRAPH graphName` you can specify a list of edge collections. +The involved vertex collections are determined by the edges of the given +edge collections. + +### Traversing in mixed directions + +For k paths with a list of edge collections you can optionally specify the +direction for some of the edge collections. 
Say for example you have three edge +collections *edges1*, *edges2* and *edges3*, where in *edges2* the direction +has no relevance, but in *edges1* and *edges3* the direction should be taken +into account. In this case you can use `OUTBOUND` as general search direction +and `ANY` specifically for *edges2* as follows: + +```aql +FOR vertex IN OUTBOUND K_PATHS + startVertex TO targetVertex + edges1, ANY edges2, edges3 +``` + +All collections in the list that do not specify their own direction use the +direction defined after `IN` (here: `OUTBOUND`). This allows to use a different +direction for each collection in your path search. + +## Examples + +You can load the `kShortestPathsGraph` example graph to get a named graph that +reflects some possible train connections in Europe and North America. + +![Train Connection Map](../../../../images/train_map.png) + +```js +--- +name: GRAPHKP_01_create_graph +description: '' +--- +~addIgnoreCollection("places"); +~addIgnoreCollection("connections"); +var examples = require("@arangodb/graph-examples/example-graph"); +var graph = examples.loadGraph("kShortestPathsGraph"); +db.places.toArray(); +db.connections.toArray(); +``` + +Suppose you want to query all routes from **Aberdeen** to **London**. + +```aql +--- +name: GRAPHKP_01_Aberdeen_to_London +description: '' +dataset: kShortestPathsGraph +--- +FOR p IN 1..10 OUTBOUND K_PATHS 'places/Aberdeen' TO 'places/London' +GRAPH 'kShortestPathsGraph' + RETURN { places: p.vertices[*].label, travelTimes: p.edges[*].travelTime } +``` + +If you ask for routes that don't exist, you get an empty result +(from **Aberdeen** to **Toronto**): + +```aql +--- +name: GRAPHKP_02_Aberdeen_to_Toronto +description: '' +dataset: kShortestPathsGraph +--- +FOR p IN 1..10 OUTBOUND K_PATHS 'places/Aberdeen' TO 'places/Toronto' +GRAPH 'kShortestPathsGraph' + RETURN { places: p.vertices[*].label, travelTimes: p.edges[*].travelTime } +``` + +And finally clean up by removing the named graph: + +```js +--- +name: GRAPHKP_99_drop_graph +description: '' +--- +var examples = require("@arangodb/graph-examples/example-graph"); +examples.dropGraph("kShortestPathsGraph"); +~removeIgnoreCollection("places"); +~removeIgnoreCollection("connections"); +``` diff --git a/site/content/arangodb/oem/aql/graph-queries/k-shortest-paths.md b/site/content/arangodb/oem/aql/graph-queries/k-shortest-paths.md new file mode 100644 index 0000000000..917dba2516 --- /dev/null +++ b/site/content/arangodb/oem/aql/graph-queries/k-shortest-paths.md @@ -0,0 +1,308 @@ +--- +title: k Shortest Paths in AQL +menuTitle: k Shortest Paths +weight: 25 +description: >- + Find a number of shortest paths in the order of increasing path length or weight +--- +## General query idea + +This type of query finds the first *k* paths in order of length +(or weight) between two given documents (*startVertex* and *targetVertex*) in +your graph. + +Every such path is returned as a JSON object with three components: + +- an array containing the `vertices` on the path +- an array containing the `edges` on the path +- the `weight` of the path, that is the sum of all edge weights + +If no `weightAttribute` is specified, the weight of the path is just its length. + +{{< youtube id="XdITulJFdVo" >}} + +**Example** + +Here is an example graph to explain how the k Shortest Paths algorithm works: + +![Train Connection Map](../../../../images/train_map.png) + +Each ellipse stands for a train station with the name of the city written inside +of it. They are the vertices of the graph. 
Arrows represent train connections +between cities and are the edges of the graph. The numbers near the arrows +describe how long it takes to get from one station to another. They are used +as edge weights. + +Let us assume that you want to go from **Aberdeen** to **London** by train. + +You expect to see the following vertices on *the* shortest path, in this order: + +1. Aberdeen +2. Leuchars +3. Edinburgh +4. York +5. London + +By the way, the weight of the path is: 1.5 + 1.5 + 3.5 + 1.8 = **8.3**. + +Let us look at alternative paths next, for example because you know that the +direct connection between York and London does not operate currently. +An alternative path, which is slightly longer, goes like this: + +1. Aberdeen +2. Leuchars +3. Edinburgh +4. York +5. **Carlisle** +6. **Birmingham** +7. London + +Its weight is: 1.5 + 1.5 + 3.5 + 2.0 + 1.5 = **10.0**. + +Another route goes via Glasgow. There are seven stations on the path as well, +however, it is quicker if you compare the edge weights: + +1. Aberdeen +2. Leuchars +3. Edinburgh +4. **Glasgow** +5. Carlisle +6. Birmingham +7. London + +The path weight is lower: 1.5 + 1.5 + 1.0 + 1.0 + 2.0 + 1.5 = **8.5**. + +## Syntax + +The syntax for k Shortest Paths queries is similar to the one for +[Shortest Path](shortest-path.md) and there are also two options to +either use a named graph or a set of edge collections. It only emits a path +variable however, whereas `SHORTEST_PATH` emits a vertex and an edge variable. + +{{< warning >}} +It is highly recommended that you use a **LIMIT** statement, as +k Shortest Paths is a potentially expensive operation. On large connected +graphs it can return a large number of paths, or perform an expensive +(but unsuccessful) search for more short paths. +{{< /warning >}} + +### Working with named graphs + +```aql +FOR path + IN OUTBOUND|INBOUND|ANY K_SHORTEST_PATHS + startVertex TO targetVertex + GRAPH graphName + [OPTIONS options] + [LIMIT offset, count] +``` + +- `FOR`: Emits the variable **path** which contains one path as an object containing + `vertices`, `edges`, and the `weight` of the path. +- `IN` `OUTBOUND|INBOUND|ANY`: Defines in which direction + edges are followed (outgoing, incoming, or both). +- `K_SHORTEST_PATHS`: The keyword to compute k Shortest Paths +- **startVertex** `TO` **targetVertex** (both string\|object): The two vertices between + which the paths are computed. This can be specified in the form of + a ID string or in the form of a document with the attribute `_id`. All other + values lead to a warning and an empty result. If one of the specified + documents does not exist, the result is empty as well and there is no warning. +- `GRAPH` **graphName** (string): The name identifying the named graph. Its vertex and + edge collections are looked up by the path search. +- `OPTIONS` **options** (object, *optional*): + See the [path search options](#path-search-options). +- `LIMIT` (see [LIMIT operation](../high-level-operations/limit.md), *optional*): + the maximal number of paths to return. It is highly recommended to use + a `LIMIT` for `K_SHORTEST_PATHS`. + +{{< info >}} +k Shortest Paths traversals do not support negative weights. If a document +attribute (as specified by `weightAttribute`) with a negative value is +encountered during traversal, or if `defaultWeight` is set to a negative +number, then the query is aborted with an error. 
+{{< /info >}} + +### Working with collection sets + +```aql +FOR path + IN OUTBOUND|INBOUND|ANY K_SHORTEST_PATHS + startVertex TO targetVertex + edgeCollection1, ..., edgeCollectionN + [OPTIONS options] + [LIMIT offset, count] +``` + +Instead of `GRAPH graphName` you can specify a list of edge collections. +The involved vertex collections are determined by the edges of the given +edge collections. + +### Path search options + +You can optionally specify the following options to modify the execution of a +graph path search. If you specify unknown options, query warnings are raised. + +#### `weightAttribute` + +A top-level edge attribute that should be used to read the edge weight (string). + +If the attribute does not exist or is not numeric, the `defaultWeight` is used +instead. + +The attribute value must not be negative. + +#### `defaultWeight` + +This value is used as fallback if there is no `weightAttribute` in the +edge document, or if it's not a number (number). + +The value must not be negative. The default is `1`. + +### Traversing in mixed directions + +For k shortest paths with a list of edge collections you can optionally specify the +direction for some of the edge collections. Say for example you have three edge +collections *edges1*, *edges2* and *edges3*, where in *edges2* the direction +has no relevance, but in *edges1* and *edges3* the direction should be taken into +account. In this case you can use `OUTBOUND` as general search direction and `ANY` +specifically for *edges2* as follows: + +```aql +FOR vertex IN OUTBOUND K_SHORTEST_PATHS + startVertex TO targetVertex + edges1, ANY edges2, edges3 +``` + +All collections in the list that do not specify their own direction use the +direction defined after `IN` (here: `OUTBOUND`). This allows to use a different +direction for each collection in your path search. + +## Examples + +You can load the `kShortestPathsGraph` example graph to get a named graph that +reflects some possible train connections in Europe and North America. + +![Train Connection Map](../../../../images/train_map.png) + +```js +--- +name: GRAPHKSP_01_create_graph +description: '' +--- +~addIgnoreCollection("places"); +~addIgnoreCollection("connections"); +var examples = require("@arangodb/graph-examples/example-graph"); +var graph = examples.loadGraph("kShortestPathsGraph"); +db.places.toArray(); +db.connections.toArray(); +``` + +Suppose you want to query a route from **Aberdeen** to **London**, and +compare the outputs of `SHORTEST_PATH` and `K_SHORTEST_PATHS` with +`LIMIT 1`. Note that while `SHORTEST_PATH` and `K_SHORTEST_PATH` with +`LIMIT 1` should return a path of the same length (or weight), they do +not need to return the same path. 
+ +Using `SHORTEST_PATH`: + +```aql +--- +name: GRAPHKSP_01_Aberdeen_to_London +description: '' +dataset: kShortestPathsGraph +--- +FOR v, e IN OUTBOUND SHORTEST_PATH 'places/Aberdeen' TO 'places/London' +GRAPH 'kShortestPathsGraph' + RETURN { place: v.label, travelTime: e.travelTime } +``` + +Using `K_SHORTEST_PATHS`: + +```aql +--- +name: GRAPHKSP_02_Aberdeen_to_London +description: '' +dataset: kShortestPathsGraph +--- +FOR p IN OUTBOUND K_SHORTEST_PATHS 'places/Aberdeen' TO 'places/London' +GRAPH 'kShortestPathsGraph' + LIMIT 1 + RETURN { places: p.vertices[*].label, travelTimes: p.edges[*].travelTime } +``` + +With `K_SHORTEST_PATHS`, you can ask for more than one option for a route: + +```aql +--- +name: GRAPHKSP_03_Aberdeen_to_London +description: '' +dataset: kShortestPathsGraph +--- +FOR p IN OUTBOUND K_SHORTEST_PATHS 'places/Aberdeen' TO 'places/London' +GRAPH 'kShortestPathsGraph' + LIMIT 3 + RETURN { + places: p.vertices[*].label, + travelTimes: p.edges[*].travelTime, + travelTimeTotal: SUM(p.edges[*].travelTime) + } +``` + +If you ask for routes that don't exist, you get an empty result +(from **Aberdeen** to **Toronto**): + +```aql +--- +name: GRAPHKSP_04_Aberdeen_to_Toronto +description: '' +dataset: kShortestPathsGraph +--- +FOR p IN OUTBOUND K_SHORTEST_PATHS 'places/Aberdeen' TO 'places/Toronto' +GRAPH 'kShortestPathsGraph' + LIMIT 3 + RETURN { + places: p.vertices[*].label, + travelTimes: p.edges[*].travelTime, + travelTimeTotal: SUM(p.edges[*].travelTime) + } +``` + +You can use the `travelTime` attribute that connections have as edge weights to +take into account which connections are quicker. A high default weight is set, +to be used if an edge has no `travelTime` attribute (not the case with the +example graph). This returns the top three routes with the fewest changes +and favoring the least travel time for the connection **Saint Andrews** +to **Cologne**: + +```aql +--- +name: GRAPHKSP_05_StAndrews_to_Cologne +description: '' +dataset: kShortestPathsGraph +--- +FOR p IN OUTBOUND K_SHORTEST_PATHS 'places/StAndrews' TO 'places/Cologne' +GRAPH 'kShortestPathsGraph' +OPTIONS { + weightAttribute: 'travelTime', + defaultWeight: 15 +} + LIMIT 3 + RETURN { + places: p.vertices[*].label, + travelTimes: p.edges[*].travelTime, + travelTimeTotal: SUM(p.edges[*].travelTime) + } +``` + +And finally clean up by removing the named graph: + +```js +--- +name: GRAPHKSP_99_drop_graph +description: '' +--- +var examples = require("@arangodb/graph-examples/example-graph"); +examples.dropGraph("kShortestPathsGraph"); +~removeIgnoreCollection("places"); +~removeIgnoreCollection("connections"); +``` diff --git a/site/content/arangodb/oem/aql/graph-queries/shortest-path.md b/site/content/arangodb/oem/aql/graph-queries/shortest-path.md new file mode 100644 index 0000000000..ed8540e777 --- /dev/null +++ b/site/content/arangodb/oem/aql/graph-queries/shortest-path.md @@ -0,0 +1,228 @@ +--- +title: Shortest Path in AQL +menuTitle: Shortest Path +weight: 15 +description: >- + Find one path of shortest length between two vertices +--- +## General query idea + +This type of query finds the shortest path between two given documents +(*startVertex* and *targetVertex*) in your graph. If there are multiple +shortest paths, the path with the lowest weight or a random one (in case +of a tie) is returned. + +The shortest path search emits the following two variables for every step of +the path: + +1. The vertex on this path. +2. The edge pointing to it. 
+ +### Example execution + +Let's take a look at a simple example to explain how it works. +This is the graph that you are going to find a shortest path on: + +![traversal graph](../../../../images/traversal_graph.png) + +You can use the following parameters for the query: + +1. You start at the vertex **A**. +2. You finish with the vertex **D**. + +So, obviously, you have the vertices **A**, **B**, **C** and **D** on the +shortest path in exactly this order. Then, the shortest path statement +returns the following pairs: + +| Vertex | Edge | +|--------|-------| +| A | null | +| B | A → B | +| C | B → C | +| D | C → D | + +Note that the first edge is always `null` because there is no edge pointing +to the *startVertex*. + +## Syntax + +The next step is to see how you can write a shortest path query. +You have two options here, you can either use a named graph or a set of edge +collections (anonymous graph). + +### Working with named graphs + +```aql +FOR vertex[, edge] + IN OUTBOUND|INBOUND|ANY SHORTEST_PATH + startVertex TO targetVertex + GRAPH graphName + [OPTIONS options] +``` + +- `FOR`: Emits up to two variables: + - **vertex** (object): The current vertex on the shortest path + - **edge** (object, *optional*): The edge pointing to the vertex +- `IN` `OUTBOUND|INBOUND|ANY`: Defines in which direction edges are followed + (outgoing, incoming, or both) +- **startVertex** `TO` **targetVertex** (both string\|object): The two vertices between + which the shortest path is computed. This can be specified in the form of + an ID string or in the form of a document with the attribute `_id`. All other + values lead to a warning and an empty result. If one of the specified + documents does not exist, the result is empty as well and there is no warning. +- `GRAPH` **graphName** (string): The name identifying the named graph. Its vertex and + edge collections are looked up for the path search. +- `OPTIONS` **options** (object, *optional*): + See the [path search options](#path-search-options). + +{{< info >}} +Shortest Path traversals do not support negative weights. If a document +attribute (as specified by `weightAttribute`) with a negative value is +encountered during traversal, or if `defaultWeight` is set to a negative +number, then the query is aborted with an error. +{{< /info >}} + +### Working with collection sets + +```aql +FOR vertex[, edge] + IN OUTBOUND|INBOUND|ANY SHORTEST_PATH + startVertex TO targetVertex + edgeCollection1, ..., edgeCollectionN + [OPTIONS options] +``` + +Instead of `GRAPH graphName` you may specify a list of edge collections (anonymous +graph). The involved vertex collections are determined by the edges of the given +edge collections. The rest of the behavior is similar to the named version. + +### Path search options + +You can optionally specify the following options to modify the execution of a +graph path search. If you specify unknown options, query warnings are raised. + +#### `weightAttribute` + +A top-level edge attribute that should be used to read the edge weight (string). + +If the attribute does not exist or is not numeric, the `defaultWeight` is used +instead. + +The attribute value must not be negative. + +#### `defaultWeight` + +This value is used as fallback if there is no `weightAttribute` in the +edge document, or if it's not a number (number). + +The value must not be negative. The default is `1`. 
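+
+For example, a weighted path search over a hypothetical named graph whose edges
+store a `distance` attribute might look like the following sketch (the graph,
+collection, and attribute names are placeholders and not part of the example
+dataset used below):
+
+```aql
+FOR v, e IN OUTBOUND SHORTEST_PATH 'cities/Berlin' TO 'cities/Munich'
+  GRAPH 'routeGraph'
+  OPTIONS { weightAttribute: 'distance', defaultWeight: 100 }
+  RETURN { city: v._key, distance: e.distance }
+```
+
+Edges without a numeric `distance` attribute would be counted with the
+`defaultWeight` of `100` in this sketch.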
+ +### Traversing in mixed directions + +For shortest path with a list of edge collections you can optionally specify the +direction for some of the edge collections. Say for example you have three edge +collections *edges1*, *edges2* and *edges3*, where in *edges2* the direction +has no relevance, but in *edges1* and *edges3* the direction should be taken into +account. In this case you can use `OUTBOUND` as general search direction and `ANY` +specifically for *edges2* as follows: + +```aql +FOR vertex IN OUTBOUND SHORTEST_PATH + startVertex TO targetVertex + edges1, ANY edges2, edges3 +``` + +All collections in the list that do not specify their own direction use the +direction defined after `IN` (here: `OUTBOUND`). This allows to use a different +direction for each collection in your path search. + +## Conditional shortest path + +The `SHORTEST_PATH` computation only finds an unconditioned shortest path. +With this construct it is not possible to define a condition like: "Find the +shortest path where all edges are of type *X*". If you want to do this, use a +normal [Traversal](traversals.md) instead with the option +`{order: "bfs"}` in combination with `LIMIT 1`. + +Please also consider using [`WITH`](../high-level-operations/with.md) to specify the +collections you expect to be involved. + +## Examples + +Creating a simple symmetric traversal demonstration graph: + +![traversal graph](../../../../images/traversal_graph.png) + +```js +--- +name: GRAPHSP_01_create_graph +description: '' +--- +~addIgnoreCollection("circles"); +~addIgnoreCollection("edges"); +var examples = require("@arangodb/graph-examples/example-graph"); +var graph = examples.loadGraph("traversalGraph"); +db.circles.toArray(); +db.edges.toArray(); +``` + +Start with the shortest path from **A** to **D** as above: + +```js +--- +name: GRAPHSP_02_A_to_D +description: '' +--- +db._query(` + FOR v, e IN OUTBOUND SHORTEST_PATH 'circles/A' TO 'circles/D' GRAPH 'traversalGraph' + RETURN [v._key, e._key] +`); + +db._query(` + FOR v, e IN OUTBOUND SHORTEST_PATH 'circles/A' TO 'circles/D' edges + RETURN [v._key, e._key] +`); +``` + +You can see that expectations are fulfilled. You find the vertices in the +correct ordering and the first edge is `null`, because no edge is pointing +to the start vertex on this path. 
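+
+As described in the conditional shortest path section above, you can emulate a
+shortest path search with additional conditions by using a breadth-first
+traversal together with `LIMIT 1`. A minimal sketch over the same example
+graph (add `FILTER` conditions on `e` or `p` as needed):
+
+```aql
+FOR v, e, p IN 1..10 OUTBOUND 'circles/A' GRAPH 'traversalGraph'
+  OPTIONS { order: 'bfs' }
+  FILTER v._key == 'D'
+  LIMIT 1
+  RETURN p.vertices[*]._key
+```
+
+Because the traversal enumerates paths with increasing depth, the first path
+that reaches **D** is one of the shortest paths to it.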
+ +You can also compute shortest paths based on documents found in collections: + +```js +--- +name: GRAPHSP_03_A_to_D +description: '' +--- +db._query(` + FOR a IN circles + FILTER a._key == 'A' + FOR d IN circles + FILTER d._key == 'D' + FOR v, e IN OUTBOUND SHORTEST_PATH a TO d GRAPH 'traversalGraph' + RETURN [v._key, e._key] +`); + +db._query(` + FOR a IN circles + FILTER a._key == 'A' + FOR d IN circles + FILTER d._key == 'D' + FOR v, e IN OUTBOUND SHORTEST_PATH a TO d edges + RETURN [v._key, e._key] +`); +``` + +And finally clean it up again: + +```js +--- +name: GRAPHSP_99_drop_graph +description: '' +--- +var examples = require("@arangodb/graph-examples/example-graph"); +examples.dropGraph("traversalGraph"); +~removeIgnoreCollection("circles"); +~removeIgnoreCollection("edges"); +``` diff --git a/site/content/arangodb/oem/aql/graph-queries/traversals-explained.md b/site/content/arangodb/oem/aql/graph-queries/traversals-explained.md new file mode 100644 index 0000000000..b4e9741151 --- /dev/null +++ b/site/content/arangodb/oem/aql/graph-queries/traversals-explained.md @@ -0,0 +1,85 @@ +--- +title: AQL graph traversals explained +menuTitle: Traversals explained +weight: 5 +description: >- + Traversing a graph means to follow edges connected to a start vertex and + neighboring vertices until a specified depth +--- +## General query idea + +A traversal starts at one specific document (*startVertex*) and follows all +edges connected to this document. For all documents (*vertices*) that are +targeted by these edges it will again follow all edges connected to them and +so on. It is possible to define how many of these follow iterations should be +executed at least (*min* depth) and at most (*max* depth). + +For all vertices that were visited during this process in the range between +*min* depth and *max* depth iterations you will get a result in form of a +set with three items: + +1. The visited vertex. +2. The edge pointing to it. +3. The complete path from startVertex to the visited vertex as object with an + attribute *edges* and an attribute *vertices*, each a list of the corresponding + elements. These lists are sorted, which means the first element in *vertices* + is the *startVertex* and the last is the visited vertex, and the n-th element + in *edges* connects the n-th element with the (n+1)-th element in *vertices*. + +## Example execution + +Let's take a look at a simple example to explain how it works. +This is the graph that we are going to traverse: + +![traversal graph](../../../../images/traversal_graph.png) + +We use the following parameters for our query: + +1. We start at the vertex **A**. +2. We use a *min* depth of 1. +3. We use a *max* depth of 2. +4. We follow only in `OUTBOUND` direction of edges + +![traversal graph step 1](../../../../images/traversal_graph1.png) + +Now it walks to one of the direct neighbors of **A**, say **B** (note: ordering +is not guaranteed!): + +![traversal graph step 2](../../../../images/traversal_graph2.png) + +The query will remember the state (red circle) and will emit the first result +**A** → **B** (black box). This will also prevent the traverser to be trapped +in cycles. Now again it will visit one of the direct neighbors of **B**, say **E**: + +![traversal graph step 3](../../../../images/traversal_graph3.png) + +We have limited the query with a *max* depth of *2*, so it will not pick any +neighbor of **E**, as the path from **A** to **E** already requires *2* steps. 
+Instead, we will go back one level to **B** and continue with any other direct +neighbor there: + +![traversal graph step 4](../../../../images/traversal_graph4.png) + +Again after we produced this result we will step back to **B**. +But there is no neighbor of **B** left that we have not yet visited. +Hence we go another step back to **A** and continue with any other neighbor there. + +![traversal graph step 5](../../../../images/traversal_graph5.png) + +And identical to the iterations before we will visit **H**: + +![traversal graph step 6](../../../../images/traversal_graph6.png) + +And **J**: + +![traversal graph step 7](../../../../images/traversal_graph7.png) + +After these steps there is no further result left. So all together this query +has returned the following paths: + +1. **A** → **B** +2. **A** → **B** → **E** +3. **A** → **B** → **C** +4. **A** → **G** +5. **A** → **G** → **H** +6. **A** → **G** → **J** diff --git a/site/content/arangodb/oem/aql/graph-queries/traversals.md b/site/content/arangodb/oem/aql/graph-queries/traversals.md new file mode 100644 index 0000000000..657fbf0917 --- /dev/null +++ b/site/content/arangodb/oem/aql/graph-queries/traversals.md @@ -0,0 +1,890 @@ +--- +title: Graph traversals in AQL +menuTitle: Traversals +weight: 10 +description: >- + You can traverse named graphs and anonymous graphs with a native AQL + language construct +--- +## Syntax + +There are two slightly different syntaxes for traversals in AQL, one for +- [named graphs](../../graphs/_index.md#named-graphs) and another to +- specify a [set of edge collections](#working-with-collection-sets) + ([anonymous graph](../../graphs/_index.md#anonymous-graphs)). + +### Working with named graphs + +The syntax for AQL graph traversals using named graphs is as follows +(square brackets denote optional parts and `|` denotes alternatives): + +```aql +FOR vertex[, edge[, path]] + IN [min[..max]] + OUTBOUND|INBOUND|ANY startVertex + GRAPH graphName + [PRUNE [pruneVariable = ]pruneCondition] + [OPTIONS options] +``` + +- `FOR`: emits up to three variables: + - **vertex** (object): the current vertex in a traversal + - **edge** (object, *optional*): the current edge in a traversal + - **path** (object, *optional*): representation of the current path with + two members: + - `vertices`: an array of all vertices on this path + - `edges`: an array of all edges on this path +- `IN` `min..max`: the minimal and maximal depth for the traversal: + - **min** (number, *optional*): edges and vertices returned by this query + start at the traversal depth of *min* (thus edges and vertices below it are + not returned). If not specified, it defaults to 1. The minimal + possible value is 0. + - **max** (number, *optional*): up to *max* length paths are traversed. + If omitted, *max* defaults to *min*. Thus only the vertices and edges in + the range of *min* are returned. *max* cannot be specified without *min*. +- `OUTBOUND|INBOUND|ANY`: follow outgoing, incoming, or edges pointing in either + direction in the traversal. Note that this can't be replaced by a bind parameter. +- **startVertex** (string\|object): a vertex where the traversal originates from. + This can be specified in the form of an ID string or in the form of a document + with the `_id` attribute. All other values lead to a warning and an empty + result. If the specified document does not exist, the result is empty as well + and there is no warning. +- `GRAPH` **graphName** (string): the name identifying the named graph. 
+ Its vertex and edge collections are looked up. Note that the graph name + is like a regular string, hence it must be enclosed by quote marks, like + `GRAPH "graphName"`. +- `PRUNE` **expression** (AQL expression, *optional*): + An expression, like in a `FILTER` statement, which is evaluated in every step of + the traversal, as early as possible. The semantics of this expression are as follows: + - If the expression evaluates to `false`, the traversal continues on the current path. + - If the expression evaluates to `true`, the traversal does not continue on the + current path. However, the paths up to this point are considered as a result + (they might still be post-filtered or ignored due to depth constraints). + For example, a traversal over the graph `(A) -> (B) -> (C)` starting at `A` + and pruning on `B` results in `(A)` and `(A) -> (B)` being valid paths, + whereas `(A) -> (B) -> (C)` is not returned because it gets pruned on `B`. + + You can only use a single `PRUNE` clause per `FOR` traversal operation, but + the prune expression can contain an arbitrary number of conditions using `AND` + and `OR` statements for complex expressions. You can use the variables emitted + by the `FOR` operation in the prune expression, as well as all variables + defined before the traversal. + + You can optionally assign the prune expression to a variable like + `PRUNE var = ` to use the evaluated result elsewhere in the query, + typically in a `FILTER` expression. + + See [Pruning](#pruning) for details. +- `OPTIONS` **options** (object, *optional*): See the [traversal options](#traversal-options). + +### Working with collection sets + +The syntax for AQL graph traversals using collection sets is as follows +(square brackets denote optional parts and `|` denotes alternatives): + +```aql +[WITH vertexCollection1[, vertexCollection2[, vertexCollectionN]]] +FOR vertex[, edge[, path]] + IN [min[..max]] + OUTBOUND|INBOUND|ANY startVertex + edgeCollection1[, edgeCollection2[, edgeCollectionN]] + [PRUNE [pruneVariable = ]pruneCondition] + [OPTIONS options] +``` + +- `WITH`: Declaration of collections. Optional for single server instances, but + required for [graph traversals in a cluster](#graph-traversals-in-a-cluster). + Needs to be placed at the very beginning of the query. + - **collections** (collection, *repeatable*): list of vertex collections that + are involved in the traversal +- **edgeCollections** (collection, *repeatable*): One or more edge collections + to use for the traversal (instead of using a named graph with `GRAPH graphName`). + Vertex collections are determined by the edges in the edge collections. + + You can override the default traversal direction by setting `OUTBOUND`, + `INBOUND`, or `ANY` before any of the edge collections. + + If the same edge collection is specified multiple times, it behaves as if it + were specified only once. Specifying the same edge collection is only allowed + when the collections do not have conflicting traversal directions. + + Views cannot be used as edge collections. +- See the [named graph variant](#working-with-named-graphs) for the remaining + traversal parameters as well as the [traversal options](#traversal-options). + The `edgeCollections` restriction option is redundant in this case. + +### Traversal options + +You can optionally specify the following options to modify the execution of a +graph traversal. If you specify unknown options, query warnings are raised. 
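+
+For example, a traversal that visits each vertex at most once and enumerates
+results in breadth-first order could be written as follows (a sketch with
+placeholder names; the individual options are explained below):
+
+```aql
+FOR v, e, p IN 1..3 OUTBOUND 'vertices/start' GRAPH 'myGraph'
+  OPTIONS { order: 'bfs', uniqueVertices: 'global' }
+  RETURN v
+```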
+ +#### `order` + +Specify which traversal algorithm to use (string): +- `"bfs"` – the traversal is executed breadth-first. The results + first contain all vertices at depth 1, then all vertices at depth 2 and so on. +- `"dfs"` (default) – the traversal is executed depth-first. It + first returns all paths from *min* depth to *max* depth for one vertex at + depth 1, then for the next vertex at depth 1 and so on. +- `"weighted"` - the traversal is a weighted traversal + (introduced in v3.8.0). Paths are enumerated with increasing cost. + Also see `weightAttribute` and `defaultWeight`. A returned path has an + additional attribute `weight` containing the cost of the path after every + step. The order of paths having the same cost is non-deterministic. + Negative weights are not supported and abort the query with an error. + +#### `bfs` + +Deprecated, use `order: "bfs"` instead. + +#### `uniqueVertices` + +Ensure vertex uniqueness (string): + +- `"path"` – it is guaranteed that there is no path returned with a duplicate vertex +- `"global"` – it is guaranteed that each vertex is visited at most once during + the traversal, no matter how many paths lead from the start vertex to this one. + If you start with a `min depth > 1` a vertex that was found before *min* depth + might not be returned at all (it still might be part of a path). + It is required to set `order: "bfs"` or `order: "weighted"` because with + depth-first search the results would be unpredictable. **Note:** + Using this configuration the result is not deterministic any more. If there + are multiple paths from *startVertex* to *vertex*, one of those is picked. + In case of a `weighted` traversal, the path with the lowest weight is + picked, but in case of equal weights it is undefined which one is chosen. +- `"none"` (default) – no uniqueness check is applied on vertices + +#### `uniqueEdges` + +Ensure edge uniqueness (string): + +- `"path"` (default) – it is guaranteed that there is no path returned with a + duplicate edge +- `"none"` – no uniqueness check is applied on edges. **Note:** + Using this configuration, the traversal follows edges in cycles. + +#### `edgeCollections` + +Restrict edge collections the traversal may visit (string\|array). + +If omitted or an empty array is specified, then there are no restrictions. + +- A string parameter is treated as the equivalent of an array with a single + element. +- Each element of the array should be a string containing the name of an + edge collection. + +#### `vertexCollections` + +Restrict vertex collections the traversal may visit (string\|array). + +If omitted or an empty array is specified, then there are no restrictions. + +- A string parameter is treated as the equivalent of an array with a single + element. +- Each element of the array should be a string containing the name of a + vertex collection. +- The starting vertex is always allowed, even if it does not belong to one + of the collections specified by a restriction. + +#### `parallelism` + +{{< tag "ArangoDB Enterprise Edition" "AMP" >}} + +Parallelize traversal execution (number). + +If omitted or set to a value of `1`, the traversal execution is not parallelized. +If set to a value greater than `1`, then up to that many worker threads can be +used for concurrently executing the traversal. The value is capped by the number +of available cores on the target machine. + +Parallelizing a traversal is normally useful when there are many inputs (start +vertices) that the nested traversal can work on concurrently. 
This is often the +case when a nested traversal is fed with several tens of thousands of start +vertices, which can then be distributed randomly to worker threads for parallel +execution. + +#### `maxProjections` + +{{< tag "ArangoDB Enterprise Edition" "AMP" >}} + +Specifies the number of document attributes per `FOR` loop to be used as +projections (number). The default value is `5`. + +#### `weightAttribute` + +Specifies the name of an attribute that is used to look up the weight of an edge +(string). + +If no attribute is specified or if it is not present in the edge document then +the `defaultWeight` is used. + +The attribute value must not be negative. + +{{< info >}} +Weighted traversals do not support negative weights. If a document +attribute (as specified by `weightAttribute`) with a negative value is +encountered during traversal, the query is aborted with an error. +{{< /info >}} + +#### `defaultWeight` + +Specifies the default weight of an edge (number). The default value is `1`. + +The value must not be negative. + +{{< info >}} +Weighted traversals do not support negative weights. If `defaultWeight` is set +to a negative number, then the query is aborted with an error. +{{< /info >}} + +### Traversing in mixed directions + +For traversals with a list of edge collections you can optionally specify the +direction for some of the edge collections. Say for example you have three edge +collections *edges1*, *edges2* and *edges3*, where in *edges2* the direction has +no relevance but in *edges1* and *edges3* the direction should be taken into account. +In this case you can use `OUTBOUND` as general traversal direction and `ANY` +specifically for *edges2* as follows: + +```aql +FOR vertex IN OUTBOUND + startVertex + edges1, ANY edges2, edges3 +``` + +All collections in the list that do not specify their own direction use the +direction defined after `IN`. This allows to use a different direction for each +collection in your traversal. + +### Graph traversals in a cluster + +Due to the nature of graphs, edges may reference vertices from arbitrary +collections. Following the paths can thus involve documents from various +collections and it is not possible to predict which are visited in a +traversal. Which collections need to be loaded by the graph engine can only be +determined at run time. + +Use the [`WITH` statement](../high-level-operations/with.md) to specify the collections you +expect to be involved. This is required for traversals using collection sets +in cluster deployments. + +## Pruning + +You can define stop conditions for graph traversals to return specific data and +to improve the query performance. This is called _pruning_ and works by checking +conditions during the traversal as opposed to filtering the results afterwards +(post-filtering). This reduces the amount of data to be checked by stopping the +traversal down specific paths early. + +{{< youtube id="4LVeeC0ciCQ" >}} + +You can specify one `PRUNE` expression per graph traversal, but it can contain +an arbitrary number of conditions. You can use the vertex, edge, and path +variables emitted by the traversal in a prune expression, as well as all other +variables defined before the `FOR` operation. Note that `PRUNE` is an optional +clause of the `FOR` operation and that the `OPTIONS` clause needs to be placed +after `PRUNE`. 
+ +```aql +--- +name: GRAPHTRAV_graphPruneExample1 +description: '' +dataset: kShortestPathsGraph +--- +FOR v, e, p IN 0..10 OUTBOUND "places/Toronto" GRAPH "kShortestPathsGraph" + PRUNE v.label == "Edmonton" + OPTIONS { uniqueVertices: "path" } + RETURN CONCAT_SEPARATOR(" -- ", p.vertices[*].label) +``` + +The above example shows a graph traversal using a +[train station and connections dataset](../../graphs/example-graphs.md#k-shortest-paths-graph): + +![Train Connection Map](../../../../images/train_map.png) + +The traversal starts at **Toronto** (bottom left), the traversal depth is +limited to 10, and every station is only visited once. The traversal could +continue up to **Vancouver** (bottom right) at depth 5, but it is stopped early +on this path (the only path in this example) at **Edmonton** because of the +prune expression. + +The traversal along paths is stopped as soon as the prune expression evaluates +to `true` for a given path. The current depth is still included in the result, +however. This can be seen in the query result of the example which includes the +Edmonton vertex at which it stopped. + +The following example starts a traversal at **London** (middle right), with a +depth between 2 and 3, and every station is only visited once. The station names +as well as the travel times are returned: + +```aql +--- +name: GRAPHTRAV_graphPruneExample2 +description: '' +dataset: kShortestPathsGraph +--- +FOR v, e, p IN 2..3 OUTBOUND "places/London" GRAPH "kShortestPathsGraph" + OPTIONS { uniqueVertices: "path" } + RETURN CONCAT_SEPARATOR(" -- ", INTERLEAVE(p.vertices[*].label, p.edges[*].travelTime)) +``` + +The same example with an added prune expression, with vertex and edge conditions: + +```aql +--- +name: GRAPHTRAV_graphPruneExample3 +description: '' +dataset: kShortestPathsGraph +--- +FOR v, e, p IN 2..3 OUTBOUND "places/London" GRAPH "kShortestPathsGraph" + PRUNE v.label == "Carlisle" OR e.travelTime > 3 + OPTIONS { uniqueVertices: "path" } + RETURN CONCAT_SEPARATOR(" -- ", INTERLEAVE(p.vertices[*].label, p.edges[*].travelTime)) +``` + +If either the **Carlisle** vertex or an edge with a travel time of over three +hours is encountered, the subsequent paths are pruned. In the example, this +removes the train connections to **Birmingham**, **Glasgow**, and **York**, +which come after **Carlisle**, as well as the connections to and via +**Edinburgh** because of the four hour duration for the section from **York** +to **Edinburgh**. + +If your graph is comprised of multiple vertex or edge collections, you can +also prune as soon as you reach a certain collection, using a condition like +`PRUNE IS_SAME_COLLECTION("stopCollection", v)`. + +If you want to only return the results of the depth at which the traversal +stopped due to the prune expression, you can use a `FILTER` in addition. You can +assign the evaluated result of a prune expression to a variable +(`PRUNE var = `) and use it for filtering: + +```aql +--- +name: GRAPHTRAV_graphPruneExample4 +description: '' +dataset: kShortestPathsGraph +--- +FOR v, e, p IN 2..3 OUTBOUND "places/London" GRAPH "kShortestPathsGraph" + PRUNE cond = v.label == "Carlisle" OR e.travelTime > 3 + OPTIONS { uniqueVertices: "path" } + FILTER cond + RETURN CONCAT_SEPARATOR(" -- ", INTERLEAVE(p.vertices[*].label, p.edges[*].travelTime)) +``` + +Only paths that end at **Carlisle** or with the last edge having a travel time +of over three hours are returned. 
This excludes the connection to **Cologne**
+from the results compared to the previous query.
+
+If you want to exclude the depth at which the prune expression stopped the
+traversal, you can assign the expression to a variable and use its negated value
+in a `FILTER`:
+
+```aql
+---
+name: GRAPHTRAV_graphPruneExample5
+description: ''
+dataset: kShortestPathsGraph
+---
+FOR v, e, p IN 2..3 OUTBOUND "places/London" GRAPH "kShortestPathsGraph"
+  PRUNE cond = v.label == "Carlisle" OR e.travelTime > 3
+  OPTIONS { uniqueVertices: "path" }
+  FILTER NOT cond
+  RETURN CONCAT_SEPARATOR(" -- ", INTERLEAVE(p.vertices[*].label, p.edges[*].travelTime))
+```
+
+This only returns the connection to **Cologne**, which is the opposite of the
+previous example.
+
+You may combine the prune variable with arbitrary other conditions in a `FILTER`
+operation. For example, you can remove results where the last edge has a lower
+travel time than the second to last edge of the path:
+
+```aql
+---
+name: GRAPHTRAV_graphPruneExample6
+description: ''
+dataset: kShortestPathsGraph
+---
+FOR v, e, p IN 2..5 OUTBOUND "places/London" GRAPH "kShortestPathsGraph"
+  PRUNE cond = v.label == "Carlisle" OR e.travelTime > 3
+  OPTIONS { uniqueVertices: "path" }
+  FILTER cond AND p.edges[-1].travelTime >= p.edges[-2].travelTime
+  RETURN CONCAT_SEPARATOR(" -- ", INTERLEAVE(p.vertices[*].label, p.edges[*].travelTime))
+```
+
+{{< info >}}
+The prune expression is **evaluated at every step of the traversal**. This
+includes any traversal depths below the specified minimum depth, despite not
+becoming part of the result. It also includes depth 0, which is the start vertex
+and a `null` edge.
+
+If you add prune conditions using the edge variable, make sure to account for
+the edge at depth 0 being `null`, as it may accidentally stop the traversal
+immediately. This may not be apparent due to the depth constraints.
+{{< /info >}}
+
+The following example shows a graph traversal starting at **London**, with a
+traversal depth between 2 and 3, and every station is only visited once:
+
+```aql
+---
+name: GRAPHTRAV_graphPruneExample7
+description: ''
+dataset: kShortestPathsGraph
+---
+FOR v, e, p IN 2..3 OUTBOUND "places/London" GRAPH "kShortestPathsGraph"
+  OPTIONS { uniqueVertices: "path" }
+  RETURN CONCAT_SEPARATOR(" -- ", INTERLEAVE(p.vertices[*].label, p.edges[*].travelTime))
+```
+
+If you add prune conditions to stop the traversal if the station is **Glasgow**
+or the travel time is less than some number, no results are returned. This is even
+the case for a value of `2.5`, for which two paths exist that fulfill the criterion
+– to **Cologne** and **Carlisle**:
+
+```aql
+---
+name: GRAPHTRAV_graphPruneExample8
+description: ''
+dataset: kShortestPathsGraph
+---
+FOR v,e,p IN 2..3 OUTBOUND "places/London" GRAPH "kShortestPathsGraph"
+  PRUNE v.label == "Glasgow" OR e.travelTime < 2.5
+  OPTIONS { uniqueVertices: "path" }
+  RETURN CONCAT_SEPARATOR(" -- ", INTERLEAVE(p.vertices[*].label, p.edges[*].travelTime))
+```
+
+The problem is that `null`, `false`, and `true` are all less than any number (`< 2.5`)
+because of AQL's [Type and value order](../fundamentals/type-and-value-order.md), and
+because the edge at depth 0 is always `null`. The prune condition is accidentally
+fulfilled at the start vertex, stopping the traversal too early. This similarly
+happens if you check an edge attribute for inequality (`!=`) and compare it to a
+string, for instance, which evaluates to `true` for the `null` value.
+ +The depth at which a traversal is stopped by pruning is considered as a result, +but in the above example, the minimum depth of `2` filters the start vertex out. +If you lower the minimum depth to `0`, you get **London** as the sole result. +This confirms that the traversal stopped at the start vertex. + +To avoid this problem, exclude the `null` value. For example, you can use +`e.travelTime > 0 AND e.travelTime < 2.5`, but more generic solutions are to +exclude depth 0 from the check (`LENGTH(p.edges) > 0`) or to simply ignore the +`null` edge (`e != null`): + +```aql +--- +name: GRAPHTRAV_graphPruneExample9 +description: '' +dataset: kShortestPathsGraph +--- +FOR v,e,p IN 2..3 OUTBOUND "places/London" GRAPH "kShortestPathsGraph" + PRUNE v.label == "Glasgow" OR (e != null AND e.travelTime < 2.5) + OPTIONS { uniqueVertices: "path" } + RETURN CONCAT_SEPARATOR(" -- ", INTERLEAVE(p.vertices[*].label, p.edges[*].travelTime)) +``` + +{{< warning >}} +You can use AQL functions in prune expressions but only those that can be +executed on DB-Servers, regardless of your deployment mode. The following +functions cannot be used in the expression: +- `CALL()` +- `APPLY()` +- `DOCUMENT()` +- `V8()` +- `SCHEMA_GET()` +- `SCHEMA_VALIDATE()` +- `VERSION()` +- `COLLECTIONS()` +- `CURRENT_USER()` +- `CURRENT_DATABASE()` +- `COLLECTION_COUNT()` +- `NEAR()` +- `WITHIN()` +- `WITHIN_RECTANGLE()` +- `FULLTEXT()` +- [User-defined functions (UDFs)](../user-defined-functions.md) +{{< /warning >}} + +## Using filters + +All three variables emitted by the traversals might as well be used in filter +statements. For some of these filter statements the optimizer can detect that it +is possible to prune paths of traversals earlier, hence filtered results are +not emitted to the variables in the first place. This may significantly +improve the performance of your query. Whenever a filter is not fulfilled, +the complete set of `vertex`, `edge` and `path` is skipped. All paths +with a length greater than the `max` depth are never computed. + +Filter conditions that are `AND`-combined can be optimized, but `OR`-combined +conditions cannot. + +### Filtering on paths + +Filtering on paths allows for the second most powerful filtering and may have the +second highest impact on performance. Using the path variable you can filter on +specific iteration depths. You can filter for absolute positions in the path +by specifying a positive number (which then qualifies for the optimizations), +or relative positions to the end of the path by specifying a negative number. + +#### Filtering edges on the path + +This example traversal filters all paths where the start edge (index 0) has the +attribute `theTruth` equal to `true`. 
The resulting paths are up to 5 items long: + +```aql +--- +name: GRAPHTRAV_graphFilterEdges +description: '' +dataset: traversalGraph +--- +FOR v, e, p IN 1..5 OUTBOUND 'circles/A' GRAPH 'traversalGraph' + FILTER p.edges[0].theTruth == true + RETURN { vertices: p.vertices[*]._key, edges: p.edges[*].label } +``` + +#### Filtering vertices on the path + +Similar to filtering the edges on the path, you can also filter the vertices: + +```aql +--- +name: GRAPHTRAV_graphFilterVertices +description: '' +dataset: traversalGraph +--- +FOR v, e, p IN 1..5 OUTBOUND 'circles/A' GRAPH 'traversalGraph' + FILTER p.vertices[1]._key == "G" + RETURN { vertices: p.vertices[*]._key, edges: p.edges[*].label } +``` + +#### Combining several filters + +You can combine filters in any way you like: + +```aql +--- +name: GRAPHTRAV_graphFilterCombine +description: '' +dataset: traversalGraph +--- +FOR v, e, p IN 1..5 OUTBOUND 'circles/A' GRAPH 'traversalGraph' + FILTER p.edges[0].theTruth == true + AND p.edges[1].theFalse == false + FILTER p.vertices[1]._key == "G" + RETURN { vertices: p.vertices[*]._key, edges: p.edges[*].label } +``` + +The query filters all paths where the first edge has the attribute +`theTruth` equal to `true`, the first vertex is `"G"` and the second edge has +the attribute `theFalse` equal to `false`. The resulting paths are up to +5 items long. + +**Note**: Despite the `min` depth of 1, this only returns results of +depth 2. This is because for all results in depth 1, the second edge does not +exist and hence cannot fulfill the condition here. + +#### Filter on the entire path + +With the help of array comparison operators filters can also be defined +on the entire path, like `ALL` edges should have `theTruth == true`: + +```aql +--- +name: GRAPHTRAV_graphFilterEntirePath +description: '' +dataset: traversalGraph +--- +FOR v, e, p IN 1..5 OUTBOUND 'circles/A' GRAPH 'traversalGraph' + FILTER p.edges[*].theTruth ALL == true + RETURN { vertices: p.vertices[*]._key, edges: p.edges[*].label } +``` + +Or `NONE` of the edges should have `theTruth == true`: + +```aql +--- +name: GRAPHTRAV_graphFilterPathEdges +description: '' +dataset: traversalGraph +--- +FOR v, e, p IN 1..5 OUTBOUND 'circles/A' GRAPH 'traversalGraph' + FILTER p.edges[*].theTruth NONE == true + RETURN { vertices: p.vertices[*]._key, edges: p.edges[*].label } +``` + +Both examples above are recognized by the optimizer and can potentially use other indexes +than the edge index. + +It is also possible to define that at least one edge on the path has to fulfill the condition: + +```aql +--- +name: GRAPHTRAV_graphFilterPathAnyEdge +description: '' +dataset: traversalGraph +--- +FOR v, e, p IN 1..5 OUTBOUND 'circles/A' GRAPH 'traversalGraph' + FILTER p.edges[*].theTruth ANY == true + RETURN { vertices: p.vertices[*]._key, edges: p.edges[*].label } +``` + +It is guaranteed that at least one, but potentially more edges fulfill the condition. +All of the above filters can be defined on vertices in the exact same way. + +### Filtering on the path vs. filtering on vertices or edges + +Filtering on the path influences the Iteration on your graph. If certain conditions +aren't met, the traversal may stop continuing along this path. + +In contrast filters on vertex or edge only express whether you're interested in the actual value of these +documents. Thus, it influences the list of returned documents (if you return v or e) similar +as specifying a non-null `min` value. 
If you specify a min value of 2, the traversal over the first
+two nodes of these paths has to be executed - you just won't see them in your result array.
+
+Filters on vertices or edges are similar - the traverser has to walk along these nodes, since
+you may be interested in documents further down the path.
+
+### Examples
+
+Create a simple symmetric traversal demonstration graph:
+
+![traversal graph](../../../../images/traversal_graph.png)
+
+```js
+---
+name: GRAPHTRAV_01_create_graph
+description: ''
+---
+~addIgnoreCollection("circles");
+~addIgnoreCollection("edges");
+var examples = require("@arangodb/graph-examples/example-graph");
+var graph = examples.loadGraph("traversalGraph");
+db.circles.toArray();
+db.edges.toArray();
+print("once you don't need them anymore, clean them up:");
+examples.dropGraph("traversalGraph");
+```
+
+To get started, we select the full graph. For a better overview, we only return
+the vertex IDs:
+
+```aql
+---
+name: GRAPHTRAV_02_traverse_all_a
+description: ''
+dataset: traversalGraph
+---
+FOR v IN 1..3 OUTBOUND 'circles/A' GRAPH 'traversalGraph'
+  RETURN v._key
+```
+
+```aql
+---
+name: GRAPHTRAV_02_traverse_all_b
+description: ''
+dataset: traversalGraph
+---
+FOR v IN 1..3 OUTBOUND 'circles/A' edges RETURN v._key
+```
+
+We can nicely see that it is heading for the first outer vertex, then goes back to
+the branch to descend into the next tree. After that it returns to our start node,
+to descend again. As we can see, both queries return the same result: the first one
+uses the named graph, the second uses the edge collections directly.
+
+Now we only want the elements of a specific depth (min = max = 2), the ones that
+are right behind the fork:
+
+```aql
+---
+name: GRAPHTRAV_03_traverse_3a
+description: ''
+dataset: traversalGraph
+---
+FOR v IN 2..2 OUTBOUND 'circles/A' GRAPH 'traversalGraph'
+  RETURN v._key
+```
+
+```aql
+---
+name: GRAPHTRAV_03_traverse_3b
+description: ''
+dataset: traversalGraph
+---
+FOR v IN 2 OUTBOUND 'circles/A' GRAPH 'traversalGraph'
+  RETURN v._key
+```
+
+As you can see, we can express this in two ways: with or without the `max` depth
+parameter.
+
+### Filter examples
+
+Now let's start to add some filters. We want to cut off the branch on the right
+side of the graph. We may filter in two ways:
+
+- we know the vertex at depth 1 has `_key` == `G`
+- we know the `label` attribute of the edge connecting **A** to **G** is `right_foo`
+
+```aql
+---
+name: GRAPHTRAV_04_traverse_4a
+description: ''
+dataset: traversalGraph
+---
+FOR v, e, p IN 1..3 OUTBOUND 'circles/A' GRAPH 'traversalGraph'
+  FILTER p.vertices[1]._key != 'G'
+  RETURN v._key
+```
+
+```aql
+---
+name: GRAPHTRAV_04_traverse_4b
+description: ''
+dataset: traversalGraph
+---
+FOR v, e, p IN 1..3 OUTBOUND 'circles/A' GRAPH 'traversalGraph'
+  FILTER p.edges[0].label != 'right_foo'
+  RETURN v._key
+```
+
+As we can see, all vertices behind **G** are skipped in both queries.
+The first filters on the vertex `_key`, the second on an edge label.
+Note again, as soon as a filter is not fulfilled for any of the three elements
+`v`, `e` or `p`, the complete set of these is excluded from the result.
+ +We also may combine several filters, for instance to filter out the right branch +(**G**), and the **E** branch: + +```aql +--- +name: GRAPHTRAV_05_traverse_5a +description: '' +dataset: traversalGraph +--- +FOR v,e,p IN 1..3 OUTBOUND 'circles/A' GRAPH 'traversalGraph' + FILTER p.vertices[1]._key != 'G' + FILTER p.edges[1].label != 'left_blub' + RETURN v._key +``` + +```aql +--- +name: GRAPHTRAV_05_traverse_5b +description: '' +dataset: traversalGraph +--- +FOR v,e,p IN 1..3 OUTBOUND 'circles/A' GRAPH 'traversalGraph' + FILTER p.vertices[1]._key != 'G' AND p.edges[1].label != 'left_blub' + RETURN v._key +``` + +As you can see, combining two `FILTER` statements with an `AND` has the same result. + +## Comparing OUTBOUND / INBOUND / ANY + +All our previous examples traversed the graph in `OUTBOUND` edge direction. +You may however want to also traverse in reverse direction (`INBOUND`) or +both (`ANY`). Since `circles/A` only has outbound edges, we start our queries +from `circles/E`: + +```aql +--- +name: GRAPHTRAV_06_traverse_6a +description: '' +dataset: traversalGraph +--- +FOR v IN 1..3 OUTBOUND 'circles/E' GRAPH 'traversalGraph' + RETURN v._key +``` + +```aql +--- +name: GRAPHTRAV_06_traverse_6b +description: '' +dataset: traversalGraph +--- +FOR v IN 1..3 INBOUND 'circles/E' GRAPH 'traversalGraph' + RETURN v._key +``` + +```aql +--- +name: GRAPHTRAV_06_traverse_6c +description: '' +dataset: traversalGraph +--- +FOR v IN 1..3 ANY 'circles/E' GRAPH 'traversalGraph' + RETURN v._key +``` + +The first traversal only walks in the forward (`OUTBOUND`) direction. +Therefore from **E** we only can see **F**. Walking in reverse direction +(`INBOUND`), we see the path to **A**: **B** → **A**. + +Walking in forward and reverse direction (`ANY`) we can see a more diverse result. +First of all, we see the simple paths to **F** and **A**. However, these vertices +have edges in other directions and they are traversed. + +**Note**: The traverser may use identical edges multiple times. For instance, +if it walks from **E** to **F**, it continues to walk from **F** to **E** +using the same edge once again. Due to this, we see duplicate nodes in the result. + +Please note that the direction can't be passed in by a bind parameter. + +## Use the AQL explainer for optimizations + +Now let's have a look what the optimizer does behind the curtain and inspect +traversal queries using [the explainer](../execution-and-performance/query-optimization.md): + +```aql +--- +name: GRAPHTRAV_07_traverse_7 +description: '' +dataset: traversalGraph +explain: true +--- +FOR v,e,p IN 1..3 OUTBOUND 'circles/A' GRAPH 'traversalGraph' + LET localScopeVar = RAND() > 0.5 + FILTER p.edges[0].theTruth != localScopeVar + RETURN v._key +``` + +```aql +--- +name: GRAPHTRAV_07_traverse_8 +description: '' +dataset: traversalGraph +explain: true +--- +FOR v,e,p IN 1..3 OUTBOUND 'circles/A' GRAPH 'traversalGraph' + FILTER p.edges[0].label == 'right_foo' + RETURN v._key +``` + +We now see two queries: In one we add a `localScopeVar` variable, which is outside +the scope of the traversal itself - it is not known inside of the traverser. +Therefore, this filter can only be executed after the traversal, which may be +undesired in large graphs. The second query on the other hand only operates on the +path, and therefore this condition can be used during the execution of the traversal. +Paths that are filtered out by this condition won't be processed at all. 
+ +And finally clean it up again: + +```js +--- +name: GRAPHTRAV_99_drop_graph +description: '' +--- +~examples.loadGraph("traversalGraph"); +var examples = require("@arangodb/graph-examples/example-graph"); +examples.dropGraph("traversalGraph"); +``` + +If this traversal is not powerful enough for your needs, like you cannot describe +your conditions as AQL filter statements, then you might want to have a look at +the [edge collection methods](../../develop/javascript-api/@arangodb/collection-object.md#edge-documents) +in the JavaScript API. + +Also see how to [combine graph traversals](../examples-and-query-patterns/traversals.md). diff --git a/site/content/arangodb/oem/aql/high-level-operations/_index.md b/site/content/arangodb/oem/aql/high-level-operations/_index.md new file mode 100644 index 0000000000..78432ed69f --- /dev/null +++ b/site/content/arangodb/oem/aql/high-level-operations/_index.md @@ -0,0 +1,9 @@ +--- +title: High-level AQL operations +menuTitle: High-level Operations +weight: 25 +description: >- + High-level operations are the core language constructs of the query language + to perform actions like finding and returning data, as well as creating and + modifying documents +--- diff --git a/site/content/arangodb/oem/aql/high-level-operations/collect.md b/site/content/arangodb/oem/aql/high-level-operations/collect.md new file mode 100644 index 0000000000..cdcc8dcd5a --- /dev/null +++ b/site/content/arangodb/oem/aql/high-level-operations/collect.md @@ -0,0 +1,375 @@ +--- +title: '`COLLECT` operation in AQL' +menuTitle: COLLECT +weight: 40 +description: >- + The `COLLECT` operation can group data by one or multiple grouping criteria, + retrieve all distinct values, count how often values occur, and calculate + statistical properties efficiently +--- +The different variants of `COLLECT` cover most needs for grouping and aggregating +data. For aggregation using a sliding window, see the [`WINDOW` operation](window.md). + +## Syntax + +There are several syntax variants for `COLLECT` operations: + +
COLLECT variableName = expression
+COLLECT variableName = expression INTO groupsVariable
+COLLECT variableName = expression INTO groupsVariable = projectionExpression
+COLLECT variableName = expression INTO groupsVariable KEEP keepVariable
+COLLECT variableName = expression WITH COUNT INTO countVariable
+COLLECT variableName = expression AGGREGATE variableName = aggregateExpression
+COLLECT variableName = expression AGGREGATE variableName = aggregateExpression INTO groupsVariable
+COLLECT AGGREGATE variableName = aggregateExpression
+COLLECT AGGREGATE variableName = aggregateExpression INTO groupsVariable
+COLLECT WITH COUNT INTO countVariable
+
+All variants can optionally end with an `OPTIONS { … }` clause.
+
+{{< info >}}
+The `COLLECT` operation eliminates all local variables in the current scope.
+After a `COLLECT`, only the variables introduced by `COLLECT` itself are available.
+{{< /info >}}
+
+## Grouping syntaxes
+
+The first syntax form of `COLLECT` only groups the result by the defined group
+criteria specified in *expression*. In order to further process the results
+produced by `COLLECT`, a new variable (specified by *variableName*) is introduced.
+This variable contains the group value.
+
+Here's an example query that finds the distinct values in `u.city` and makes
+them available in variable `city`:
+
+```aql
+FOR u IN users
+  COLLECT city = u.city
+  RETURN {
+    "city" : city
+  }
+```
+
+The second form does the same as the first form, but additionally introduces a
+variable (specified by *groupsVariable*) that contains all elements that fell into the
+group. This works as follows: The *groupsVariable* variable is an array containing
+as many elements as there are in the group. Each member of that array is
+a JSON object in which the value of every variable that is defined in the
+AQL query is bound to the corresponding attribute. Note that this considers
+all variables that are defined before the `COLLECT` statement, but not those on
+the top level (outside of any `FOR`), unless the `COLLECT` statement is itself
+on the top level, in which case all variables are taken. Furthermore note
+that it is possible that the optimizer moves `LET` statements out of `FOR`
+statements to improve performance.
+
+```aql
+FOR u IN users
+  COLLECT city = u.city INTO groups
+  RETURN {
+    "city" : city,
+    "usersInCity" : groups
+  }
+```
+
+In the above example, the array `users` will be grouped by the attribute
+`city`. The result is a new array of documents, with one element per distinct
+`u.city` value. The elements from the original array (here: `users`) per city are
+made available in the variable `groups`. This is due to the `INTO` clause.
+
+`COLLECT` also allows specifying multiple group criteria. Individual group
+criteria can be separated by commas:
+
+```aql
+FOR u IN users
+  COLLECT country = u.country, city = u.city INTO groups
+  RETURN {
+    "country" : country,
+    "city" : city,
+    "usersInCity" : groups
+  }
+```
+
+In the above example, the array `users` is grouped by country first and then
+by city, and for each distinct combination of country and city, the users
+will be returned.
+
+## Discarding obsolete variables
+
+The third form of `COLLECT` allows rewriting the contents of the *groupsVariable*
+using an arbitrary *projectionExpression*:
+
+```aql
+FOR u IN users
+  COLLECT country = u.country, city = u.city INTO groups = u.name
+  RETURN {
+    "country" : country,
+    "city" : city,
+    "userNames" : groups
+  }
+```
+
+In the above example, the *projectionExpression* is just `u.name`. Therefore,
+only this attribute is copied into the *groupsVariable* for each document.
+This is probably much more efficient than copying all variables from the scope into
+the *groupsVariable* as it would happen without a *projectionExpression*.
+ +The expression following `INTO` can also be used for arbitrary computations: + +```aql +FOR u IN users + COLLECT country = u.country, city = u.city INTO groups = { + "name" : u.name, + "isActive" : u.status == "active" + } + RETURN { + "country" : country, + "city" : city, + "usersInCity" : groups + } +``` + +`COLLECT` also provides an optional `KEEP` clause that can be used to control +which variables will be copied into the variable created by `INTO`. If no +`KEEP` clause is specified, all variables from the scope will be copied as +sub-attributes into the *groupsVariable*. +This is safe but can have a negative impact on performance if there +are many variables in scope or the variables contain massive amounts of data. + +The following example limits the variables that are copied into the *groupsVariable* +to just `name`. The variables `u` and `someCalculation` also present in the scope +will not be copied into *groupsVariable* because they are not listed in the `KEEP` clause: + +```aql +FOR u IN users + LET name = u.name + LET someCalculation = u.value1 + u.value2 + COLLECT city = u.city INTO groups KEEP name + RETURN { + "city" : city, + "userNames" : groups[*].name + } +``` + +`KEEP` is only valid in combination with `INTO`. Only valid variable names can +be used in the `KEEP` clause. `KEEP` supports the specification of multiple +variable names. + +## Group length calculation + +`COLLECT` also provides a special `WITH COUNT` clause that can be used to +determine the number of group members efficiently. + +The simplest form just returns the number of items that made it into the +`COLLECT`: + +```aql +FOR u IN users + COLLECT WITH COUNT INTO length + RETURN length +``` + +The above is equivalent to, but less efficient than: + +```aql +RETURN LENGTH(users) +``` + +The `WITH COUNT` clause can also be used to efficiently count the number +of items in each group: + +```aql +FOR u IN users + COLLECT age = u.age WITH COUNT INTO length + RETURN { + "age" : age, + "count" : length + } +``` + +{{< info >}} +The `WITH COUNT` clause can only be used together with an `INTO` clause. +{{< /info >}} + +## Aggregation + +A `COLLECT` statement can be used to perform aggregation of data per group. To +only determine group lengths, the `WITH COUNT INTO` variant of `COLLECT` can be +used as described before. + +For other aggregations, it is possible to run aggregate functions on the `COLLECT` +results: + +```aql +FOR u IN users + COLLECT ageGroup = FLOOR(u.age / 5) * 5 INTO g + RETURN { + "ageGroup" : ageGroup, + "minAge" : MIN(g[*].u.age), + "maxAge" : MAX(g[*].u.age) + } +``` + +The above however requires storing all group values during the collect operation for +all groups, which can be inefficient. + +The special `AGGREGATE` variant of `COLLECT` allows building the aggregate values +incrementally during the collect operation, and is therefore often more efficient. + +With the `AGGREGATE` variant the above query becomes: + +```aql +FOR u IN users + COLLECT ageGroup = FLOOR(u.age / 5) * 5 + AGGREGATE minAge = MIN(u.age), maxAge = MAX(u.age) + RETURN { + ageGroup, + minAge, + maxAge + } +``` + +The `AGGREGATE` keyword can only be used after the `COLLECT` keyword. If used, it +must directly follow the declaration of the grouping keys. 
If no grouping keys +are used, it must follow the `COLLECT` keyword directly: + +```aql +FOR u IN users + COLLECT AGGREGATE minAge = MIN(u.age), maxAge = MAX(u.age) + RETURN { + minAge, + maxAge + } +``` + +Only specific expressions are allowed on the right-hand side of each `AGGREGATE` +assignment: + +- on the top level, an aggregate expression must be a call to one of the + supported aggregation functions: + - `LENGTH()` / `COUNT()` + - `MIN()` + - `MAX()` + - `SUM()` + - `AVERAGE()` / `AVG()` + - `STDDEV_POPULATION()` / `STDDEV()` + - `STDDEV_SAMPLE()` + - `VARIANCE_POPULATION()` / `VARIANCE()` + - `VARIANCE_SAMPLE()` + - `UNIQUE()` + - `SORTED_UNIQUE()` + - `COUNT_DISTINCT()` / `COUNT_UNIQUE()` + - `BIT_AND()` + - `BIT_OR()` + - `BIT_XOR()` + +- an aggregate expression must not refer to variables introduced by the `COLLECT` itself + +## `COLLECT` vs. `RETURN DISTINCT` + +In order to make a result set unique, one can either use `COLLECT` or +`RETURN DISTINCT`. + +```aql +FOR u IN users + RETURN DISTINCT u.age +``` + +```aql +FOR u IN users + COLLECT age = u.age + RETURN age +``` + +Behind the scenes, both variants create a *CollectNode*. However, they use +different implementations of `COLLECT` that have different properties: + +- `RETURN DISTINCT` **maintains the order of results**, but it is limited to + a single value. + +- `COLLECT` **changes the order of results** (sorted or undefined), but it + supports multiple values and is more flexible than `RETURN DISTINCT`. + +Aside from `COLLECT`s sophisticated grouping and aggregation capabilities, it +allows you to place a `LIMIT` operation before `RETURN` to potentially stop the +`COLLECT` operation early. + +## `COLLECT` options + +### `method` + +There are two variants of `COLLECT` that the optimizer can choose from: +the *sorted* and the *hash* variant. The `method` option can be used in a +`COLLECT` statement to inform the optimizer about the preferred method, +`"sorted"` or `"hash"`. + +```aql +COLLECT ... OPTIONS { method: "sorted" } +``` + +If no method is specified by the user, then the optimizer will create a plan +that uses the *sorted* method, and an additional plan using the *hash* method +if the `COLLECT` statement qualifies for it. + +If the method is explicitly set to *sorted*, then the optimizer will always use +the *sorted* variant of `COLLECT` and not even create a plan using the *hash* +variant. If it is explicitly set to *hash*, then the optimizer will create a +plan using the *hash* method **only if the `COLLECT` statement qualifies**. +Not all `COLLECT` statements can use the *hash* method, in particular ones that +do not perform any grouping. In case the `COLLECT` statement qualifies, +there will only be one plan that uses the *hash* method. Otherwise, the +optimizer will default to the *sorted* method. + +The *sorted* method requires its input to be sorted by the group criteria +specified in the `COLLECT` clause. To ensure correctness of the result, the +optimizer will automatically insert a `SORT` operation into the query in front +of the `COLLECT` statement. The optimizer may be able to optimize away that +`SORT` operation later if a sorted index is present on the group criteria. + +In case a `COLLECT` statement qualifies for using the *hash* variant, the +optimizer will create an extra plan for it at the beginning of the planning +phase. In this plan, no extra `SORT` statement will be added in front of the +`COLLECT`. This is because the *hash* variant of `COLLECT` does not require +sorted input. 
Instead, a `SORT` statement will be added after the `COLLECT` to +sort its output. This `SORT` statement may be optimized away again in later +stages. + +If the sort order of the `COLLECT` is irrelevant to the user, adding the extra +instruction `SORT null` after the `COLLECT` will allow the optimizer to remove +the sorts altogether: + +```aql +FOR u IN users + COLLECT age = u.age + SORT null /* note: will be optimized away */ + RETURN age +``` + +Which `COLLECT` variant is used by the optimizer if no preferred method is set +explicitly depends on the optimizer's cost estimations. The created plans with +the different `COLLECT` variants will be shipped through the regular +optimization pipeline. In the end, the optimizer will pick the plan with the +lowest estimated total cost as usual. + +In general, the *sorted* variant of `COLLECT` should be preferred in cases when +there is a sorted index present on the group criteria. In this case the +optimizer can eliminate the `SORT` operation in front of the `COLLECT`, so that +no `SORT` will be left. + +If there is no sorted index available on the group criteria, the up-front sort +required by the *sorted* variant can be expensive. In this case it is likely +that the optimizer will prefer the *hash* variant of `COLLECT`, which does not +require its input to be sorted. + +Which variant of `COLLECT` will actually be used can be figured out by looking +at the execution plan of a query, specifically the comment of the *CollectNode*: + +```aql +Execution plan: + Id NodeType Est. Comment + 1 SingletonNode 1 * ROOT + 2 EnumerateCollectionNode 5 - FOR doc IN coll /* full collection scan, projections: `name` */ + 3 CalculationNode 5 - LET #2 = doc.`name` /* attribute expression */ /* collections used: doc : coll */ + 4 CollectNode 5 - COLLECT name = #2 /* hash */ + 6 SortNode 5 - SORT name ASC /* sorting strategy: standard */ + 5 ReturnNode 5 - RETURN name +``` diff --git a/site/content/arangodb/oem/aql/high-level-operations/filter.md b/site/content/arangodb/oem/aql/high-level-operations/filter.md new file mode 100644 index 0000000000..71fdd19cb2 --- /dev/null +++ b/site/content/arangodb/oem/aql/high-level-operations/filter.md @@ -0,0 +1,125 @@ +--- +title: '`FILTER` operation in AQL' +menuTitle: FILTER +weight: 15 +description: >- + The `FILTER` operation lets you restrict the results to elements that match + arbitrary logical conditions +--- +## Syntax + +
FILTER expression
+ +*expression* must be a condition that evaluates to either `false` or `true`. + +## Usage + +If the condition result is false, the current element is skipped, so it will +not be processed further and not be part of the result. If the condition is +true, the current element is not skipped and can be further processed. + +See [Operators](../operators.md) for a list of comparison operators, logical +operators etc. that you can use in conditions. + +```aql +FOR u IN users + FILTER u.active == true && u.age < 39 + RETURN u +``` + +It is allowed to specify multiple `FILTER` statements in a query, even in +the same block. If multiple `FILTER` statements are used, their results will be +combined with a logical `AND`, meaning all filter conditions must be true to +include an element. + +```aql +FOR u IN users + FILTER u.active == true + FILTER u.age < 39 + RETURN u +``` + +In the above example, all array elements of `users` that have an attribute +`active` with value `true` and that have an attribute `age` with a value less +than `39` (including `null` ones) will be included in the result. All other +elements of `users` will be skipped and not be included in the result produced +by `RETURN`. + +{{< info >}} +See [Accessing Data from Collections](../fundamentals/accessing-data-from-collections.md) +for a description of the impact of non-existent or null attributes. +{{< /info >}} + +While `FILTER` typically occurs in combination with `FOR`, it can also be used +at the top level or in subqueries without a surrounding `FOR` loop. + +```aql +FILTER false +RETURN ASSERT(false, "never reached") +``` + +## Order of operations + +Note that the positions of `FILTER` statements can influence the result of a query. +There are 16 active users in the [test data](../examples-and-query-patterns/_index.md#example-data) +for instance: + +```aql +FOR u IN users + FILTER u.active == true + RETURN u +``` + +We can limit the result set to 5 users at most: + +```aql +FOR u IN users + FILTER u.active == true + LIMIT 5 + RETURN u +``` + +This may return the user documents of Jim, Diego, Anthony, Michael and Chloe for +instance. Which ones are returned is undefined, since there is no `SORT` statement +to ensure a particular order. If we add a second `FILTER` statement to only return +women... + +```aql +FOR u IN users + FILTER u.active == true + LIMIT 5 + FILTER u.gender == "f" + RETURN u +``` + +... it might just return the Chloe document, because the `LIMIT` is applied before +the second `FILTER`. No more than 5 documents arrive at the second `FILTER` block, +and not all of them fulfill the gender criterion, even though there are more than +5 active female users in the collection. A more deterministic result can be achieved +by adding a `SORT` block: + +```aql +FOR u IN users + FILTER u.active == true + SORT u.age ASC + LIMIT 5 + FILTER u.gender == "f" + RETURN u +``` + +This will return the users *Mariah*, *Mary*, and *Isabella*. If sorted by age in +`DESC` order, then the *Sophia* and *Emma* documents are returned. A `FILTER` after a +`LIMIT` is not very common however, and you probably want such a query instead: + +```aql +FOR u IN users + FILTER u.active == true AND u.gender == "f" + SORT u.age ASC + LIMIT 5 + RETURN u +``` + +The significance of where `FILTER` blocks are placed allows that this single +keyword can assume the roles of two SQL keywords, `WHERE` as well as `HAVING`. +AQL's `FILTER` thus works with `COLLECT` aggregates the same as with any other +intermediate result, document attribute etc. 
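+
+For example, filtering on an aggregated value (similar to SQL's `HAVING`) could
+look like the following sketch, again using the `users` example data:
+
+```aql
+FOR u IN users
+  COLLECT age = u.age WITH COUNT INTO numUsers
+  FILTER numUsers > 1
+  RETURN { age, numUsers }
+```
+
+After the `COLLECT`, only the variables it introduces (`age` and `numUsers`) are
+in scope, and the `FILTER` operates on them just like on any other variable.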
diff --git a/site/content/arangodb/oem/aql/high-level-operations/for.md b/site/content/arangodb/oem/aql/high-level-operations/for.md new file mode 100644 index 0000000000..fd5fb14275 --- /dev/null +++ b/site/content/arangodb/oem/aql/high-level-operations/for.md @@ -0,0 +1,251 @@ +--- +title: '`FOR` operation in AQL' +menuTitle: FOR +weight: 5 +description: >- + The versatile `FOR` operation can iterate over a collection or View, the + elements of an array, or traverse a graph +--- +## Syntax + +The general syntax for iterating over collections and arrays is: + +
FOR variableName IN expression
+ +There is also a special variant for [graph traversals](../graph-queries/traversals.md): + +
FOR vertexVariableName [, edgeVariableName [, pathVariableName ] ] IN traversalExpression
+ +For Views, there is a special (optional) [`SEARCH` keyword](search.md): + +
FOR variableName IN viewName SEARCH searchExpression
+ +{{< info >}} +Views cannot be used as edge collections in traversals: + +```aql +FOR v IN 1..3 ANY startVertex viewName /* invalid! */ +``` +{{< /info >}} + +All variants can optionally end with an `OPTIONS { … }` clause. + +## Usage + +Each array element returned by *expression* is visited exactly once. It is +required that *expression* returns an array in all cases. The empty array is +allowed, too. The current array element is made available for further processing +in the variable specified by *variableName*. + +```aql +FOR u IN users + RETURN u +``` + +This iterates over all elements of the array referred to as `users`. This array +consists of all documents stored in the collection named `users` in this case. +The `FOR` operation makes the current array element available in a variable `u`, +which is not modified in this example but simply returned as a result using the +`RETURN` operation. + +{{< info >}} +When iterating over a collection, the order of documents is undefined unless you +define an explicit sort order with a [`SORT` operation](sort.md). +{{< /info >}} + +The variable introduced by `FOR` is available until the scope the `FOR` is +placed in is closed. + +Another example that uses a statically declared array of values to iterate over: + +```aql +FOR year IN [ 2011, 2012, 2013 ] + RETURN { "year" : year, "isLeapYear" : year % 4 == 0 && (year % 100 != 0 || year % 400 == 0) } +``` + +Nesting of multiple `FOR` statements is allowed, too. When `FOR` statements are +nested, a cross product of the array elements returned by the individual `FOR` +statements will be created. + +```aql +FOR u IN users + FOR l IN locations + RETURN { "user" : u, "location" : l } +``` + +In this example, there are two array iterations: an outer iteration over the array +`users` plus an inner iteration over the array `locations`. The inner array is +traversed as many times as there are elements in the outer array. For each +iteration, the current values of `users` and `locations` are made available for +further processing in the variable `u` and `l`. + +You can also use subqueries, for example, to iterate over a collection +independently and get the results back as an array, that you can then access in +an outer `FOR` loop: + +```aql +FOR u IN users + LET subquery = (FOR l IN locations RETURN l.location) + RETURN { "user": u, "locations": subquery } +``` + +Also see [Combining queries with subqueries](../fundamentals/subqueries.md). + +## Options + +For collections and Views, the `FOR` construct supports an optional `OPTIONS` +clause to modify the behavior. The general syntax is as follows: + +
FOR variableName IN expression OPTIONS { option: value, ... }
+ +### `indexHint` + +For collections, index hints can be given to the optimizer with the `indexHint` +option. The value can be a single **index name** or a list of index names in +order of preference: + +```aql +FOR … IN … OPTIONS { indexHint: "byName" } +``` + +```aql +FOR … IN … OPTIONS { indexHint: ["byName", "byColor"] } +``` + +Whenever there is a chance to potentially use an index for this `FOR` loop, +the optimizer will first check if the specified index can be used. In case of +an array of indexes, the optimizer will check the feasibility of each index in +the specified order. It will use the first suitable index, regardless of +whether it would normally use a different index. + +If none of the specified indexes is suitable, then it falls back to its normal +logic to select another index or fails if `forceIndexHint` is enabled. + +### `forceIndexHint` + +Index hints are not enforced by default. If `forceIndexHint` is set to `true`, +then an error is generated if `indexHint` does not contain a usable index, +instead of using a fallback index or not using an index at all. + +```aql +FOR … IN … OPTIONS { indexHint: … , forceIndexHint: true } +``` + +### `disableIndex` + +Introduced in: v3.9.1 + +In some rare cases it can be beneficial to not do an index lookup or scan, +but to do a full collection scan. +An index lookup can be more expensive than a full collection scan if +the index lookup produces many (or even all documents) and the query cannot +be satisfied from the index data alone. + +Consider the following query and an index on the `value` attribute being +present: + +```aql +FOR doc IN collection + FILTER doc.value <= 99 + RETURN doc.other +``` + +In this case, the optimizer will likely pick the index on `value`, because +it will cover the query's `FILTER` condition. To return the value for the +`other` attribute, the query must additionally look up the documents for +each index value that passes the `FILTER` condition. If the number of +index entries is large (close or equal to the number of documents in the +collection), then using an index can cause more work than just scanning +over all documents in the collection. + +The optimizer will likely prefer index scans over full collection scans, +even if an index scan turns out to be slower in the end. +You can force the optimizer to not use an index for any given `FOR` +loop by using the `disableIndex` hint and setting it to `true`: + +```aql +FOR doc IN collection OPTIONS { disableIndex: true } + FILTER doc.value <= 99 + RETURN doc.other +``` + +Using `disableIndex: false` has no effect on geo indexes or fulltext indexes. + +Note that setting `disableIndex: true` plus `indexHint` is ambiguous. In +this case the optimizer will always prefer the `disableIndex` hint. + +### `maxProjections` + +Introduced in: v3.9.1 + +By default, the query optimizer will consider up to 5 document attributes +per FOR loop to be used as projections. If more than 5 attributes of a +collection are accessed in a `FOR` loop, the optimizer will prefer to +extract the full document and not use projections. + +The threshold value of 5 attributes is arbitrary and can be adjusted +by using the `maxProjections` hint. +The default value for `maxProjections` is `5`, which is compatible with the +previously hard-coded default value. 
+ +For example, using a `maxProjections` hint of 7, the following query will +extract 7 attributes as projections from the original document: + +```aql +FOR doc IN collection OPTIONS { maxProjections: 7 } + RETURN [ doc.val1, doc.val2, doc.val3, doc.val4, doc.val5, doc.val6, doc.val7 ] +``` + +Normally it is not necessary to adjust the value of `maxProjections`, but +there are a few corner cases where it can make sense: + +- It can be beneficial to increase `maxProjections` when extracting many small + attributes from very large documents, and a full copy of the documents should + be avoided. +- It can be beneficial to decrease `maxProjections` to _avoid_ using + projections, if the cost of projections is higher than doing copies of the + full documents. This can be the case for very small documents. + +{{< info >}} +Starting with version 3.10, `maxProjections` can be used in +[Graph Traversals](../graph-queries/traversals.md#working-with-named-graphs) (Enterprise Edition only). +{{< /info >}} + +### `useCache` + +Introduced in: v3.10.0 + +You can disable in-memory caches that you may have enabled for persistent indexes +on a case-by-case basis. This is useful for queries that access indexes with +enabled in-memory caches, but for which it is known that using the cache will +have a negative performance impact. In this case, you can set the `useCache` +hint to `false`: + +```aql +FOR doc IN collection OPTIONS { useCache: false } + FILTER doc.value == @value + ... +``` + +You can set the hint individually per `FOR` loop. +If you do not set the `useCache` hint, it will implicitly default to `true`. + +The hint does not have any effect on `FOR` loops that do not use indexes, or +on `FOR` loops that access indexes that do not have in-memory caches enabled. +It also does not affect queries for which an existing in-memory +cache cannot be used (i.e. because the query's filter condition does not contain +equality lookups for all index attributes). It cannot be used for `FOR` +operations that iterate over Views or perform graph traversals. + +Also see [Caching of index values](../../indexes-and-search/indexing/working-with-indexes/persistent-indexes.md#caching-of-index-values). + +### `lookahead` + +The multi-dimensional index type `zkd` supports an optional index hint for +tweaking performance: + +```aql +FOR … IN … OPTIONS { lookahead: 32 } +``` + +See [Multi-dimensional indexes](../../indexes-and-search/indexing/working-with-indexes/multi-dimensional-indexes.md#lookahead-index-hint). diff --git a/site/content/arangodb/oem/aql/high-level-operations/insert.md b/site/content/arangodb/oem/aql/high-level-operations/insert.md new file mode 100644 index 0000000000..88acdfdf4f --- /dev/null +++ b/site/content/arangodb/oem/aql/high-level-operations/insert.md @@ -0,0 +1,215 @@ +--- +title: '`INSERT` operation in AQL' +menuTitle: INSERT +weight: 65 +description: >- + You can use the `INSERT` operation to create new documents in a collection +--- +Each `INSERT` operation is restricted to a single collection, and the +[collection name](../../concepts/data-structure/collections.md#collection-names) must not be dynamic. +Only a single `INSERT` statement per collection is allowed per AQL query, and +it cannot be followed by read or write operations that access the same +collection, by traversal operations, or AQL functions that can read documents. + +## Syntax + +The syntax for an insert operation is: + +
INSERT document INTO collection
+
+It can optionally end with an `OPTIONS { … }` clause.
+
+{{< tip >}}
+The `IN` keyword is allowed in place of `INTO` and has the same meaning.
+{{< /tip >}}
+
+`collection` must contain the name of the collection into which the documents should
+be inserted. `document` is the document to be inserted, and it may or may not contain
+a `_key` attribute. If no `_key` attribute is provided, ArangoDB will auto-generate
+a value for `_key`. Inserting a document will also auto-generate a document
+revision number for the document.
+
+```aql
+FOR i IN 1..100
+  INSERT { value: i } INTO numbers
+```
+
+An insert operation can also be performed without a `FOR` loop to insert a
+single document:
+
+```aql
+INSERT { value: 1 } INTO numbers
+```
+
+When inserting into an [edge collection](../../concepts/data-models.md#graph-model),
+it is mandatory to specify the attributes `_from` and `_to` in the document:
+
+```aql
+FOR u IN users
+  FOR p IN products
+    FILTER u._key == p.recommendedBy
+    INSERT { _from: u._id, _to: p._id } INTO recommendations
+```
+
+## Query options
+
+The `OPTIONS` keyword followed by an object with query options can optionally
+be provided in an `INSERT` operation.
+
+### `ignoreErrors`
+
+`ignoreErrors` can be used to suppress query errors that may occur when
+violating unique key constraints:
+
+```aql
+FOR i IN 1..1000
+  INSERT {
+    _key: CONCAT('test', i),
+    name: "test",
+    foobar: true
+  } INTO users OPTIONS { ignoreErrors: true }
+```
+
+### `waitForSync`
+
+To make sure data are durable when an insert query returns, there is the
+`waitForSync` query option:
+
+```aql
+FOR i IN 1..1000
+  INSERT {
+    _key: CONCAT('test', i),
+    name: "test",
+    foobar: true
+  } INTO users OPTIONS { waitForSync: true }
+```
+
+### `overwrite`
+
+{{< info >}}
+The `overwrite` option is deprecated and superseded by
+[overwriteMode](#overwritemode).
+{{< /info >}}
+
+If you want to replace existing documents with documents having the same key,
+there is the `overwrite` query option. This will let you safely replace the
+documents instead of raising a "unique constraint violated" error:
+
+```aql
+FOR i IN 1..1000
+  INSERT {
+    _key: CONCAT('test', i),
+    name: "test",
+    foobar: true
+  } INTO users OPTIONS { overwrite: true }
+```
+
+### `overwriteMode`
+
+To further control the behavior of `INSERT` on primary index unique constraint
+violations, there is the `overwriteMode` option. It offers the following
+modes:
+
+- `"ignore"`: if a document with the specified `_key` value exists already,
+  nothing will be done and no write operation will be carried out. The
+  insert operation will return success in this case. This mode does not
+  support returning the old document version. Using `RETURN OLD` will trigger
+  a parse error, as there will be no old version to return. `RETURN NEW`
+  will only return the document in case it was inserted. In case the
+  document already existed, `RETURN NEW` will return `null`.
+- `"replace"`: if a document with the specified `_key` value exists already,
+  it will be overwritten with the specified document value. This mode will
+  also be used when no overwrite mode is specified but the `overwrite`
+  flag is set to `true`.
+- `"update"`: if a document with the specified `_key` value exists already,
+  it will be patched (partially updated) with the specified document value.
+- `"conflict"`: if a document with the specified `_key` value exists already,
+  return a unique constraint violation error so that the insert operation
+  fails.
This is also the default behavior in case the overwrite mode is + not set, and the `overwrite` flag is `false` or not set either. + +The main use case of inserting documents with overwrite mode `ignore` is +to make sure that certain documents exist in the cheapest possible way. +In case the target document already exists, the `ignore` mode is most +efficient, as it will not retrieve the existing document from storage and +not write any updates to it. + +When using the `update` overwrite mode, the `keepNull` and `mergeObjects` +options control how the update is done. +See [UPDATE operation](update.md#query-options). + +```aql +FOR i IN 1..1000 + INSERT { + _key: CONCAT('test', i), + name: "test", + foobar: true + } INTO users OPTIONS { overwriteMode: "update", keepNull: true, mergeObjects: false } +``` + +### `exclusive` + +The RocksDB engine does not require collection-level locks. +Different write operations on the same collection do not block each other, as +long as there are no _write-write conflicts_ on the same documents. From an application +development perspective it can be desired to have exclusive write access on collections, +to simplify the development. Note that writes do not block reads in RocksDB. +Exclusive access can also speed up modification queries, because we avoid conflict checks. + +Use the `exclusive` option to achieve this effect on a per query basis: + +```aql +FOR doc IN collection + INSERT { myval: doc.val + 1 } INTO users + OPTIONS { exclusive: true } +``` + +### `refillIndexCaches` + +Whether to add new entries to in-memory index caches if document insertions +affect the edge index or cache-enabled persistent indexes. + +```aql +INSERT { _from: "vert/A", _to: "vert/B" } INTO coll + OPTIONS { refillIndexCaches: true } +``` + +## Returning the inserted documents + +The inserted documents can also be returned by the query. In this case, the `INSERT` +statement can be a `RETURN` statement (intermediate `LET` statements are allowed, too). +To refer to the inserted documents, the `INSERT` statement introduces a pseudo-value +named `NEW`. + +The documents contained in `NEW` will contain all attributes, even those auto-generated by +the database (e.g. `_id`, `_key`, `_rev`). + +```aql +INSERT document INTO collection RETURN NEW +``` + +Following is an example using a variable named `inserted` to return the inserted +documents. For each inserted document, the document key is returned: + +```aql +FOR i IN 1..100 + INSERT { value: i } + INTO users + LET inserted = NEW + RETURN inserted._key +``` + +## Transactionality + +On a single server, an insert operation is executed transactionally in an +all-or-nothing fashion. + +A query may execute intermediate transaction commits in case the running +transaction (AQL query) hits the specified size thresholds. In this case, the +query's operations carried out so far are committed and not rolled back in case +of a later abort/rollback. This behavior can be controlled by adjusting the +intermediate commit settings for the RocksDB engine. See +[Known limitations for AQL queries](../fundamentals/limitations.md#storage-engine-properties). + +For sharded collections, the entire query and/or insert operation may not be +transactional, especially if it involves different shards and/or DB-Servers. 
diff --git a/site/content/arangodb/oem/aql/high-level-operations/let.md b/site/content/arangodb/oem/aql/high-level-operations/let.md new file mode 100644 index 0000000000..d8665ac121 --- /dev/null +++ b/site/content/arangodb/oem/aql/high-level-operations/let.md @@ -0,0 +1,69 @@ +--- +title: '`LET` operation in AQL' +menuTitle: LET +weight: 35 +description: >- + You can use the `LET` operation to assign an arbitrary value to a variable +--- +The variable is introduced in the scope the `LET` statement is placed in. +You cannot change the value once assigned. + +## Syntax + +
LET variableName = expression
+ +*expression* can be a simple expression or a subquery. + +For allowed variable names [AQL Syntax](../fundamentals/syntax.md#names). + +## Usage + +Variables are immutable in AQL, which means they cannot be re-assigned: + +```aql +LET a = [1, 2, 3] // initial assignment + +a = PUSH(a, 4) // syntax error, unexpected identifier +LET a = PUSH(a, 4) // parsing error, variable 'a' is assigned multiple times +LET b = PUSH(a, 4) // allowed, result: [1, 2, 3, 4] +``` + +`LET` statements are mostly used to declare complex computations and to avoid +repeated computations of the same value at multiple parts of a query. + +```aql +FOR u IN users + LET numRecommendations = LENGTH(u.recommendations) + RETURN { + "user" : u, + "numRecommendations" : numRecommendations, + "isPowerUser" : numRecommendations >= 10 + } +``` + +In the above example, the computation of the number of recommendations is +factored out using a `LET` statement, thus avoiding computing the value twice in +the `RETURN` statement. + +Another use case for `LET` is to declare a complex computation in a subquery, +making the whole query more readable. + +```aql +FOR u IN users + LET friends = ( + FOR f IN friends + FILTER u.id == f.userId + RETURN f + ) + LET memberships = ( + FOR m IN memberships + FILTER u.id == m.userId + RETURN m + ) + RETURN { + "user" : u, + "friends" : friends, + "numFriends" : LENGTH(friends), + "memberShips" : memberships + } +``` diff --git a/site/content/arangodb/oem/aql/high-level-operations/limit.md b/site/content/arangodb/oem/aql/high-level-operations/limit.md new file mode 100644 index 0000000000..c34ba21d02 --- /dev/null +++ b/site/content/arangodb/oem/aql/high-level-operations/limit.md @@ -0,0 +1,96 @@ +--- +title: '`LIMIT` operation in AQL' +menuTitle: LIMIT +weight: 30 +description: >- + The `LIMIT` operation allows you to reduce the number of results to at most + the specified number and optionally skip results using an offset for pagination +--- +## Syntax + +Two general forms of `LIMIT` are: + +
LIMIT count
+LIMIT offset, count
+
+The first form allows specifying only the `count` value whereas the second form
+allows specifying both `offset` and `count`. The first form is identical to using
+the second form with an `offset` value of `0`.
+
+## Usage
+
+```aql
+FOR u IN users
+  LIMIT 5
+  RETURN u
+```
+
+The above query returns five documents of the `users` collection.
+It could also be written as `LIMIT 0, 5` for the same result.
+Which documents it returns is rather arbitrary because collections have no
+defined order for the documents they contain. A `LIMIT` operation should usually
+be accompanied by a `SORT` operation to explicitly specify a sorting order
+unless any five documents are acceptable to you. However, also consider that if
+you run a query multiple times with varying `LIMIT` offsets for pagination,
+you can miss results or get duplicate results if the sort order is undefined.
+
+{{< info >}}
+In case multiple documents contain the same `SORT` attribute value, the result
+set does not contain the tied documents in a fixed order as the order between
+them is undefined. Additionally, the `SORT` operation does not guarantee a stable
+sort if there is no unique value to sort by.
+
+If a fixed total order is required, you can use a tiebreaker. Sort by an
+additional attribute that can break the ties. If the application has a preferred
+attribute that indicates the order of documents with the same value, then use
+this attribute. If there is no such attribute, you can still achieve a stable
+sort by using the `_id` system attribute as it is unique and present in every
+document.
+
+```aql
+FOR u IN users
+  SORT u.firstName, u._id // break name ties with the document ID
+  LIMIT 5
+  RETURN u
+```
+{{< /info >}}
+
+The `offset` value specifies how many elements from the result shall be
+skipped. It must be 0 or greater. The `count` value specifies how many
+elements should be included in the result at most.
+
+```aql
+FOR u IN users
+  SORT u.firstName, u.lastName, u.id DESC
+  LIMIT 2, 5
+  RETURN u
+```
+
+In the above example, the documents of `users` are sorted, the first two results
+get skipped, and the query returns the next five user documents.
+
+{{< info >}}
+Variables, expressions, and subqueries cannot be used for `offset` and `count`.
+The values for `offset` and `count` must be known at query compile time,
+which means that you can only use number literals, bind parameters or
+expressions that can be resolved at query compile time.
+{{< /info >}}
+
+Where a `LIMIT` is placed in relation to other operations in a query matters.
+`LIMIT` operations before `FILTER`s in particular can change the result
+significantly, because the operations are executed in the order in which they
+are written in the query. See [FILTER](filter.md#order-of-operations)
+for a detailed example.
+
+The `LIMIT` operation never applies to write operations (`INSERT`, `UPDATE`,
+`REPLACE`, `REMOVE`, `UPSERT`) but only their returned results. In the following
+example, five documents are created, regardless of the `LIMIT 2`.
The `LIMIT` +operation only constrains the number of documents returned by the query (via +`RETURN`) to the first two: + +```aql +FOR i IN 1..5 + INSERT { value: i } INTO coll + LIMIT 2 + RETURN NEW +``` diff --git a/site/content/arangodb/oem/aql/high-level-operations/remove.md b/site/content/arangodb/oem/aql/high-level-operations/remove.md new file mode 100644 index 0000000000..7963c48b70 --- /dev/null +++ b/site/content/arangodb/oem/aql/high-level-operations/remove.md @@ -0,0 +1,185 @@ +--- +title: '`REMOVE` operation in AQL' +menuTitle: REMOVE +weight: 50 +description: >- + You can use the `REMOVE` operation to delete documents from a collection +--- +Each `REMOVE` operation is restricted to a single collection, and the +[collection name](../../concepts/data-structure/collections.md#collection-names) must not be dynamic. +Only a single `REMOVE` statement per collection is allowed per AQL query, and +it cannot be followed by read or write operations that access the same collection, by +traversal operations, or AQL functions that can read documents. + +## Syntax + +The syntax for a remove operation is: + +
REMOVE keyExpression IN collection
+ +It can optionally end with an `OPTIONS { … }` clause. + +`collection` must contain the name of the collection to remove the documents +from. `keyExpression` must be an expression that contains the document identification. +This can either be a string (which must then contain the +[document key](../../concepts/data-structure/documents/_index.md#document-keys)) or a +document, which must contain a `_key` attribute. + +The following queries are thus equivalent: + +```aql +FOR u IN users + REMOVE { _key: u._key } IN users +``` + +```aql +FOR u IN users + REMOVE u._key IN users +``` + +```aql +FOR u IN users + REMOVE u IN users +``` + +A remove operation can remove arbitrary documents, and the documents +do not need to be identical to the ones produced by a preceding `FOR` statement: + +```aql +FOR i IN 1..1000 + REMOVE { _key: CONCAT('test', i) } IN users +``` + +```aql +FOR u IN users + FILTER u.active == false + REMOVE { _key: u._key } IN backup +``` + +A single document can be removed as well, using a document key string or a +document with `_key` attribute: + +```aql +REMOVE 'john' IN users +``` + +```aql +LET doc = DOCUMENT('users/john') +REMOVE doc IN users +``` + +The restriction of a single remove operation per query and collection +applies. The following query causes an _access after data-modification_ +error because of the third remove operation: + +```aql +REMOVE 'john' IN users +REMOVE 'john' IN backups // OK, different collection +REMOVE 'mary' IN users // Error, users collection again +``` + +## Query options + +### `ignoreErrors` + +`ignoreErrors` can be used to suppress query errors that may occur when trying to +remove non-existing documents. For example, the following query will fail if one +of the to-be-deleted documents does not exist: + +```aql +FOR i IN 1..1000 + REMOVE { _key: CONCAT('test', i) } IN users +``` + +By specifying the `ignoreErrors` query option, these errors can be suppressed so +the query completes: + +```aql +FOR i IN 1..1000 + REMOVE { _key: CONCAT('test', i) } IN users OPTIONS { ignoreErrors: true } +``` + +### `waitForSync` + +To make sure data has been written to disk when a query returns, there is the `waitForSync` +query option: + +```aql +FOR i IN 1..1000 + REMOVE { _key: CONCAT('test', i) } IN users OPTIONS { waitForSync: true } +``` + +### `ignoreRevs` + +In order to not accidentally remove documents that have been updated since you last fetched +them, you can use the option `ignoreRevs` to either let ArangoDB compare the `_rev` values and +only succeed if they still match, or let ArangoDB ignore them (default): + +```aql +FOR i IN 1..1000 + REMOVE { _key: CONCAT('test', i), _rev: "1287623" } IN users OPTIONS { ignoreRevs: false } +``` + +### `exclusive` + +The RocksDB engine does not require collection-level locks. Different write +operations on the same collection do not block each other, as +long as there are no _write-write conflicts_ on the same documents. From an application +development perspective it can be desired to have exclusive write access on collections, +to simplify the development. Note that writes do not block reads in RocksDB. +Exclusive access can also speed up modification queries, because we avoid conflict checks. 
+
+Use the `exclusive` option to achieve this effect on a per query basis:
+
+```aql
+FOR doc IN collection
+  REMOVE doc IN collection
+  OPTIONS { exclusive: true }
+```
+
+### `refillIndexCaches`
+
+Whether to delete existing entries from in-memory index caches and refill them
+if document removals affect the edge index or cache-enabled persistent indexes.
+
+```aql
+REMOVE { _key: "123" } IN edgeColl
+  OPTIONS { refillIndexCaches: true }
+```
+
+## Returning the removed documents
+
+The removed documents can also be returned by the query. In this case, the
+`REMOVE` statement must be followed by a `RETURN` statement (intermediate `LET`
+statements are allowed, too). `REMOVE` introduces the pseudo-value `OLD` to
+refer to the removed documents:
+
+```aql
+REMOVE keyExpression IN collection options RETURN OLD
+```
+
+Following is an example using a variable named `removed` for capturing the removed
+documents. For each removed document, the document key is returned.
+
+```aql
+FOR u IN users
+  REMOVE u IN users
+  LET removed = OLD
+  RETURN removed._key
+```
+
+## Transactionality
+
+On a single server, the document removal is executed transactionally in an
+all-or-nothing fashion.
+
+A query may execute intermediate transaction commits in case the running
+transaction (AQL query) hits the specified size thresholds. In this case, the
+query's operations carried out so far are committed and not rolled back in case
+of a later abort/rollback. This behavior can be controlled by adjusting the
+intermediate commit settings for the RocksDB engine. See
+[Known limitations for AQL queries](../fundamentals/limitations.md#storage-engine-properties).
+
+For sharded collections, the entire query and/or remove operation may not be
+transactional, especially if it involves different shards and/or DB-Servers.
diff --git a/site/content/arangodb/oem/aql/high-level-operations/replace.md b/site/content/arangodb/oem/aql/high-level-operations/replace.md
new file mode 100644
index 0000000000..b2aa732641
--- /dev/null
+++ b/site/content/arangodb/oem/aql/high-level-operations/replace.md
@@ -0,0 +1,306 @@
+---
+title: '`REPLACE` operation in AQL'
+menuTitle: REPLACE
+weight: 60
+description: >-
+  The `REPLACE` operation removes all attributes of a document and sets the
+  given attributes, excluding immutable system attributes
+---
+Each `REPLACE` operation is restricted to a single collection, and the
+[collection name](../../concepts/data-structure/collections.md#collection-names) must not be dynamic.
+Only a single `REPLACE` statement per collection is allowed per AQL query, and
+it cannot be followed by read or write operations that access the same collection,
+by traversal operations, or AQL functions that can read documents.
+
+You cannot replace the `_id`, `_key`, and `_rev` system attributes, but you can
+replace the `_from` and `_to` attributes.
+
+Replacing a document modifies the document's revision number (`_rev` attribute)
+with a server-generated value.
+
+## Syntax
+
+The two syntaxes for a replace operation are:
+
REPLACE document IN collection
+REPLACE keyExpression WITH document IN collection
+
+Both variants can optionally end with an `OPTIONS { … }` clause.
+
+`collection` must contain the name of the collection in which the document
+should be replaced.
+
+`document` must be an object and contain the attributes and values to set.
+**All existing attributes** in the stored document **are removed** from it and
+**only the provided attributes are set** (excluding the immutable `_id` and
+`_key` attributes and the system-managed `_rev` attribute). This distinguishes
+the `REPLACE` from the `UPDATE` operation, which only affects the attributes
+you specify in the operation and doesn't change other attributes of the stored
+document.
+
+### `REPLACE <document> IN <collection>`
+
+Using the first syntax, the `document` object must have a `_key` attribute with
+the document key. The existing document with this key is replaced with the
+attributes provided by the `document` object (except for the `_id`, `_key`, and
+`_rev` system attributes).
+
+The following query replaces the document identified by the key `my_key` in the
+`users` collection, only setting a `name` and a `status` attribute. The key is
+passed via the `_key` attribute alongside other attributes:
+
+```aql
+REPLACE { _key: "my_key", name: "Jon", status: "active" } IN users
+```
+
+The following query is invalid because the object does not contain a `_key`
+attribute and thus it is not possible to determine the document to
+be replaced:
+
+```aql
+REPLACE { name: "Jon" } IN users
+```
+
+You can combine the `REPLACE` operation with a `FOR` loop to determine the
+necessary key attributes, as shown below:
+
+```aql
+FOR u IN users
+  REPLACE { _key: u._key, name: CONCAT(u.firstName, " ", u.lastName), status: u.status } IN users
+```
+
+Note that the `REPLACE` and `FOR` operations are independent of each other and
+`u` does not automatically define a document for the `REPLACE` statement.
+Thus, the following query is invalid:
+
+```aql
+FOR u IN users
+  REPLACE { name: CONCAT(u.firstName, " ", u.lastName), status: u.status } IN users
+```
+
+### `REPLACE <keyExpression> WITH <document> IN <collection>`
+
+Using the second syntax, the document to replace is defined by the
+`keyExpression`. It can either be a string with the document key, an object
+which contains a `_key` attribute with the document key, or an expression that
+evaluates to either of these two. The existing document with this key is
+replaced with the attributes provided by the `document` object (except for
+the `_id`, `_key`, and `_rev` system attributes).
+
+The following query replaces the document identified by the key `my_key` in the
+`users` collection, only setting a `name` and a `status` attribute. The key is
+passed as a string in the `keyExpression`. The attributes to set are passed
+separately as the `document` object:
+
+```aql
+REPLACE "my_key" WITH { name: "Jon", status: "active" } IN users
+```
+
+The `document` object may contain a `_key` attribute, but it is ignored.
+
+You cannot define the document to replace using an `_id` attribute, nor pass a
+document identifier as a string (like `"users/john"`). However, you can use
+`PARSE_IDENTIFIER(<id>).key` as `keyExpression` to get the document key as a
+string:
+
+```aql
+LET key = PARSE_IDENTIFIER("users/john").key
+REPLACE key WITH { ... } IN users
+```
+
+### Comparison of the syntaxes
+
+Both syntaxes of the `REPLACE` operation allow you to define the document to
+modify and the attributes to set. The document to replace is effectively
+identified by a document key in combination with the specified collection.
+ +The `REPLACE` operation supports different ways of specifying the document key. +You can choose the syntax variant that is the most convenient for you. + +The following queries are equivalent: + +```aql +FOR u IN users + REPLACE u WITH { name: CONCAT(u.firstName, " ", u.lastName), status: u.status } IN users +``` + +```aql +FOR u IN users + REPLACE u._key WITH { name: CONCAT(u.firstName, " ", u.lastName), status: u.status } IN users +``` + +```aql +FOR u IN users + REPLACE { _key: u._key } WITH { name: CONCAT(u.firstName, " ", u.lastName), status: u.status } IN users +``` + +```aql +FOR u IN users + REPLACE { _key: u._key, name: CONCAT(u.firstName, " ", u.lastName), status: u.status } IN users +``` + +## Dynamic key expressions + +A `REPLACE` operation may replace arbitrary documents, using either of the two +syntaxes: + +```aql +FOR i IN 1..1000 + REPLACE { _key: CONCAT("test", i), name: "Paula", status: "active" } IN users +``` + +```aql +FOR i IN 1..1000 + REPLACE CONCAT("test", i) WITH { name: "Paula", status: "active" } IN users +``` + +## Target a different collection + +The documents a `REPLACE` operation modifies can be in a different collection +than the ones produced by a preceding `FOR` operation: + +```aql +FOR u IN users + FILTER u.active == false + REPLACE u WITH { status: "inactive", name: u.name } IN backup +``` + +Note how documents are read from the `users` collection but replaced in another +collection called `backup`. Both collections need to use matching document keys +for this to work. + +Although the `u` variable holds a whole document, it is only used to define the +target document. The `_key` attribute of the object is extracted and the target +document is solely defined by the document key string value and the specified +collection of the `REPLACE` operation (`backup`). There is no link to the +original collection (`users`). + +## Query options + +You can optionally set query options for the `REPLACE` operation: + +```aql +REPLACE ... IN users OPTIONS { ... } +``` + +### `ignoreErrors` + +You can use `ignoreErrors` to suppress query errors that may occur when trying to +replace non-existing documents or when violating unique key constraints: + +```aql +FOR i IN 1..1000 + REPLACE CONCAT("test", i) + WITH { foobar: true } IN users + OPTIONS { ignoreErrors: true } +``` + +You cannot modify the `_id`, `_key`, and `_rev` system attributes, but attempts +to change them are ignored and not considered errors. + +### `waitForSync` + +To make sure data are durable when a replace query returns, there is the `waitForSync` +query option: + +```aql +FOR i IN 1..1000 + REPLACE CONCAT("test", i) + WITH { foobar: true } IN users + OPTIONS { waitForSync: true } +``` + +### `ignoreRevs` + +In order to not accidentally overwrite documents that have been modified since you last fetched +them, you can use the option `ignoreRevs` to either let ArangoDB compare the `_rev` value and only +succeed if they still match, or let ArangoDB ignore them (default): + +```aql +FOR i IN 1..1000 + REPLACE { _key: CONCAT("test", i), _rev: "1287623" } + WITH { foobar: true } IN users + OPTIONS { ignoreRevs: false } +``` + +### `exclusive` + +The RocksDB engine does not require collection-level locks. Different write +operations on the same collection do not block each other, as +long as there are no _write-write conflicts_ on the same documents. From an application +development perspective it can be desired to have exclusive write access on collections, +to simplify the development. 
Note that writes do not block reads in RocksDB. +Exclusive access can also speed up modification queries, because we avoid conflict checks. + +Use the `exclusive` option to achieve this effect on a per query basis: + +```aql +FOR doc IN collection + REPLACE doc + WITH { replaced: true } IN collection + OPTIONS { exclusive: true } +``` + +### `refillIndexCaches` + +Whether to update existing entries in in-memory index caches if documents +replacements affect the edge index or cache-enabled persistent indexes. + +```aql +REPLACE { _key: "123", _from: "vert/C", _to: "vert/D" } IN edgeColl + OPTIONS { refillIndexCaches: true } +``` + +## Returning the modified documents + +You can optionally return the documents modified by the query. In this case, the `REPLACE` +operation needs to be followed by a `RETURN` operation. Intermediate `LET` operations are +allowed, too. These operations can refer to the pseudo-variables `OLD` and `NEW`. +The `OLD` pseudo-variable refers to the document revisions before the replace, and `NEW` +refers to the document revisions after the replace. + +Both `OLD` and `NEW` contain all document attributes, even those not specified +in the replace expression. + +```aql +REPLACE document IN collection options RETURN OLD +REPLACE document IN collection options RETURN NEW +REPLACE keyExpression WITH document IN collection options RETURN OLD +REPLACE keyExpression WITH document IN collection options RETURN NEW +``` + +Following is an example using a variable named `previous` to return the original +documents before modification. For each replaced document, the document key is +returned: + +```aql +FOR u IN users + REPLACE u WITH { value: "test" } IN users + LET previous = OLD + RETURN previous._key +``` + +The following query uses the `NEW` pseudo-value to return the replaced +documents, without some of their system attributes: + +```aql +FOR u IN users + REPLACE u WITH { value: "test" } IN users + LET replaced = NEW + RETURN UNSET(replaced, "_key", "_id", "_rev") +``` + +## Transactionality + +On a single server, replace operations are executed transactionally in an +all-or-nothing fashion. + +A query may execute intermediate transaction commits in case the running +transaction (AQL query) hits the specified size thresholds. In this case, the +query's operations carried out so far are committed and not rolled back in case +of a later abort/rollback. This behavior can be controlled by adjusting the +intermediate commit settings for the RocksDB engine. See +[Known limitations for AQL queries](../fundamentals/limitations.md#storage-engine-properties). + +For sharded collections, the entire query and/or replace operation may not be +transactional, especially if it involves different shards and/or DB-Servers. diff --git a/site/content/arangodb/oem/aql/high-level-operations/return.md b/site/content/arangodb/oem/aql/high-level-operations/return.md new file mode 100644 index 0000000000..c4344e0865 --- /dev/null +++ b/site/content/arangodb/oem/aql/high-level-operations/return.md @@ -0,0 +1,212 @@ +--- +title: '`RETURN` operation in AQL' +menuTitle: RETURN +weight: 10 +description: >- + You can use the `RETURN` operation to produce the result of a query +--- +A `RETURN` operation is mandatory at the end of each block in a data access query, +otherwise the query result would be undefined. Using `RETURN` at the top level +in data modification queries is optional. + +## Syntax + +The general syntax for `RETURN` is: + +
RETURN expression
+
+There is also a variant [`RETURN DISTINCT`](#return-distinct).
+
+The *expression* returned by `RETURN` is produced for each iteration in the block the
+`RETURN` statement is placed in. That means the result of a `RETURN` statement
+is **always an array**. This includes an empty array if no documents matched the
+query, and a single return value being returned as an array with one element.
+
+To return all elements from the currently iterated array without modification,
+the following simple form can be used:
+
FOR variableName IN expression
+  RETURN variableName
+ +As `RETURN` allows specifying an expression, arbitrary computations can be +performed to calculate the result elements. Any of the variables valid in the +scope the `RETURN` is placed in can be used for the computations. + +## Usage + +To iterate over all documents of a collection called *users* and return the +full documents, you can write: + +```aql +FOR u IN users + RETURN u +``` + +In each iteration of the for-loop, a document of the *users* collection is +assigned to a variable *u* and returned unmodified in this example. To return +only one attribute of each document, you could use a different return expression: + +```aql +FOR u IN users + RETURN u.name +``` + +Or to return multiple attributes, an object can be constructed like this: + +```aql +FOR u IN users + RETURN { name: u.name, age: u.age } +``` + +Note: `RETURN` will close the current scope and eliminate all local variables in it. +This is important to remember when working with [subqueries](../fundamentals/subqueries.md). + +[Dynamic attribute names](../fundamentals/data-types.md#objects--documents) are +supported as well: + +```aql +FOR u IN users + RETURN { [ u._id ]: u.age } +``` + +The document *_id* of every user is used as expression to compute the +attribute key in this example: + +```json +[ + { + "users/9883": 32 + }, + { + "users/9915": 27 + }, + { + "users/10074": 69 + } +] +``` + +The result contains one object per user with a single key/value pair each. +This is usually not desired. For a single object, that maps user IDs to ages, +the individual results need to be merged and returned with another `RETURN`: + +```aql +RETURN MERGE( + FOR u IN users + RETURN { [ u._id ]: u.age } +) +``` + +```json +[ + { + "users/10074": 69, + "users/9883": 32, + "users/9915": 27 + } +] +``` + +Keep in mind that if the key expression evaluates to the same value multiple +times, only one of the key/value pairs with the duplicate name will survive +[`MERGE()`](../functions/document-object.md#merge). To avoid this, you can go without +dynamic attribute names, use static names instead and return all document +properties as attribute values: + +```aql +FOR u IN users + RETURN { name: u.name, age: u.age } +``` + +```json +[ + { + "name": "John Smith", + "age": 32 + }, + { + "name": "James Hendrix", + "age": 69 + }, + { + "name": "Katie Foster", + "age": 27 + } +] +``` + +## `RETURN DISTINCT` + +`RETURN` can optionally be followed by the `DISTINCT` keyword. +The `DISTINCT` keyword will ensure uniqueness of the values returned by the +`RETURN` statement: + +
FOR variableName IN expression
+  RETURN DISTINCT expression
+ +`RETURN DISTINCT` is not allowed on the top-level of a query if there is no `FOR` +loop preceding it. + +Below example returns `["foo", "bar", "baz"]`: + +```aql +FOR value IN ["foo", "bar", "bar", "baz", "foo"] + RETURN DISTINCT value +``` + +{{< tip >}} +`RETURN DISTINCT` will not change the order of the results it is applied on, +unlike [`COLLECT`](collect.md#collect-vs-return-distinct). +{{< /tip >}} + +If the `DISTINCT` is applied on an expression that itself is an array or a subquery, +the `DISTINCT` will not make the values in each array or subquery result unique, but instead +ensure that the result contains only distinct arrays or subquery results. To make +the result of an array or a subquery unique, simply apply the `DISTINCT` for the +array or the subquery. + +For example, the following query will apply `DISTINCT` on its subquery results, +but not inside the subquery: + +```aql +FOR what IN 1..2 + RETURN DISTINCT ( + FOR i IN [ 1, 2, 3, 4, 1, 3 ] + RETURN i + ) +``` + +Here we will have a `FOR` loop with two iterations that each execute a subquery. The +`DISTINCT` here is applied on the two subquery results. Both subqueries return the +same result value (that is `[ 1, 2, 3, 4, 1, 3 ]`), so after `DISTINCT` there will +only be one occurrence of the value `[ 1, 2, 3, 4, 1, 3 ]` left: + +```json +[ + [ 1, 2, 3, 4, 1, 3 ] +] +``` + +If the goal is to apply the `DISTINCT` inside the subquery, it needs to be moved +there: + +```aql +FOR what IN 1..2 + LET sub = ( + FOR i IN [ 1, 2, 3, 4, 1, 3 ] + RETURN DISTINCT i + ) + RETURN sub +``` + +In the above case, the `DISTINCT` will make the subquery results unique, so that +each subquery will return a unique array of values (`[ 1, 2, 3, 4 ]`). As the subquery +is executed twice and there is no `DISTINCT` on the top-level, that array will be +returned twice: + +```json +[ + [ 1, 2, 3, 4 ], + [ 1, 2, 3, 4 ] +] +``` diff --git a/site/content/arangodb/oem/aql/high-level-operations/search.md b/site/content/arangodb/oem/aql/high-level-operations/search.md new file mode 100644 index 0000000000..1f778cc1c0 --- /dev/null +++ b/site/content/arangodb/oem/aql/high-level-operations/search.md @@ -0,0 +1,337 @@ +--- +title: '`SEARCH` operation in AQL' +menuTitle: SEARCH +weight: 20 +description: >- + The `SEARCH` operation lets you filter Views, accelerated by the underlying + indexes +--- +The `SEARCH` operation guarantees to use View indexes for an efficient +execution plan. If you use the `FILTER` keyword for Views, no indexes are +utilized and the filtering is performed as a post-processing step. + +Conceptually, a View is just another document data source, +similar to an array or a document/edge collection, over which you can iterate +using a [`FOR` operation](for.md) in AQL: + +```aql +FOR doc IN viewName + RETURN doc +``` + +The optional `SEARCH` operation provides the capabilities to: + +- filter documents based on AQL Boolean expressions and functions +- match documents located in different collections backed by a fast index +- sort the result set based on how closely each document matched the + search conditions + +See [`arangosearch` Views](../../indexes-and-search/arangosearch/arangosearch-views-reference.md) and +[`search-alias` Views](../../indexes-and-search/arangosearch/search-alias-views-reference.md) on how to set up Views. + +## Syntax + +The `SEARCH` keyword is followed by an ArangoSearch filter expressions, which +is mostly comprised of calls to ArangoSearch AQL functions. + +
FOR doc IN viewName
+  SEARCH expression
+  OPTIONS { … }
+  ...
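+
+For example, a query that combines a `SEARCH` expression with relevance ranking
+and a result limit could look like the following sketch. It assumes a View named
+`viewName` whose indexed `text` attribute is processed with the `text_en` Analyzer:
+
+```aql
+FOR doc IN viewName
+  SEARCH ANALYZER(doc.text IN TOKENS("quick brown fox", "text_en"), "text_en")
+  SORT BM25(doc) DESC
+  LIMIT 10
+  RETURN doc
+```
+
+The `SEARCH` expression determines which documents match, while the `BM25()`
+scoring function ranks how well each match satisfies it.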
+ +## Usage + +The `SEARCH` statement, in contrast to `FILTER`, is treated as a part of the +`FOR` operation, not as an individual statement. It cannot be placed freely +in a query nor multiple times in the body of a `FOR` loop. `FOR ... IN` must be +followed by the name of a View, not a collection. The `SEARCH` operation has to +follow next, other operations before `SEARCH` such as `FILTER`, `COLLECT` etc. +are not allowed in this position. Subsequent operations are possible after +`SEARCH` and the expression however, including `SORT` to order the search +results based on a ranking value computed by the View. + +*expression* must be an ArangoSearch expression. The full power of ArangoSearch +is harnessed and exposed via special [ArangoSearch functions](../functions/arangosearch.md), +during both the search and sort stages. On top of that, common AQL operators +are supported. + +Note that inline expressions and a few other things are not supported by +`SEARCH`. The server will raise a query error in case of an invalid expression. + +The `OPTIONS` keyword and an object can optionally follow the search expression +to set [Search Options](#search-options). + +### Logical operators + +Logical or Boolean operators allow you to combine multiple search conditions. + +- `AND`, `&&` (conjunction) +- `OR`, `||` (disjunction) +- `NOT`, `!` (negation / inversion) + +[Operator precedence](../operators.md#operator-precedence) needs to be taken +into account and can be controlled with parentheses. + +Consider the following contrived expression: + +`doc.value < 0 OR doc.value > 5 AND doc.value IN [-10, 10]` + +`AND` has a higher precedence than `OR`. The expression is equivalent to: + +`doc.value < 0 OR (doc.value > 5 AND doc.value IN [-10, 10])` + +The conditions are thus: +- values less than 0 +- values greater than 5, but only if it is 10 + (or -10, but this can never be fulfilled) + +Parentheses can be used as follows to apply the `AND` condition to both of the +`OR` conditions: + +`(doc.value < 0 OR doc.value > 5) AND doc.value IN [-10, 10]` + +The conditions are now: +- values less than 0, but only if it is -10 +- values greater than 5, but only if it is 10 + +### Comparison operators + +- `==` (equal) +- `<=` (less than or equal) +- `>=` (greater than or equal) +- `<` (less than) +- `>` (greater than) +- `!=` (unequal) +- `IN` (contained in array or range), also `NOT IN` +- `LIKE` (equal with wildcards), also `NOT LIKE` + +Also see the [`IN_RANGE()` function](../functions/arangosearch.md#in_range) for +an alternative to a combination of `<`, `<=`, `>`, `>=` operators for range +searches. + +```aql +FOR doc IN viewName + SEARCH ANALYZER(doc.text == "quick" OR doc.text == "brown", "text_en") + // -- or -- + SEARCH ANALYZER(doc.text IN ["quick", "brown"], "text_en") + RETURN doc +``` + +{{< warning >}} +The alphabetical order of characters is not taken into account by ArangoSearch, +i.e. range queries in SEARCH operations against Views will not follow the +language rules as per the defined Analyzer locale (except for the +[`collation` Analyzer](../../indexes-and-search/analyzers.md#collation)) nor the server language +(startup option `--default-language`)! +Also see [Known Issues](../../release-notes/version-oem/known-issues-in-oem.md#arangosearch). 
+{{< /warning >}} + +### Array comparison operators + +[Array comparison operators](../operators.md#array-comparison-operators) are +supported: + +```aql +LET tokens = TOKENS("some input", "text_en") // ["some", "input"] +FOR doc IN myView SEARCH tokens ALL IN doc.text RETURN doc // dynamic conjunction +FOR doc IN myView SEARCH tokens ANY IN doc.text RETURN doc // dynamic disjunction +FOR doc IN myView SEARCH tokens NONE IN doc.text RETURN doc // dynamic negation +FOR doc IN myView SEARCH tokens ALL > doc.text RETURN doc // dynamic conjunction with comparison +FOR doc IN myView SEARCH tokens ANY <= doc.text RETURN doc // dynamic disjunction with comparison +FOR doc IN myView SEARCH tokens NONE < doc.text RETURN doc // dynamic negation with comparison +FOR doc IN myView SEARCH tokens AT LEAST (1+1) IN doc.text RETURN doc // dynamically test for a subset of elements +``` + +The following operators are equivalent in `SEARCH` expressions: +- `ALL IN`, `ALL ==`, `NONE !=`, `NONE NOT IN` +- `ANY IN`, `ANY ==` +- `NONE IN`, `NONE ==`, `ALL !=`, `ALL NOT IN` +- `ALL >`, `NONE <=` +- `ALL >=`, `NONE <` +- `ALL <`, `NONE >=` +- `ALL <=`, `NONE >` +- `AT LEAST (...) IN`, `AT LEAST (...) ==` +- `AT LEAST (1) IN`, `ANY IN` + +The stored attribute referenced on the right side of the operator is like a +single, primitive value. In case of multiple tokens, it is like having multiple +such values as opposed to an array of values, even if the actual document +attribute is an array. `IN` and `==` as part of array comparison operators are +treated the same in `SEARCH` expressions for ease of use. The behavior is +different outside of `SEARCH`, where `IN` needs to be followed by an array. + +### Question mark operator + +You can use the [Question mark operator](../operators.md#question-mark-operator) +to perform [Nested searches with ArangoSearch](../../indexes-and-search/arangosearch/nested-search.md) +(Enterprise Edition only): + +```aql +FOR doc IN myView + SEARCH doc.dimensions[? FILTER CURRENT.type == "height" AND CURRENT.value > 40] + RETURN doc +``` + +It allows you to match nested objects in arrays that satisfy multiple conditions +each, and optionally define how often these conditions should be fulfilled for +the entire array. You need to configure the View specifically for this type of +search using the `nested` property in [`arangosearch` Views](../../indexes-and-search/arangosearch/arangosearch-views-reference.md#link-properties) +or in the definition of [Inverted Indexes](../../indexes-and-search/indexing/working-with-indexes/inverted-indexes.md#nested-search-enterprise-edition) +that you can add to [`search-alias` Views](../../indexes-and-search/arangosearch/search-alias-views-reference.md). + +## Handling of non-indexed fields + +Document attributes which are not configured to be indexed by a View are +treated by `SEARCH` as non-existent. This affects tests against the documents +emitted from the View only. 
+ +For example, given a collection `myCol` with the following documents: + +```js +{ "someAttr": "One", "anotherAttr": "One" } +{ "someAttr": "Two", "anotherAttr": "Two" } +``` + +… with an `arangosearch` View where `someAttr` is indexed by the following View `myView`: + +```js +{ + "type": "arangosearch", + "links": { + "myCol": { + "fields": { + "someAttr": {} + } + } + } +} +``` + +… a search on `someAttr` yields the following result: + +```aql +FOR doc IN myView + SEARCH doc.someAttr == "One" + RETURN doc +``` + +```json +[ { "someAttr": "One", "anotherAttr": "One" } ] +``` + +A search on `anotherAttr` yields an empty result because only `someAttr` +is indexed by the View: + +```aql +FOR doc IN myView + SEARCH doc.anotherAttr == "One" + RETURN doc +``` + +```json +[] +``` + +You can use the special `includeAllFields` +[`arangosearch` View property](../../indexes-and-search/arangosearch/arangosearch-views-reference.md#link-properties) +to index all (sub-)attributes of the source documents if desired. + +## `SEARCH` with `SORT` + +The documents emitted from a View can be sorted by attribute values with the +standard [`SORT()` operation](sort.md), using one or multiple +attributes, in ascending or descending order (or a mix thereof). + +```aql +FOR doc IN viewName + SORT doc.text, doc.value DESC + RETURN doc +``` + +If the (left-most) fields and their sorting directions match up with the +[primary sort order](../../indexes-and-search/arangosearch/performance.md#primary-sort-order) definition +of the View then the `SORT` operation is optimized away. + +Apart from simple sorting, it is possible to sort the matched View documents by +relevance score (or a combination of score and attribute values if desired). +The document search via the `SEARCH` keyword and the sorting via the +[ArangoSearch Scoring Functions](../functions/arangosearch.md#scoring-functions), +namely `BM25()` and `TFIDF()`, are closely intertwined. +The query given in the `SEARCH` expression is not only used to filter documents, +but also is used with the scoring functions to decide which document matches +the query best. Other documents in the View also affect this decision. + +Therefore the ArangoSearch scoring functions can work _only_ on documents +emitted from a View, as both the corresponding `SEARCH` expression and the View +itself are consulted in order to sort the results. + +```aql +FOR doc IN viewName + SEARCH ... + SORT BM25(doc) DESC + RETURN doc +``` + +The [`BOOST()` function](../functions/arangosearch.md#boost) can be used to +fine-tune the resulting ranking by weighing sub-expressions in `SEARCH` +differently. + +If there is no `SEARCH` operation prior to calls to scoring functions or if +the search expression does not filter out documents (e.g. `SEARCH true`) then +a score of `0` will be returned for all documents. + +## Search Options + +The `SEARCH` operation supports an optional `OPTIONS` clause to modify the +behavior. The general syntax is as follows: + +
SEARCH expression OPTIONS { option: value, ... }
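+
+As a sketch, assuming a View named `viewName` with a numeric `value` attribute
+indexed from a linked collection `coll1`, several of the options described below
+can be combined in a single `OPTIONS` object:
+
+```aql
+FOR doc IN viewName
+  SEARCH doc.value IN [1, 2, 3]
+    OPTIONS { collections: ["coll1"], conditionOptimization: "none" }
+  RETURN doc
+```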
+ +### `collections` + +You can specify an array of strings with collection names to restrict the search +to certain source collections. + +Given a View with three linked collections `coll1`, `coll2`, and `coll3`, you +can return documents from the first two collections only and ignore the third +collection by setting the `collections` option to `["coll1", "coll2"]`: + +```aql +FOR doc IN viewName + SEARCH true OPTIONS { collections: ["coll1", "coll2"] } + RETURN doc +``` + +The search expression `true` in the above example matches all View documents. +You can use any valid expression here while limiting the scope to the chosen +source collections. + +### `conditionOptimization` + +You can specify one of the following values for this option to control how +search criteria get optimized: + +- `"auto"` (default): convert conditions to disjunctive normal form (DNF) and + apply optimizations. Removes redundant or overlapping conditions, but can + take quite some time even for a low number of nested conditions. +- `"none"`: search the index without optimizing the conditions. + + +See [Optimizing View and inverted index query performance](../../indexes-and-search/arangosearch/performance.md#condition-optimization-options) +for an example. + +### `countApproximate` + +This option controls how the total count of rows is calculated if the `fullCount` +option is enabled for a query or when a `COLLECT WITH COUNT` clause is executed. +You can set it to one of the following values: + +- `"exact"` (default): rows are actually enumerated for a precise count. +- `"cost"`: a cost-based approximation is used. Does not enumerate rows and + returns an approximate result with O(1) complexity. Gives a precise result + if the `SEARCH` condition is empty or if it contains a single term query + only (e.g. `SEARCH doc.field == "value"`), the usual eventual consistency + of Views aside. + +See [Optimizing View and inverted index query performance](../../indexes-and-search/arangosearch/performance.md#count-approximation) +for an example. diff --git a/site/content/arangodb/oem/aql/high-level-operations/sort.md b/site/content/arangodb/oem/aql/high-level-operations/sort.md new file mode 100644 index 0000000000..2c99b2e8ef --- /dev/null +++ b/site/content/arangodb/oem/aql/high-level-operations/sort.md @@ -0,0 +1,109 @@ +--- +title: '`SORT` operation in AQL' +menuTitle: SORT +weight: 25 +description: >- + The `SORT` operation allows you to specify one or multiple sort criteria and + directions to control the order of query results or the elements of arrays +--- +## Syntax + +The general syntax is: + +
SORT expression direction
+ +## Usage + +The `SORT` operation sorts the already produced intermediate results of the +current block. For example, the following query sorts by `lastName` +(in ascending order), then `firstName` (in ascending order), then by `id` +(in descending order): + +```aql +FOR u IN users + SORT u.lastName, u.firstName, u.id DESC + RETURN u +``` + +Specifying the *direction* is optional. The default (implicit) direction for a +sort expression is the ascending order. To explicitly specify the sort direction, +the keywords `ASC` (ascending) and `DESC` (descending) can be used. Multiple sort +criteria can be separated using commas. In this case, the direction is specified +for each expression separately. + +The following example first sorts documents by `lastName` in ascending order and +then by `firstName` in ascending order. + +```aql +SORT doc.lastName, doc.firstName +``` + +The following example first sorts documents by `lastName` in descending order +and then by `firstName` in ascending order. + +```aql +SORT doc.lastName DESC, doc.firstName +``` + +The following example first sorts documents by `lastName` in ascending order +and then by `firstName` in descending order. + +```aql +SORT doc.lastName, doc.firstName DESC +``` + +{{< warning >}} +When iterating over a collection, the order of documents is always +**undefined unless an explicit sort order is defined** with a `SORT` operation. + +If the values you sort by are not unique, the order among tied documents is +undefined and you may want to sort by another attribute to break ties. +If the application has a preferred attribute that indicates the order of +documents with the same value, then use this attribute. If there is no such +attribute, you can still achieve a stable sort by using the `_id` system attribute +as it is unique and present in every document. + +```aql +FOR u IN users + SORT u.firstName, u._id // break name ties with the document ID + RETURN u +``` +{{< /warning >}} + +Constant `SORT` expressions can be used to indicate that no particular +sort order is desired. + +```aql +SORT null +``` + +Constant `SORT` expressions are optimized away by the AQL +optimizer during optimization, but specifying them explicitly may enable further +optimizations if the optimizer does not need to take into account any particular +sort order. This is especially the case after a `COLLECT` statement, which is +supposed to produce a sorted result. Specifying an extra `SORT null` after the +`COLLECT` statement allows to AQL optimizer to remove the post-sorting of the +collect results altogether. Also see [`COLLECT` option `method`](collect.md#method). + +In case of a sequence of `SORT` operations, the last one is always the one +that is performed unless a previous `SORT` expression is more accurate. +If the optimization rules `remove-redundant-sorts` and `remove-redundant-sorts-2` +are deactivated in the query's execution, then the last `SORT` is always the one +that wins, despite the accuracy. For example, consider the following query with +multiple consecutive `SORT` operations: + +```aql +FOR friend IN friends + SORT friend.friend.name, friend.id, friend.age + SORT friend.age, friend.id + SORT friend.age + RETURN friend +``` + +If the optimization rules mentioned above are deactivated, then the last `SORT` +becomes operative and the collection is sorted by `friend.age`. 
If the +optimization rules are active, then the second `SORT` becomes operative because +it covers the same `friend.age` attribute and additionally sorts by another +attribute in case of ties, making it more accurate. However, if the attributes +in the second `SORT` expression are in opposite order, as in +`SORT friend.id, friend.age`, then the last `SORT` is operative. diff --git a/site/content/arangodb/oem/aql/high-level-operations/update.md b/site/content/arangodb/oem/aql/high-level-operations/update.md new file mode 100644 index 0000000000..0a7ede0857 --- /dev/null +++ b/site/content/arangodb/oem/aql/high-level-operations/update.md @@ -0,0 +1,429 @@ +--- +title: '`UPDATE` operation in AQL' +menuTitle: UPDATE +weight: 55 +description: >- + The `UPDATE` operation partially modifies a document with the given attributes, + by adding new and updating existing attributes +--- +Each `UPDATE` operation is restricted to a single collection, and the +[collection name](../../concepts/data-structure/collections.md#collection-names) must not be dynamic. +Only a single `UPDATE` statement per collection is allowed per AQL query, and +it cannot be followed by read or write operations that access the same collection, +by traversal operations, or AQL functions that can read documents. + +You cannot update the `_id`, `_key`, and `_rev` system attributes, but you can +update the `_from` and `_to` attributes. + +Updating a document modifies the document's revision number (`_rev` attribute) +with a server-generated value. + +## Syntax + +The two syntaxes for an update operation are: + +
UPDATE document IN collection
+UPDATE keyExpression WITH document IN collection
+
+Both variants can optionally end with an `OPTIONS { … }` clause.
+
+`collection` must contain the name of the collection in which the document
+should be updated.
+
+`document` must be an object and contain the attributes and values to update.
+**Attributes that don't yet exist** in the stored document **are added** to it.
+**Existing attributes are set to the provided attribute values** (excluding the
+immutable `_id` and `_key` attributes and the system-managed `_rev` attribute).
+The operation leaves other existing attributes not specified in `document` untouched.
+This distinguishes the `UPDATE` from the `REPLACE` operation, which affects all
+attributes of the stored document and not only the attributes you specify in the
+operation.
+
+Sub-attributes are recursively merged by default, but you can let top-level
+attributes replace existing ones by disabling the [`mergeObjects` option](#mergeobjects).
+
+### `UPDATE <document> IN <collection>`
+
+Using the first syntax, the `document` object must have a `_key` attribute with
+the document key. The existing document with this key is updated with the
+attributes provided by the `document` object (except for the `_id`, `_key`, and
+`_rev` system attributes).
+
+The following query adds or updates the `name` attribute of the document
+identified by the key `my_key` in the `users` collection. The key is passed via
+the `_key` attribute alongside other attributes:
+
+```aql
+UPDATE { _key: "my_key", name: "Jon" } IN users
+```
+
+The following query is invalid because the object does not contain a `_key`
+attribute and thus it is not possible to determine the document to
+be updated:
+
+```aql
+UPDATE { name: "Jon" } IN users
+```
+
+You can combine the `UPDATE` operation with a `FOR` loop to determine the
+necessary key attributes, as shown below:
+
+```aql
+FOR u IN users
+  UPDATE { _key: u._key, name: CONCAT(u.firstName, " ", u.lastName) } IN users
+```
+
+Note that the `UPDATE` and `FOR` operations are independent of each other and
+`u` does not automatically define a document for the `UPDATE` statement.
+Thus, the following query is invalid:
+
+```aql
+FOR u IN users
+  UPDATE { name: CONCAT(u.firstName, " ", u.lastName) } IN users
+```
+
+### `UPDATE <keyExpression> WITH <document> IN <collection>`
+
+Using the second syntax, the document to update is defined by the
+`keyExpression`. It can either be a string with the document key, an object
+which contains a `_key` attribute with the document key, or an expression that
+evaluates to either of these two. The existing document with this key is
+updated with the attributes provided by the `document` object (except for
+the `_id`, `_key`, and `_rev` system attributes).
+
+The following query adds or updates the `name` attribute of the document
+identified by the key `my_key` in the `users` collection. The key is passed as
+a string in the `keyExpression`. The attributes to add or update are passed
+separately as the `document` object:
+
+```aql
+UPDATE "my_key" WITH { name: "Jon" } IN users
+```
+
+The `document` object may contain a `_key` attribute, but it is ignored.
+
+You cannot define the document to update using an `_id` attribute, nor pass a
+document identifier as a string (like `"users/john"`). However, you can use
+`PARSE_IDENTIFIER(<id>).key` as `keyExpression` to get the document key as a
+string:
+
+```aql
+LET key = PARSE_IDENTIFIER("users/john").key
+UPDATE key WITH { ...
} IN users +``` + +### Comparison of the syntaxes + +Both syntaxes of the `UPDATE` operation allow you to define the document to +modify and the attributes to add or update. The document to update is effectively +identified by a document key in combination with the specified collection. + +The `UPDATE` operation supports different ways of specifying the document key. +You can choose the syntax variant that is the most convenient for you. + +The following queries are equivalent: + +```aql +FOR u IN users + UPDATE u WITH { name: CONCAT(u.firstName, " ", u.lastName) } IN users +``` + +```aql +FOR u IN users + UPDATE u._key WITH { name: CONCAT(u.firstName, " ", u.lastName) } IN users +``` + +```aql +FOR u IN users + UPDATE { _key: u._key } WITH { name: CONCAT(u.firstName, " ", u.lastName) } IN users +``` + +```aql +FOR u IN users + UPDATE { _key: u._key, name: CONCAT(u.firstName, " ", u.lastName) } IN users +``` + +## Dynamic key expressions + +An `UPDATE` operation may update arbitrary documents, using either of the two +syntaxes: + +```aql +FOR i IN 1..1000 + UPDATE { _key: CONCAT("test", i), name: "Paula" } IN users +``` + +```aql +FOR i IN 1..1000 + UPDATE CONCAT("test", i) WITH { name: "Paula" } IN users +``` + +## Target a different collection + +The documents an `UPDATE` operation modifies can be in a different collection +than the ones produced by a preceding `FOR` operation: + +```aql +FOR u IN users + FILTER u.active == false + UPDATE u WITH { status: "inactive" } IN backup +``` + +Note how documents are read from the `users` collection but updated in another +collection called `backup`. Both collections need to use matching document keys +for this to work. + +Although the `u` variable holds a whole document, it is only used to define the +target document. The `_key` attribute of the object is extracted and the target +document is solely defined by the document key string value and the specified +collection of the `UPDATE` operation (`backup`). There is no link to the +original collection (`users`). + +## Using the current value of a document attribute + +The pseudo-variable `OLD` is not supported inside of `WITH` clauses (it is +available after `UPDATE`). To access the current attribute value, you can +usually refer to a document via the variable of the `FOR` loop, which is used +to iterate over a collection: + +```aql +FOR doc IN users + UPDATE doc WITH { + fullName: CONCAT(doc.firstName, " ", doc.lastName) + } IN users +``` + +If there is no loop, because a single document is updated only, then there +might not be a variable like above (`doc`), which would let you refer to the +document which is being updated: + +```aql +UPDATE "john" WITH { ... } IN users +``` + +To access the current value in this situation, you need to retrieve the document +first and store it in a variable: + +```aql +LET doc = FIRST(FOR u IN users FILTER u._key == "john" RETURN u) +UPDATE doc WITH { + fullName: CONCAT(doc.firstName, " ", doc.lastName) +} IN users +``` + +You can modify an existing attribute based on its current value this way, +to increment a counter for instance: + +```aql +UPDATE doc WITH { + karma: doc.karma + 1 +} IN users +``` + +If the attribute `karma` doesn't exist yet, `doc.karma` evaluates to `null`. +The expression `null + 1` results in the new attribute `karma` being set to `1`. +If the attribute does exist, then it is increased by `1`. 
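+
+Putting the retrieval and the counter update together, a self-contained sketch
+could look like this, assuming a `users` collection that contains a document
+with the key `"john"`:
+
+```aql
+LET doc = FIRST(FOR u IN users FILTER u._key == "john" RETURN u)
+UPDATE doc WITH {
+  karma: doc.karma + 1
+} IN users
+LET updated = NEW
+RETURN updated.karma
+```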
+ +Arrays can be mutated, too: + +```aql +UPDATE doc WITH { + hobbies: PUSH(doc.hobbies, "swimming") +} IN users +``` + +If the attribute `hobbies` doesn't exist yet, it is conveniently initialized +as `[ "swimming" ]` and otherwise extended. + +## Query options + +You can optionally set query options for the `UPDATE` operation: + +```aql +UPDATE ... IN users OPTIONS { ... } +``` + +### `ignoreErrors` + +You can use `ignoreErrors` to suppress query errors that may occur when trying to +update non-existing documents or when violating unique key constraints: + +```aql +FOR i IN 1..1000 + UPDATE CONCAT("test", i) + WITH { foobar: true } IN users + OPTIONS { ignoreErrors: true } +``` + +You cannot modify the `_id`, `_key`, and `_rev` system attributes, but attempts +to change them are ignored and not considered errors. + +### `keepNull` + +When updating an attribute to the `null` value, ArangoDB does not remove the +attribute from the document but stores this `null` value. To remove attributes +in an update operation, set them to `null` and set the `keepNull` option to +`false`. This removes the attributes you specify but not any previously stored +attributes with the `null` value: + +```aql +FOR u IN users + UPDATE u WITH { foobar: true, notNeeded: null } IN users + OPTIONS { keepNull: false } +``` + +The above query removes the `notNeeded` attribute from the documents and updates +the `foobar` attribute normally. + +Only top-level attributes and sub-attributes can be removed this way +(e.g. `{ attr: { sub: null } }`) but not attributes of objects that are nested +inside of arrays (e.g. `{ attr: [ { nested: null } ] }`). + +### `mergeObjects` + +The option `mergeObjects` controls whether object contents are +merged if an object attribute is present in both the `UPDATE` query and in the +to-be-updated document. + +The following query sets the updated document's `name` attribute to the exact +same value that is specified in the query. This is due to the `mergeObjects` option +being set to `false`: + +```aql +FOR u IN users + UPDATE u WITH { + name: { first: "foo", middle: "b.", last: "baz" } + } IN users + OPTIONS { mergeObjects: false } +``` + +Contrary, the following query merges the contents of the `name` attribute in the +original document with the value specified in the query: + +```aql +FOR u IN users + UPDATE u WITH { + name: { first: "foo", middle: "b.", last: "baz" } + } IN users + OPTIONS { mergeObjects: true } +``` + +Attributes in `name` that are present in the to-be-updated document but not in the +query are preserved. Attributes that are present in both are overwritten +with the values specified in the query. + +Note: the default value for `mergeObjects` is `true`, so there is no need to specify it +explicitly. + +### `waitForSync` + +To make sure data are durable when an update query returns, there is the `waitForSync` +query option: + +```aql +FOR u IN users + UPDATE u WITH { foobar: true } IN users + OPTIONS { waitForSync: true } +``` + +### `ignoreRevs` + +In order to not accidentally overwrite documents that have been modified since you last fetched +them, you can use the option `ignoreRevs` to either let ArangoDB compare the `_rev` value and +only succeed if they still match, or let ArangoDB ignore them (default): + +```aql +FOR i IN 1..1000 + UPDATE { _key: CONCAT("test", i), _rev: "1287623" } + WITH { foobar: true } IN users + OPTIONS { ignoreRevs: false } +``` + +### `exclusive` + +The RocksDB engine does not require collection-level locks. 
Different write +operations on the same collection do not block each other, as +long as there are no _write-write conflicts_ on the same documents. From an application +development perspective it can be desired to have exclusive write access on collections, +to simplify the development. Note that writes do not block reads in RocksDB. +Exclusive access can also speed up modification queries, because we avoid conflict checks. + +Use the `exclusive` option to achieve this effect on a per query basis: + +```aql +FOR doc IN collection + UPDATE doc + WITH { updated: true } IN collection + OPTIONS { exclusive: true } +``` + +### `refillIndexCaches` + +Whether to update existing entries in in-memory index caches if document updates +affect the edge index or cache-enabled persistent indexes. + +```aql +UPDATE { _key: "123", _from: "vert/C", _to: "vert/D" } IN edgeColl + OPTIONS { refillIndexCaches: true } +``` + +## Returning the modified documents + +You can optionally return the documents modified by the query. In this case, the `UPDATE` +operation needs to be followed by a `RETURN` operation. Intermediate `LET` operations are +allowed, too. These operations can refer to the pseudo-variables `OLD` and `NEW`. +The `OLD` pseudo-variable refers to the document revisions before the update, and `NEW` +refers to the document revisions after the update. + +Both `OLD` and `NEW` contain all document attributes, even those not specified +in the update expression. + +```aql +UPDATE document IN collection options RETURN OLD +UPDATE document IN collection options RETURN NEW +UPDATE keyExpression WITH document IN collection options RETURN OLD +UPDATE keyExpression WITH document IN collection options RETURN NEW +``` + +Following is an example using a variable named `previous` to capture the original +documents before modification. For each modified document, the document key is returned. + +```aql +FOR u IN users + UPDATE u WITH { value: "test" } IN users + LET previous = OLD + RETURN previous._key +``` + +The following query uses the `NEW` pseudo-value to return the updated documents, +without some of the system attributes: + +```aql +FOR u IN users + UPDATE u WITH { value: "test" } IN users + LET updated = NEW + RETURN UNSET(updated, "_key", "_id", "_rev") +``` + +It is also possible to return both `OLD` and `NEW`: + +```aql +FOR u IN users + UPDATE u WITH { value: "test" } IN users + RETURN { before: OLD, after: NEW } +``` + +## Transactionality + +On a single server, updates are executed transactionally in an all-or-nothing +fashion. + +A query may execute intermediate transaction commits in case the running +transaction (AQL query) hits the specified size thresholds. In this case, the +query's operations carried out so far are committed and not rolled back in case +of a later abort/rollback. This behavior can be controlled by adjusting the +intermediate commit settings for the RocksDB engine. See +[Known limitations for AQL queries](../fundamentals/limitations.md#storage-engine-properties). + +For sharded collections, the entire query and/or update operation may not be +transactional, especially if it involves different shards and/or DB-Servers. 
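+
+If a large update query runs into the intermediate commit thresholds mentioned
+above, you can also adjust them on a per-query basis via the
+`intermediateCommitCount` and `intermediateCommitSize` query options when
+submitting the query (they are not attributes of the `UPDATE` operation's
+`OPTIONS` clause). A minimal arangosh sketch, assuming a `users` collection and
+a hypothetical `migrated` flag:
+
+```js
+db._query(`
+  FOR u IN users
+    UPDATE u WITH { migrated: true } IN users
+`, {}, {
+  intermediateCommitCount: 10000 // commit after every 10000 write operations
+});
+```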
diff --git a/site/content/arangodb/oem/aql/high-level-operations/upsert.md b/site/content/arangodb/oem/aql/high-level-operations/upsert.md new file mode 100644 index 0000000000..a4c705a249 --- /dev/null +++ b/site/content/arangodb/oem/aql/high-level-operations/upsert.md @@ -0,0 +1,276 @@ +--- +title: '`UPSERT` operation in AQL' +menuTitle: UPSERT +weight: 70 +description: >- + An `UPSERT` operation either modifies an existing document, or creates a new + document if it does not exist +--- +`UPSERT` looks up a single document that matches the provided example. +If there is no match, an insert operation is executed to create a +document. If a document is found, you can either update or replace the document. +These subtypes are called **upsert** (update or insert) and **repsert** +(replace or insert). + +Each `UPSERT` operation is restricted to a single collection, and the +[collection name](../../concepts/data-structure/collections.md#collection-names) must not be dynamic. +Only a single `UPSERT` statement per collection is allowed per AQL query, and +it cannot be followed by read or write operations that access the same collection, by +traversal operations, or AQL functions that can read documents. + +## Syntax + +The syntax for an upsert operation: + +
+```aql
+UPSERT searchExpression
+INSERT insertExpression
+UPDATE updateExpression
+IN collection
+```
+
+The syntax for a repsert operation:
+
+```aql
+UPSERT searchExpression
+INSERT insertExpression
+REPLACE updateExpression
+IN collection
+```
+
+Both variants can optionally end with an `OPTIONS { … }` clause.
+
+When using the `UPDATE` variant of the `UPSERT` operation, the found document
+is partially updated, meaning only the attributes specified in
+*updateExpression* are updated or added. When using the `REPLACE` variant
+of `UPSERT` (repsert), the found document is replaced with the content of
+*updateExpression*.
+
+Updating a document modifies the document's revision number with a server-generated value.
+The system attributes `_id`, `_key`, and `_rev` cannot be updated, but `_from` and `_to`
+can be modified.
+
+The *searchExpression* contains the document to be looked for. It must be an
+**object literal** (`UPSERT { <attributeName>: <attributeValue>, ... } ...`) without dynamic
+attribute names. In case no such document can be found in *collection*, a new
+document is inserted into the collection as specified in the *insertExpression*.
+
+In case at least one document in *collection* matches the *searchExpression*, it is
+updated using the *updateExpression*. When more than one document in the collection
+matches the *searchExpression*, it is undefined which of the matching documents is
+updated. It is therefore often sensible to make sure by other means (such as unique
+indexes, application logic etc.) that at most one document matches *searchExpression*.
+
+The following query looks for a document in the `users` collection with a specific
+`name` attribute value. If the document exists, its *logins* attribute is increased
+by one. If it does not exist, a new document is inserted, consisting of the
+attributes `name`, `logins`, and `dateCreated`:
+
+```aql
+UPSERT { name: 'superuser' }
+INSERT { name: 'superuser', logins: 1, dateCreated: DATE_NOW() }
+UPDATE { logins: OLD.logins + 1 } IN users
+```
+
+Note that in the `UPDATE` case it is possible to refer to the previous version of the
+document using the `OLD` pseudo-value.
+
+## Query options
+
+### `ignoreErrors`
+
+The `ignoreErrors` option can be used to suppress query errors that may occur
+when trying to violate unique key constraints.
+
+### `keepNull`
+
+When updating an attribute to the `null` value, ArangoDB does not remove the
+attribute from the document but stores this `null` value. To remove attributes
+in an update operation, set them to `null` and set the `keepNull` option to
+`false`. This removes the attributes you specify but not any previously stored
+attributes with the `null` value:
+
+```aql
+UPSERT { _key: "mary" }
+INSERT { _key: "mary", name: "Mary", notNeeded: 123 }
+UPDATE { foobar: true, notNeeded: null }
+IN users OPTIONS { keepNull: false }
+```
+
+If no document with the key `mary` exists, the above query creates such a user
+document with a `notNeeded` attribute. If it exists already, it removes the
+`notNeeded` attribute from the document and updates the `foobar` attribute
+normally.
+
+Only top-level attributes and sub-attributes can be removed this way
+(e.g. `{ attr: { sub: null } }`) but not attributes of objects that are nested
+inside of arrays (e.g. `{ attr: [ { nested: null } ] }`).
+
+### `mergeObjects`
+
+The option `mergeObjects` controls whether object contents are
+merged if an object attribute is present in both the `UPDATE` query and in the
+to-be-updated document.
+
+{{< tip >}}
+The default value for `mergeObjects` is `true`, so there is no need to specify it
+explicitly.
+{{< /tip >}}
+
+### `waitForSync`
+
+To make sure data are durable when an update query returns, there is the `waitForSync`
+query option.
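+
+A minimal sketch, mirroring the `logins` example from above:
+
+```aql
+UPSERT { name: 'superuser' }
+INSERT { name: 'superuser', logins: 1, dateCreated: DATE_NOW() }
+UPDATE { logins: OLD.logins + 1 }
+IN users OPTIONS { waitForSync: true }
+```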
+ +### `ignoreRevs` + +In order to not accidentally update documents that have been written and updated since +you last fetched them you can use the option `ignoreRevs` to either let ArangoDB compare +the `_rev` value and only succeed if they still match, or let ArangoDB ignore them (default): + +```aql +FOR i IN 1..1000 + UPSERT { _key: CONCAT('test', i)} + INSERT {foobar: false} + UPDATE {_rev: "1287623", foobar: true } + IN users OPTIONS { ignoreRevs: false } +``` + +{{< info >}} +You need to add the `_rev` value in the *updateExpression*. It is not used +within the *searchExpression*. Even worse, if you use an outdated `_rev` in the +*searchExpression*, `UPSERT` triggers the `INSERT` path instead of the +`UPDATE` path, because it has not found a document exactly matching the +*searchExpression*. +{{< /info >}} + +### `exclusive` + +The RocksDB engine does not require collection-level locks. Different write +operations on the same collection do not block each other, as +long as there are no _write-write conflicts_ on the same documents. From an application +development perspective it can be desired to have exclusive write access on collections, +to simplify the development. Note that writes do not block reads in RocksDB. +Exclusive access can also speed up modification queries, because we avoid conflict checks. + +Use the `exclusive` option to achieve this effect on a per query basis: + +```aql +FOR i IN 1..1000 + UPSERT { _key: CONCAT('test', i) } + INSERT { foobar: false } + UPDATE { foobar: true } + IN users OPTIONS { exclusive: true } +``` + +### `indexHint` + +The `indexHint` option is used as a hint for the document lookup +performed as part of the `UPSERT` operation, and can help in cases such as +`UPSERT` not picking the best index automatically. + +```aql +UPSERT { a: 1234 } + INSERT { a: 1234, name: "AB" } + UPDATE { name: "ABC" } IN myCollection + OPTIONS { indexHint: "index_name" } +``` + +The index hint is passed through to an internal `FOR` loop that is used for the +lookup. Also see [`indexHint` Option of the `FOR` Operation](for.md#indexhint). + +Inverted indexes cannot be used for `UPSERT` lookups. + +### `forceIndexHint` + +Makes the index or indexes specified in `indexHint` mandatory if enabled. The +default is `false`. Also see +[`forceIndexHint` Option of the `FOR` Operation](for.md#forceindexhint). + +```aql +UPSERT { a: 1234 } + INSERT { a: 1234, name: "AB" } + UPDATE { name: "ABC" } IN myCollection + OPTIONS { indexHint: … , forceIndexHint: true } +``` + +## Returning documents + +`UPSERT` statements can optionally return data. To do so, they need to be followed +by a `RETURN` statement (intermediate `LET` statements are allowed, too). These statements +can optionally perform calculations and refer to the pseudo-values `OLD` and `NEW`. +In case the upsert performed an insert operation, `OLD` has a value of `null`. +In case the upsert performed an update or replace operation, `OLD` contains the +previous version of the document, before update/replace. + +`NEW` is always populated. It contains the inserted document in case the +upsert performed an insert, or the updated/replaced document in case it performed an +update/replace. + +This can also be used to check whether the upsert has performed an insert or an update +internally: + +```aql +UPSERT { name: 'superuser' } +INSERT { name: 'superuser', logins: 1, dateCreated: DATE_NOW() } +UPDATE { logins: OLD.logins + 1 } IN users +RETURN { doc: NEW, type: OLD ? 
'update' : 'insert' } +``` + +## Transactionality and Limitations + +- On a single server, upserts are generally executed transactionally in an + all-or-nothing fashion. + + For sharded collections in cluster deployments, the entire query and/or upsert + operation may not be transactional, especially if it involves different shards, + DB-Servers, or both. + +- Queries may execute intermediate transaction commits in case the running + transaction (AQL query) hits the specified size thresholds. This writes the + data that has been modified so far and it is not rolled back in case of a later + abort/rollback of the transaction. + + Such **intermediate commits** can occur for `UPSERT` operations over all + documents of a large collection, for instance. This has the side-effect that + atomicity of this operation cannot be guaranteed anymore and ArangoDB cannot + guarantee that "read your own writes" in upserts work. + + This is only an issue if you write a query where your search condition would + hit the same document multiple times, and only if you have large transactions. + You can adjust the behavior of the RocksDB storage engine by increasing the + `intermediateCommit` thresholds for data size and operation counts. + +- The lookup and the insert/update/replace parts are executed one after + another, so that other operations in other threads can happen in + between. This means if multiple `UPSERT` queries run concurrently, they + may all determine that the target document does not exist and then + create it multiple times! + + Note that due to this gap between the lookup and insert/update/replace, + even with a unique index, duplicate key errors or conflicts can occur. + But if they occur, the application/client code can execute the same query + again. + + To prevent this from happening, you should add a unique index to the lookup + attribute(s). Note that in the cluster a unique index can only be created if + it is equal to the shard key attribute of the collection or at least contains + it as a part. + + An alternative to making an UPSERT statement work atomically is to use the + `exclusive` option to limit write concurrency for this collection to 1, which + helps avoiding conflicts but is bad for throughput! + +- `UPSERT` operations do not observe their own writes correctly in cluster + deployments. They only do for OneShard databases with the `cluster-one-shard` + optimizer rule active. + + If upserts in a query create new documents and would then semantically hit the + same documents again, the operation may incorrectly use the `INSERT` branch to + create more documents instead of the `UPDATE`/`REPLACE` branch to update the + previously created documents. + + If upserts find existing documents for updating/replacing, you can access the + current document via the `OLD` pseudo-variable, but this may hold the initial + version of the document from before the query even if it has been modified + by `UPSERT` in the meantime. + +- The lookup attribute(s) from the search expression should be indexed in order + to improve the `UPSERT` performance. Ideally, the search expression contains the + shard key, as this allows the lookup to be restricted to a single shard. 
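+
+As mentioned in the limitations above, a unique index on the lookup
+attribute(s) speeds up the lookup and prevents concurrent upserts from creating
+duplicate documents (conflicting writes then fail with a unique constraint
+violation instead of silently inserting twice). A minimal arangosh sketch,
+assuming the `users` collection and the `name` lookup attribute from the
+earlier examples:
+
+```js
+// Unique persistent index on the attribute used in the search expression
+db.users.ensureIndex({ type: "persistent", fields: ["name"], unique: true });
+```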
diff --git a/site/content/arangodb/oem/aql/high-level-operations/window.md b/site/content/arangodb/oem/aql/high-level-operations/window.md new file mode 100644 index 0000000000..494345d4d7 --- /dev/null +++ b/site/content/arangodb/oem/aql/high-level-operations/window.md @@ -0,0 +1,282 @@ +--- +title: '`WINDOW` operation in AQL' +menuTitle: WINDOW +weight: 45 +description: >- + Aggregate adjacent documents or value ranges with a sliding window to + calculate running totals, rolling averages, and other statistical properties +--- +The `WINDOW` operation can be used for aggregations over adjacent documents, or +preceding and / or following rows in other words. It can also aggregate based +on a value or duration range relative to a document attribute. + +The operation performs a `COLLECT AGGREGATE`-like operation on a set +of query rows. However, whereas a `COLLECT` operation groups multiple query +rows into a single result group, a `WINDOW` operation produces a result for +each query row: + +- The row for which function evaluation occurs is called the current row. +- The query rows related to the current row over which function evaluation + occurs, comprise the window frame for the current row. + +Window frames are determined with respect to the current row: + +- By defining a window frame to be all rows from the query start to the current + row, you can compute running totals for each row. +- By defining a frame as extending *N* rows on either side of the current row, + you can compute rolling averages. + +## Syntax + +There are two syntax variants for `WINDOW` operations. + +**Row-based** (adjacent documents): + +
+```aql
+WINDOW { preceding: numPrecedingRows, following: numFollowingRows } AGGREGATE variableName = aggregateExpression
+```
+ +**Range-based** (value or duration range): + +
+```aql
+WINDOW rangeValue WITH { preceding: offsetPreceding, following: offsetFollowing } AGGREGATE variableName = aggregateExpression
+```
+ +Calls to the following functions are supported in aggregation expressions: +- `LENGTH()` / `COUNT()` +- `MIN()` +- `MAX()` +- `SUM()` +- `AVERAGE()` / `AVG()` +- `STDDEV_POPULATION()` / `STDDEV()` +- `STDDEV_SAMPLE()` +- `VARIANCE_POPULATION()` / `VARIANCE()` +- `VARIANCE_SAMPLE()` +- `UNIQUE()` +- `SORTED_UNIQUE()` +- `COUNT_DISTINCT()` / `COUNT_UNIQUE()` +- `BIT_AND()` +- `BIT_OR()` +- `BIT_XOR()` + +## Row-based Aggregation + +The first syntax form of `WINDOW` allows aggregating over a fixed number of +rows, following or preceding the current row. It is also possible to define +that **all** preceding or following rows should be aggregated (`"unbounded"`). +The number of rows has to be determined at query compile time. + +Below query demonstrates the use of window frames to compute **running totals** +as well as **rolling averages** computed from the current row and the rows that +immediately precede and follow it: + +```aql +--- +name: windowAggregationRow +description: '' +dataset: observationsSampleDataset +--- +FOR t IN observations + SORT t.time + WINDOW { preceding: 1, following: 1 } + AGGREGATE rollingAverage = AVG(t.val), rollingSum = SUM(t.val) + WINDOW { preceding: "unbounded", following: 0} + AGGREGATE cumulativeSum = SUM(t.val) + RETURN { + time: t.time, + subject: t.subject, + val: t.val, + rollingAverage, // average of the window's values + rollingSum, // sum of the window's values + cumulativeSum // running total + } +``` + +The row order is controlled by the `SORT` operation on the `time` attribute. + +The first `WINDOW` operation aggregates the previous, current, and next row +(preceding and following is set to 1) and calculates the average and sum of +these three values. In case of the first row, there is no preceding row but a +following row, hence the values `10` and `0` are added up to calculate the sum, +which is divided by 2 to compute the average. For the second row, the values +`10`, `0` and `9` are summed up and divided by 3, and so on. + +The second `WINDOW` operation aggregates all previous values (unbounded) to +calculate a running sum. For the first row, that is just `10`, for the second +row it is `10` + `0`, for the third `10` + `0` + `9`, and so on. 
+ +| time | subject | val | rollingAverage | rollingSum | cumulativeSum | +|---------------------|---------|----:|---------------:|-----------:|--------------:| +| 2021-05-25 07:00:00 | st113 | 10 | 5 | 10 | 10 | +| 2021-05-25 07:00:00 | xh458 | 0 | 6.333… | 19 | 10 | +| 2021-05-25 07:15:00 | st113 | 9 | 6.333… | 19 | 19 | +| 2021-05-25 07:15:00 | xh458 | 10 | 14.666… | 44 | 29 | +| 2021-05-25 07:30:00 | st113 | 25 | 13.333… | 40 | 54 | +| 2021-05-25 07:30:00 | xh458 | 5 | 16.666… | 50 | 59 | +| 2021-05-25 07:45:00 | st113 | 20 | 18.333… | 55 | 79 | +| 2021-05-25 07:45:00 | xh458 | 30 | 25 | 75 | 109 | +| 2021-05-25 08:00:00 | xh458 | 25 | 27.5 | 55 | 134 | + +The below query demonstrates the use of window frames to compute running totals +within each `subject` group of `time`-ordered query rows, as well as rolling +sums and averages computed from the current row and the rows that immediately +precede and follow it, also per `subject` group and sorted by `time`: + +```aql +--- +name: windowAggregationRowGrouped +description: '' +dataset: observationsSampleDataset +--- +FOR t IN observations + COLLECT subject = t.subject INTO group = t + LET subquery = (FOR t2 IN group + SORT t2.time + WINDOW { preceding: 1, following: 1 } + AGGREGATE rollingAverage = AVG(t2.val), rollingSum = SUM(t2.val) + WINDOW { preceding: "unbounded", following: 0 } + AGGREGATE cumulativeSum = SUM(t2.val) + RETURN { + time: t2.time, + subject: t2.subject, + val: t2.val, + rollingAverage, + rollingSum, + cumulativeSum + } + ) + // flatten subquery result + FOR t2 IN subquery + RETURN t2 +``` + +If you look at the first row with the subject `xh458`, then you can see the +cumulative sum reset and that the rolling average and sum does not take the +previous row into account that belongs to subject `st113`. + +| time | subject | val | rollingAverage | rollingSum | cumulativeSum | +|---------------------|---------|----:|---------------:|-----------:|--------------:| +| 2021-05-25 07:00:00 | st113 | 10 | 9.5 | 19 | 10 | +| 2021-05-25 07:15:00 | st113 | 9 | 14.666… | 44 | 19 | +| 2021-05-25 07:30:00 | st113 | 25 | 18 | 54 | 44 | +| 2021-05-25 07:45:00 | st113 | 20 | 22.5 | 45 | 64 | +| 2021-05-25 07:00:00 | xh458 | 0 | 5 | 10 | 0 | +| 2021-05-25 07:15:00 | xh458 | 10 | 5 | 15 | 10 | +| 2021-05-25 07:30:00 | xh458 | 5 | 15 | 45 | 15 | +| 2021-05-25 07:45:00 | xh458 | 30 | 20 | 60 | 45 | +| 2021-05-25 08:00:00 | xh458 | 25 | 27.5 | 55 | 70 | + +## Range-based Aggregation + +The second syntax form of `WINDOW` allows aggregating over a all documents +within a value range. Offsets are differences in attribute values from the +current document. + +Attribute values have to be numeric. The offset calculations are performed by +adding or subtracting the numeric offsets specified in the `following` and +`preceding` attribute. The offset numbers have to be positive and have to be +determined at query compile time. The default offset is `0`. + +The range based window syntax requires the input rows to be sorted by the row +value. To ensure correctness of the result, the AQL optimizer will +automatically insert a `SORT` statement into the query in front of the `WINDOW` +statement. The optimizer may be able to optimize away that `SORT` statement +later if a sorted index is present on the group criteria. 
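+
+A persistent index on the attribute that the range-based `WINDOW` sorts by is
+such a sorted index and may allow the optimizer to remove the inserted `SORT`
+again. A minimal arangosh sketch, assuming the `observations` collection from
+the examples below:
+
+```js
+// Sorted (persistent) index on the attribute used as the WINDOW range value
+db.observations.ensureIndex({ type: "persistent", fields: ["val"] });
+```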
+ +The following query demonstrates the use of window frames to compute totals as +well as averages computed from the current document and the documents that have +attribute values in `t.val` in the range of `[-10, +5]` (inclusive), preceding +and following: + +```aql +--- +name: windowAggregationRangeValue +description: '' +dataset: observationsSampleDataset +--- +FOR t IN observations + WINDOW t.val WITH { preceding: 10, following: 5 } + AGGREGATE rollingAverage = AVG(t.val), rollingSum = SUM(t.val) + RETURN { + time: t.time, + subject: t.subject, + val: t.val, + rollingAverage, + rollingSum + } +``` + +The value range of the first row is `[-10, 5]` since `val` is `0`, thus the +values from the first and second row are added up to `5` with the average being +`2.5`. The value range of the last row is `[20, 35]` as `val` is `30`, which +means that the last four rows get aggregated to a sum of `100` and an average +of `25` (the range is inclusive, i.e. `val` falls within the range with a value +of `20`). + +| time | subject | val | rollingAverage | rollingSum | +|---------------------|---------|----:|---------------:|-----------:| +| 2021-05-25 07:00:00 | xh458 | 0 | 2.5 | 5 | +| 2021-05-25 07:30:00 | xh458 | 5 | 6.8 | 34 | +| 2021-05-25 07:15:00 | st113 | 9 | 6.8 | 34 | +| 2021-05-25 07:00:00 | st113 | 10 | 6.8 | 34 | +| 2021-05-25 07:15:00 | xh458 | 10 | 6.8 | 34 | +| 2021-05-25 07:45:00 | st113 | 20 | 18 | 90 | +| 2021-05-25 07:30:00 | st113 | 25 | 25 | 100 | +| 2021-05-25 08:00:00 | xh458 | 25 | 25 | 100 | +| 2021-05-25 07:45:00 | xh458 | 30 | 25 | 100 | + +## Duration-based Aggregation + +Aggregating by time intervals is a subtype of range-based aggregation that +uses the second syntax form of `WINDOW` but with ISO durations. + +To support `WINDOW` frames over time-series data the `WINDOW` operation may +calculate timestamp offsets using positive ISO 8601 duration strings, like +`P1Y6M` (1 year and 6 months) or `PT12H30M` (12 hours and 30 minutes). Also see +[Date functions](../functions/date.md#comparison-and-calculation). +In contrast to the ISO 8601 standard, week components may be freely combined +with other components. For example, `P1WT1H` and `P1M1W` are both valid. +Fractional values are only supported for seconds, and only with up to three +decimals after the separator, i.e., millisecond precision. For example, +`PT0.123S` is a valid duration while `PT0.5H` and `PT0.1234S` are not. + +Durations can be specified separately in `following` and `preceding`. +If such a duration is used, then the attribute value of the current document +must be a number and is treated as numeric **timestamp in milliseconds**. +The range is inclusive. If either bound is not specified, it is treated as an +empty duration (i.e., `P0D`). 
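+
+Both bounds can also be combined. The following sketch, using the same dataset
+as the example below, aggregates over a window that extends 15 minutes into the
+past and 15 minutes into the future of each observation:
+
+```aql
+FOR t IN observations
+  WINDOW DATE_TIMESTAMP(t.time) WITH { preceding: "PT15M", following: "PT15M" }
+  AGGREGATE rollingAverage = AVG(t.val), rollingSum = SUM(t.val)
+  RETURN { time: t.time, rollingAverage, rollingSum }
+```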
+ +The following query demonstrates the use of window frames to compute rolling +sums and averages over observations in the last 30 minutes (inclusive), based +on the document attribute `time` that is converted from a datetime string to a +numeric timestamp: + +```aql +--- +name: windowAggregationRangeDuration +description: '' +dataset: observationsSampleDataset +--- +FOR t IN observations + WINDOW DATE_TIMESTAMP(t.time) WITH { preceding: "PT30M" } + AGGREGATE rollingAverage = AVG(t.val), rollingSum = SUM(t.val) + RETURN { + time: t.time, + subject: t.subject, + val: t.val, + rollingAverage, + rollingSum + } +``` + +With a time of `07:30:00`, everything from `07:00:00` to `07:30:00` on the same +day falls within the duration range with `preceding: "PT30M"`, thus aggregating +the top six rows to a sum of `59` and an average of `9.8333…`. + +| time | subject | val | rollingAverage | rollingSum | +|---------------------|---------|----:|---------------:|-----------:| +| 2021-05-25 07:00:00 | st113 | 10 | 5 | 10 | +| 2021-05-25 07:00:00 | xh458 | 0 | 5 | 10 | +| 2021-05-25 07:15:00 | st113 | 9 | 7.25 | 29 | +| 2021-05-25 07:15:00 | xh458 | 10 | 7.25 | 29 | +| 2021-05-25 07:30:00 | st113 | 25 | 9.8333… | 59 | +| 2021-05-25 07:30:00 | xh458 | 5 | 9.8333… | 59 | +| 2021-05-25 07:45:00 | st113 | 20 | 16.5 | 99 | +| 2021-05-25 07:45:00 | xh458 | 30 | 16.5 | 99 | +| 2021-05-25 08:00:00 | xh458 | 25 | 21 | 105 | diff --git a/site/content/arangodb/oem/aql/high-level-operations/with.md b/site/content/arangodb/oem/aql/high-level-operations/with.md new file mode 100644 index 0000000000..66742365dd --- /dev/null +++ b/site/content/arangodb/oem/aql/high-level-operations/with.md @@ -0,0 +1,71 @@ +--- +title: '`WITH` operation in AQL' +menuTitle: WITH +weight: 75 +description: >- + An AQL query can start with a `WITH` operation, listing collections that a + query implicitly reads from +--- +Reading implicitly from a collections means that the collections are not +specified explicitly in language constructs like the following: + +- `FOR ... IN collection` +- `INSERT ... INTO collection` +- `UPDATE ... IN collection` +- `GRAPH "graph-name"` (via the graph definition) + +Instead, the collections are only known at runtime of the query. Such dynamic +collection access is invisible to the AQL query parser at query compile time. +Dynamic access is possible via the `DOCUMENT()` function as well as with +graph traversals (in particular the variant using collection sets), because +edges may point to arbitrary vertex collections. Additionally, if you specify +the start vertex of a traversal using a string, its collection needs to be +declared as well. + +Collections that are explicitly used in a query are automatically detected by +the AQL query parser. Any additional collections that will be involved in the +query but cannot be detected automatically by the query parser can be manually +specified using a `WITH` statement. It is recommended to declare all collections +that the `DOCUMENT()` function or graph traversals using collection sets might +possibly access to avoid occasional query failures. + +## Syntax + +
+```aql
+WITH collection1 [, collection2 [, ... collectionN ] ]
+```
+ +`WITH` is also a keyword that is used in other contexts, for example in `UPDATE` +statements. To declare additional collections, you must place the `WITH` keyword +at the very start of the query. + +## Usage + +The `WITH` operation is only required if you use a cluster deployment and only +for AQL queries that dynamically read from vertex collections as part of +graph traversals. + +You can enable the `--query.require-with` startup option to make single server +instances require `WITH` declarations like cluster deployments to ease development, +see [Requiring `WITH` statements](../../components/arangodb-server/options.md#--queryrequire-with). + +Dynamic access via the `DOCUMENT()` function does not require you to list the +involved collections. Using named graphs in traversals (`GRAPH "graph-name"`) +does not require it either, assuming that all vertices are in collections that +are part of the graph, as enforced by the [Graph API](../../develop/http-api/graphs/named-graphs.md). +That means, it is only necessary for traversals using anonymous graphs / +[collection sets](../graph-queries/traversals.md#working-with-collection-sets). + +The following example query specifies an edge collection `usersHaveManagers` +to perform a graph traversal. It is the only explicitly specified collection in +the query. It does not need to be declared using the `WITH` operation. + +However, the involved vertex collections need to be declared. In this example, +the start vertex is specified as a string and it is stored in the `users` +collections. Furthermore, the edges of the edge collection reference vertices of +a collection called `managers`. Both collections are declared at the beginning +of the query using the `WITH` operation: + +```aql +WITH users, managers +FOR v, e, p IN 1..2 OUTBOUND 'users/1' usersHaveManagers + RETURN { v, e, p } +``` diff --git a/site/content/arangodb/oem/aql/how-to-invoke-aql/_index.md b/site/content/arangodb/oem/aql/how-to-invoke-aql/_index.md new file mode 100644 index 0000000000..d8c7d28e57 --- /dev/null +++ b/site/content/arangodb/oem/aql/how-to-invoke-aql/_index.md @@ -0,0 +1,30 @@ +--- +title: How to execute AQL queries +menuTitle: How to invoke AQL +weight: 5 +description: '' +--- +AQL queries can be invoked in the following ways: + +- Via the web interface +- Using the `db` object of the JavaScript API, for example, in arangosh or in a Foxx service +- Via the raw REST HTTP API + +There are always calls to the server's HTTP API under the hood, but the web interface +and the `db` object abstract away the low-level communication details and are +thus easier to use. + +The ArangoDB web interface has a specific section for [**QUERIES**](with-the-web-interface.md). + +You can run [AQL queries from the ArangoDB Shell](with-arangosh.md) +with the [`db._query()`](with-arangosh.md#with-db_query) and +[`db._createStatement()`](with-arangosh.md#with-db_createstatement-arangostatement) +methods of the [`db` object](../../develop/javascript-api/@arangodb/db-object.md). This chapter +also describes how to use bind parameters, statistics, counting, and cursors with +arangosh. + +If you use Foxx microservices, see [how to write database queries](../../develop/foxx-microservices/getting-started.md#writing-database-queries) +for examples including tagged template strings. + +If you want to run AQL queries from your application via the HTTP REST API, +see the full API description at [HTTP interface for AQL queries](../../develop/http-api/queries/aql-queries.md). 
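+
+For orientation, a query is submitted to the HTTP API as a JSON document. A
+minimal sketch of such a request body (the exact interface is described in the
+linked HTTP API documentation):
+
+```json
+{
+  "query": "FOR doc IN @@collection LIMIT 5 RETURN doc",
+  "bindVars": { "@collection": "_apps" },
+  "count": true
+}
+```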
diff --git a/site/content/arangodb/oem/aql/how-to-invoke-aql/with-arangosh.md b/site/content/arangodb/oem/aql/how-to-invoke-aql/with-arangosh.md new file mode 100644 index 0000000000..c430c0efce --- /dev/null +++ b/site/content/arangodb/oem/aql/how-to-invoke-aql/with-arangosh.md @@ -0,0 +1,786 @@ +--- +title: Executing AQL queries from _arangosh_ +menuTitle: with arangosh +weight: 5 +description: >- + How to run queries, set bind parameters, and obtain the resulting and + additional information using the JavaScript API +# Undocumented on purpose: +# db._query(, , , { forceOneShardAttributeValue: "..."} ) +--- +In the ArangoDB shell, you can use the `db._query()` and `db._createStatement()` +methods to execute AQL queries. This chapter also describes +how to use bind parameters, counting, statistics and cursors. + +## With `db._query()` + +`db._query() → cursor` + +You can execute queries with the `_query()` method of the `db` object. +This runs the specified query in the context of the currently +selected database and returns the query results in a cursor. +You can print the results of the cursor using its `toArray()` method: + +```js +--- +name: 01_workWithAQL_all +description: '' +--- +~addIgnoreCollection("mycollection") +var coll = db._create("mycollection") +var doc = db.mycollection.save({ _key: "testKey", Hello : "World" }) +db._query('FOR my IN mycollection RETURN my._key').toArray() +``` + +### `db._query()` bind parameters + +`db._query(, ) → cursor` + +To pass bind parameters into a query, you can specify a second argument when +calling the `_query()` method: + +```js +--- +name: 02_workWithAQL_bindValues +description: '' +--- +db._query('FOR c IN @@collection FILTER c._key == @key RETURN c._key', { + '@collection': 'mycollection', + 'key': 'testKey' +}).toArray(); +``` + +### ES6 template strings + +`` aql`` `` + +It is also possible to use ES6 template strings for generating AQL queries. There is +a template string generator function named `aql`. + +The following example demonstrates what the template string function generates: + +```js +--- +name: 02_workWithAQL_aqlTemplateString +description: '' +--- +var key = 'testKey'; +aql`FOR c IN mycollection FILTER c._key == ${key} RETURN c._key` +``` + +The next example directly uses the generated result to execute a query: + +```js +--- +name: 02_workWithAQL_aqlQuery +description: '' +--- +var key = 'testKey'; +db._query( + aql`FOR c IN mycollection FILTER c._key == ${key} RETURN c._key` +).toArray(); +``` + +Arbitrary JavaScript expressions can be used in queries that are generated with the +`aql` template string generator. Collection objects are handled automatically: + +```js +--- +name: 02_workWithAQL_aqlCollectionQuery +description: '' +--- +var key = 'testKey'; +db._query(aql`FOR doc IN ${ db.mycollection } RETURN doc`).toArray(); +``` + +Note: data-modification AQL queries normally do not return a result unless the +AQL query contains a `RETURN` operation at the top-level. Without a `RETURN` +operation, the `toArray()` method returns an empty array. + +### Statistics and extra Information + +`cursor.getExtra() → queryInfo` + +It is always possible to retrieve statistics for a query with the `getExtra()` method: + +```js +--- +name: 03_workWithAQL_getExtra +description: '' +--- +db._query(` + FOR i IN 1..100 + INSERT { _key: CONCAT('test', TO_STRING(i)) } INTO mycollection +`).getExtra(); +``` + +The meaning of the statistics values is described in +[Query statistics](../execution-and-performance/query-statistics.md). 
+ +Query warnings are also reported here. If you design queries on the shell, +be sure to check for warnings. + +### Main query options + +`db._query(, , , ) → cursor` + +You can pass the main options as the third argument to `db._query()` if you +also pass a fourth argument with the sub options (can be an empty object `{}`). + +#### `count` + +Whether the number of documents in the result set should be calculated on the +server side and returned in the `count` attribute of the result. Calculating the +`count` attribute might have a performance impact for some queries so this +option is turned off by default, and only returned when requested. + +If enabled, you can get the count by calling the `count()` method of the cursor. +You can also count the number of results on the client side, for example, using +`cursor.toArray().length`. + +```js +--- +name: 02_workWithAQL_count +description: '' +--- +var cursor = db._query( + 'FOR i IN 1..42 RETURN i', + {}, + { count: true }, + {} +); +cursor.count(); +cursor.toArray().length; +``` + +#### `batchSize` + +The maximum number of result documents to be transferred from the server to the +client in one roundtrip. If this attribute is not set, a server-controlled +default value is used. A `batchSize` value of `0` is disallowed. + +```js +--- +name: 02_workWithAQL_batchSize +description: '' +--- +db._query( + 'FOR i IN 1..3 RETURN i', + {}, + { batchSize: 2 }, + {} +).toArray(); // full result retrieved in two batches +``` + +#### `ttl` + +The time-to-live for the cursor (in seconds). If the result set is small enough +(less than or equal to `batchSize`), then results are returned right away. +Otherwise, they are stored in memory and are accessible via the cursor with +respect to the `ttl`. The cursor is removed on the server automatically after +the specified amount of time. This is useful to ensure garbage collection of +cursors that are not fully fetched by clients. If not set, a server-defined +value is used (default: 30 seconds). + +```js +--- +name: 02_workWithAQL_ttl +description: '' +--- +db._query( + 'FOR i IN 1..20 RETURN i', + {}, + { ttl: 5, batchSize: 10 }, + {} +).toArray(); // Each batch needs to be fetched within 5 seconds +``` + +#### `memoryLimit` + +To set a memory limit for the query, pass `options` to the `_query()` method. +The memory limit specifies the maximum number of bytes that the query is +allowed to use. When a single AQL query reaches the specified limit value, +the query will be aborted with a *resource limit exceeded* exception. In a +cluster, the memory accounting is done per shard, so the limit value is +effectively a memory limit per query per shard. + +```js +--- +name: 02_workWithAQL_memoryLimit +description: '' +--- +db._query( + 'FOR i IN 1..100000 SORT i RETURN i', + {}, + { memoryLimit: 100000 } +).toArray(); // xpError(ERROR_RESOURCE_LIMIT) +``` + +If no memory limit is specified, then the server default value (controlled by +the `--query.memory-limit` startup option) is used for restricting the maximum amount +of memory the query can use. A memory limit value of `0` means that the maximum +amount of memory for the query is not restricted. + +### Query sub options + +`db._query(, , ) → cursor` + +`db._query(, , , ) → cursor` + +You can pass the sub options as the third argument to `db._query()` if you don't +provide main options, or as fourth argument if you do. 
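+
+For example, a minimal sketch using the `maxRuntime` sub option (described
+further below) together with the `batchSize` main option:
+
+```js
+// Sub options as the third argument (no main options given):
+db._query("FOR i IN 1..10 RETURN i", {}, { maxRuntime: 5 }).toArray();
+
+// Sub options as the fourth argument, after the main options:
+db._query("FOR i IN 1..10 RETURN i", {}, { batchSize: 5 }, { maxRuntime: 5 }).toArray();
+```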
+ +#### `fullCount` + +If you set `fullCount` to `true` and if the query contains a `LIMIT` operation, then the +result has an extra attribute with the sub-attributes `stats` and `fullCount`, like +`{ ... , "extra": { "stats": { "fullCount": 123 } } }`. The `fullCount` attribute +contains the number of documents in the result before the last top-level `LIMIT` in the +query was applied. It can be used to count the number of documents that match certain +filter criteria, but only return a subset of them, in one go. It is thus similar to +MySQL's `SQL_CALC_FOUND_ROWS` hint. Note that setting the option disables a few +`LIMIT` optimizations and may lead to more documents being processed, and thus make +queries run longer. Note that the `fullCount` attribute may only be present in the +result if the query has a top-level `LIMIT` operation and the `LIMIT` operation +is actually used in the query. + +#### `failOnWarning` +If you set `failOnWarning` to `true`, this makes the query throw an exception and +abort in case a warning occurs. You should use this option in development to catch +errors early. If set to `false`, warnings don't propagate to exceptions and are +returned with the query results. There is also a `--query.fail-on-warning` +startup options for setting the default value for `failOnWarning`, so that you +don't need to set it on a per-query level. + +#### `cache` + +Whether the [AQL query results cache](../execution-and-performance/caching-query-results.md) +shall be used for adding as well as for retrieving results. + +If the query cache mode is set to `demand` and you set the `cache` query option +to `true` for a query, then its query result is cached if it's eligible for +caching. If the query cache mode is set to `on`, query results are automatically +cached if they are eligible for caching unless you set the `cache` option to `false`. + +If you set the `cache` option to `false`, then any query cache lookup is skipped +for the query. If you set it to `true`, the query cache is checked a cached result +**if** the query cache mode is either set to `on` or `demand`. + +```js +--- +name: 02_workWithAQL_cache +description: '' +--- +var resultCache = require("@arangodb/aql/cache"); +resultCache.properties({ mode: "demand" }); +~resultCache.clear(); +db._query("FOR i IN 1..5 RETURN i", {}, { cache: true }); // Adds result to cache +db._query("FOR i IN 1..5 RETURN i", {}, { cache: true }); // Retrieves result from cache +db._query("FOR i IN 1..5 RETURN i", {}, { cache: false }); // Bypasses the cache +``` + +#### `fillBlockCache` + +If you set `fillBlockCache` to `true` or not specify it, this makes the query store +the data it reads via the RocksDB storage engine in the RocksDB block cache. This is +usually the desired behavior. You can set the option to `false` for queries that are +known to either read a lot of data that would thrash the block cache, or for queries +that read data known to be outside of the hot set. By setting the option +to `false`, data read by the query does not make it into the RocksDB block cache if +it is not already in there, thus leaving more room for the actual hot set. + +#### `profile` + +If you set `profile` to `true` or `1`, extra timing information is returned for the query. +The timing information is accessible via the `getExtra()` method of the query +result. If set to `2`, the query includes execution statistics per query plan +execution node in `stats.nodes` sub-attribute of the `extra` return attribute. 
+Additionally, the query plan is returned in the `extra.plan` sub-attribute. + +#### `maxWarningCount` + +The `maxWarningCount` option limits the number of warnings that are returned by the query if +`failOnWarning` is not set to `true`. The default value is `10`. + +#### `maxNumberOfPlans` + +The `maxNumberOfPlans` option limits the number of query execution plans the optimizer +creates at most. Reducing the number of query execution plans may speed up query plan +creation and optimization for complex queries, but normally there is no need to adjust +this value. + +#### `optimizer` + +Options related to the query optimizer. + +- `rules`: A list of to-be-included or to-be-excluded optimizer rules can be put into + this attribute, telling the optimizer to include or exclude specific rules. To disable + a rule, prefix its name with a `-`, to enable a rule, prefix it with a `+`. There is also + a pseudo-rule `all`, which matches all optimizer rules. `-all` disables all rules. + +#### `allowRetry` + +Set this option to `true` to make it possible to retry fetching the latest batch +from a cursor. + +{{< info >}} +This feature cannot be used on the server-side, like in [Foxx](../../develop/foxx-microservices/_index.md), as +there is no client connection and no batching. +{{< /info >}} + +If retrieving a result batch fails because of a connection issue, you can ask +for that batch again using the `POST /_api/cursor//` +endpoint. The first batch has an ID of `1` and the value is incremented by 1 +with every batch. Every result response except the last one also includes a +`nextBatchId` attribute, indicating the ID of the batch after the current. +You can remember and use this batch ID should retrieving the next batch fail. + +You can only request the latest batch again (or the next batch). +Earlier batches are not kept on the server-side. +Requesting a batch again does not advance the cursor. + +You can also call this endpoint with the next batch identifier, i.e. the value +returned in the `nextBatchId` attribute of a previous request. This advances the +cursor and returns the results of the next batch. This is only supported if there +are more results in the cursor (i.e. `hasMore` is `true` in the latest batch). + +From v3.11.1 onward, you may use the `POST /_api/cursor//` +endpoint even if the `allowRetry` attribute is `false` to fetch the next batch, +but you cannot request a batch again unless you set it to `true`. + +To allow refetching of the last batch of the query, the server cannot +automatically delete the cursor. After the first attempt of fetching the last +batch, the server would normally delete the cursor to free up resources. As you +might need to reattempt the fetch, it needs to keep the final batch when the +`allowRetry` option is enabled. Once you successfully received the last batch, +you should call the `DELETE /_api/cursor/` endpoint so that the +server doesn't unnecessarily keep the batch until the cursor times out +(`ttl` query option). + +#### `stream` + +Set `stream` to `true` to execute the query in a **streaming** fashion. +The query result is not stored on the server, but calculated on the fly. + +{{< warning >}} +Long-running queries need to hold the collection locks for as long as the query +cursor exists. It is advisable to **only** use this option on short-running +queries **or** without exclusive locks. +{{< /warning >}} + +If set to `false`, the query is executed right away in its entirety. 
+In that case, the query results are either returned right away (if the result +set is small enough), or stored on the arangod instance and can be accessed +via the cursor API. + +The default value is `false`. + +{{< info >}} +The query options `cache`, `count` and `fullCount` don't work on streaming +queries. Additionally, query statistics, profiling data, and warnings are only +available after the query has finished and are delivered as part of the last batch. +{{< /info >}} + +#### `maxRuntime` + +The query has to be executed within the given runtime or it is killed. +The value is specified in seconds. The default value is `0.0` (no timeout). + +#### `maxDNFConditionMembers` + +Introduced in: v3.11.0 + +A threshold for the maximum number of `OR` sub-nodes in the internal +representation of an AQL `FILTER` condition. + +Yon can use this option to limit the computation time and memory usage when +converting complex AQL `FILTER` conditions into the internal DNF +(disjunctive normal form) format. `FILTER` conditions with a lot of logical +branches (`AND`, `OR`, `NOT`) can take a large amount of processing time and +memory. This query option limits the computation time and memory usage for +such conditions. + +Once the threshold value is reached during the DNF conversion of a `FILTER` +condition, the conversion is aborted, and the query continues with a simplified +internal representation of the condition, which **cannot be used for index lookups**. + +You can also set the threshold globally instead of per query with the +[`--query.max-dnf-condition-members` startup option](../../components/arangodb-server/options.md#--querymax-dnf-condition-members). + +#### `maxNodesPerCallstack` + +The number of execution nodes in the query plan after +that stack splitting is performed to avoid a potential stack overflow. +Defaults to the configured value of the startup option +`--query.max-nodes-per-callstack`. + +This option is only useful for testing and debugging and normally does not need +any adjustment. + +#### `maxTransactionSize` + +The transaction size limit in bytes. + +#### `intermediateCommitSize` + +The maximum total size of operations after which an intermediate +commit is performed automatically. + +#### `intermediateCommitCount` + +The maximum number of operations after which an intermediate +commit is performed automatically. + +#### `spillOverThresholdMemoryUsage` + +Introduced in: v3.10.0 + +This option allows queries to store intermediate and final results temporarily +on disk if the amount of memory used (in bytes) exceeds the specified value. +This is used for decreasing the memory usage during the query execution. + +This option only has an effect on queries that use the `SORT` operation but +without a `LIMIT`, and if you enable the spillover feature by setting a path +for the directory to store the temporary data in with the +[`--temp.intermediate-results-path` startup option](../../components/arangodb-server/options.md#--tempintermediate-results-path). + +Default value: 128MB. + +{{< info >}} +Spilling data from RAM onto disk is an experimental feature and is turned off +by default. The query results are still built up entirely in RAM on Coordinators +and single servers for non-streaming queries. To avoid the buildup of +the entire query result in RAM, use a streaming query (see the +[`stream`](#stream) option). 
+{{< /info >}} + +#### `spillOverThresholdNumRows` + +Introduced in: v3.10.0 + +This option allows queries to store intermediate and final results temporarily +on disk if the number of rows produced by the query exceeds the specified value. +This is used for decreasing the memory usage during the query execution. In a +query that iterates over a collection that contains documents, each row is a +document, and in a query that iterates over temporary values +(i.e. `FOR i IN 1..100`), each row is one of such temporary values. + +This option only has an effect on queries that use the `SORT` operation but +without a `LIMIT`, and if you enable the spillover feature by setting a path +for the directory to store the temporary data in with the +[`--temp.intermediate-results-path` startup option](../../components/arangodb-server/options.md#--tempintermediate-results-path). + +Default value: `5000000` rows. + +{{< info >}} +Spilling data from RAM onto disk is an experimental feature and is turned off +by default. The query results are still built up entirely in RAM on Coordinators +and single servers for non-streaming queries. To avoid the buildup of +the entire query result in RAM, use a streaming query (see the +[`stream`](#stream) option). +{{< /info >}} + +#### `allowDirtyReads` + +{{< tag "ArangoDB Enterprise Edition" "AMP" >}} + +Introduced in: v3.10.0 + +If you set this option to `true` and execute the query against a cluster +deployment, then the Coordinator is allowed to read from any shard replica and +not only from the leader. See [Read from followers](../../develop/http-api/documents.md#read-from-followers) +for details. + +#### `skipInaccessibleCollections` + +{{< tag "ArangoDB Enterprise Edition" "AMP" >}} + +Let AQL queries (especially graph traversals) treat collection to which a +user has **no access** rights for as if these collections are empty. +Instead of returning a *forbidden access* error, your queries execute normally. +This is intended to help with certain use-cases: A graph contains several collections +and different users execute AQL queries on that graph. You can naturally limit the +accessible results by changing the access rights of users on collections. + +#### `satelliteSyncWait` + +{{< tag "ArangoDB Enterprise Edition" "AMP" >}} + +Configure how long a DB-Server has time to bring the SatelliteCollections +involved in the query into sync. The default value is `60.0` seconds. +When the maximal time is reached, the query is stopped. + +## With `db._createStatement()` (ArangoStatement) + +The `_query()` method is a shorthand for creating an `ArangoStatement` object, +executing it and iterating over the resulting cursor. If more control over the +result set iteration is needed, it is recommended to first create an +`ArangoStatement` object as follows: + +```js +--- +name: 04_workWithAQL_statements1 +description: '' +--- +stmt = db._createStatement( { "query": "FOR i IN [ 1, 2 ] RETURN i * 2" } ); +``` + +To execute the query, use the `execute()` method of the _statement_ object: + +```js +--- +name: 05_workWithAQL_statements2 +description: '' +--- +~var stmt = db._createStatement( { "query": "FOR i IN [ 1, 2 ] RETURN i * 2" } ); +cursor = stmt.execute(); +``` + +You can pass a number to the `execute()` method to specify a batch size value. +The server returns at most this many results in one roundtrip. +The batch size cannot be adjusted after the query is first executed. 
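+
+A minimal sketch of passing a batch size to `execute()`:
+
+```js
+var stmt = db._createStatement({ "query": "FOR i IN 1..10 RETURN i" });
+var cursor = stmt.execute(5); // at most 5 results per roundtrip
+cursor.toArray();
+```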
+ +**Note**: There is no need to explicitly call the execute method if another +means of fetching the query results is chosen. The following two approaches +lead to the same result: + +```js +--- +name: executeQueryNoBatchSize +description: '' +--- +~db._create("users"); +~db.users.save({ name: "Gerhard" }); +~db.users.save({ name: "Helmut" }); +~db.users.save({ name: "Angela" }); +var result = db.users.all().toArray(); +print(result); + +var q = db._query("FOR x IN users RETURN x"); +result = [ ]; +while (q.hasNext()) { + result.push(q.next()); +} +print(result); +~db._drop("users") +``` + +The following two alternatives both use a batch size and return the same +result: + +```js +--- +name: executeQueryBatchSize +description: '' +--- +~db._create("users"); +~db.users.save({ name: "Gerhard" }); +~db.users.save({ name: "Helmut" }); +~db.users.save({ name: "Angela" }); +var result = [ ]; +var q = db.users.all(); +q.execute(1); +while(q.hasNext()) { + result.push(q.next()); +} +print(result); + +result = [ ]; +q = db._query("FOR x IN users RETURN x", {}, { batchSize: 1 }); +while (q.hasNext()) { + result.push(q.next()); +} +print(result); +~db._drop("users") +``` + +### Cursors + +Once the query executed the query results are available in a cursor. +The cursor can return all its results at once using the `toArray()` method. +This is a short-cut that you can use if you want to access the full result +set without iterating over it yourself. + +```js +--- +name: 05_workWithAQL_statements3 +description: '' +--- +~var stmt = db._createStatement( { "query": "FOR i IN [ 1, 2 ] RETURN i * 2" } ); +~var cursor = stmt.execute(); +cursor.toArray(); +``` + +Cursors can also be used to iterate over the result set document-by-document. +To do so, use the `hasNext()` and `next()` methods of the cursor: + +```js +--- +name: 05_workWithAQL_statements4 +description: '' +--- +~var stmt = db._createStatement( { "query": "FOR i IN [ 1, 2 ] RETURN i * 2" } ); +~var c = stmt.execute(); +while (c.hasNext()) { + require("@arangodb").print(c.next()); +} +``` + +Please note that you can iterate over the results of a cursor only once, and that +the cursor will be empty when you have fully iterated over it. To iterate over +the results again, the query needs to be re-executed. + +Additionally, the iteration can be done in a forward-only fashion. There is no +backwards iteration or random access to elements in a cursor. 
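+
+A short sketch illustrating that a cursor is exhausted after one full iteration
+and that re-executing the statement produces a fresh cursor:
+
+```js
+var stmt = db._createStatement({ "query": "FOR i IN [ 1, 2, 3 ] RETURN i" });
+var cursor = stmt.execute();
+cursor.toArray(); // [ 1, 2, 3 ]
+cursor.toArray(); // [ ] (the cursor is now empty)
+cursor = stmt.execute(); // re-execute to iterate again
+cursor.toArray(); // [ 1, 2, 3 ]
+```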
+
+### ArangoStatement parameters binding
+
+To execute an AQL query using bind parameters, you need to create a statement first
+and then bind the parameters to it before execution:
+
+```js
+---
+name: 05_workWithAQL_statements5
+description: ''
+---
+var stmt = db._createStatement( { "query": "FOR i IN [ @one, @two ] RETURN i * 2" } );
+stmt.bind("one", 1);
+stmt.bind("two", 2);
+cursor = stmt.execute();
+```
+
+The cursor results can then be dumped or iterated over as usual, e.g.:
+
+```js
+---
+name: 05_workWithAQL_statements6
+description: ''
+---
+~var stmt = db._createStatement( { "query": "FOR i IN [ @one, @two ] RETURN i * 2" } );
+~stmt.bind("one", 1);
+~stmt.bind("two", 2);
+~var cursor = stmt.execute();
+cursor.toArray();
+```
+
+or
+
+```js
+---
+name: 05_workWithAQL_statements7
+description: ''
+---
+~var stmt = db._createStatement( { "query": "FOR i IN [ @one, @two ] RETURN i * 2" } );
+~stmt.bind("one", 1);
+~stmt.bind("two", 2);
+~var cursor = stmt.execute();
+while (cursor.hasNext()) {
+  require("@arangodb").print(cursor.next());
+}
+```
+
+Please note that bind parameters can also be passed into the `_createStatement()`
+method directly, making it a bit more convenient:
+
+```js
+---
+name: 05_workWithAQL_statements8
+description: ''
+---
+stmt = db._createStatement({
+  "query": "FOR i IN [ @one, @two ] RETURN i * 2",
+  "bindVars": {
+    "one": 1,
+    "two": 2
+  }
+});
+```
+
+### Counting with a cursor
+
+Cursors also optionally provide the total number of results. By default, they do not.
+To make the server return the total number of results, you may set the `count` attribute to
+`true` when creating a statement:
+
+```js
+---
+name: 05_workWithAQL_statements9
+description: ''
+---
+stmt = db._createStatement( {
+  "query": "FOR i IN [ 1, 2, 3, 4 ] RETURN i",
+  "count": true } );
+```
+
+After executing this query, you can use the `count` method of the cursor to get the
+total number of results from the result set:
+
+```js
+---
+name: 05_workWithAQL_statements10
+description: ''
+---
+~var stmt = db._createStatement( { "query": "FOR i IN [ 1, 2, 3, 4 ] RETURN i", "count": true } );
+var cursor = stmt.execute();
+cursor.count();
+```
+
+Please note that the `count` method returns nothing if you did not specify the `count`
+attribute when creating the query.
+
+This is intentional so that the server may apply optimizations when executing the query and
+construct the result set incrementally. Incremental creation of the result set
+is not possible if all of the results need to be shipped to the client anyway. Therefore, the client
+has the choice to specify `count` and retrieve the total number of results for a query (and
+disable potential incremental result set creation on the server), or to not retrieve the total
+number of results and allow the server to apply optimizations.
+
+Please note that at the moment the server always creates the full result set for each query, so
+specifying or omitting the `count` attribute currently does not have any impact on query execution.
+This may change in the future. Future versions of ArangoDB may create result sets incrementally
+on the server side and may be able to apply optimizations if a result set is not fully fetched by
+a client.
+
+### Using cursors to obtain additional information on internal timings
+
+Cursors can also optionally provide statistics of the internal execution phases. By default, they do not.
+To get to know how long parsing, optimization, instantiation and execution took, +make the server return that by setting the `profile` attribute to +`true` when creating a statement: + +```js +--- +name: 06_workWithAQL_statements11 +description: '' +--- +stmt = db._createStatement({ + query: "FOR i IN [ 1, 2, 3, 4 ] RETURN i", + options: {"profile": true}}); +``` + +After executing this query, you can use the `getExtra()` method of the cursor to get the +produced statistics: + +```js +--- +name: 06_workWithAQL_statements12 +description: '' +--- +~var stmt = db._createStatement( { "query": "FOR i IN [ 1, 2, 3, 4 ] RETURN i", options: {"profile": true}} ); +var cursor = stmt.execute(); +cursor.getExtra(); +``` + +## Query validation with `db._parse()` + +The `_parse()` method of the `db` object can be used to parse and validate a +query syntactically, without actually executing it. + +```js +--- +name: 06_workWithAQL_statements13 +description: '' +--- +db._parse( "FOR i IN [ 1, 2 ] RETURN i" ); +``` diff --git a/site/content/arangodb/oem/aql/how-to-invoke-aql/with-the-web-interface.md b/site/content/arangodb/oem/aql/how-to-invoke-aql/with-the-web-interface.md new file mode 100644 index 0000000000..abb1e651e2 --- /dev/null +++ b/site/content/arangodb/oem/aql/how-to-invoke-aql/with-the-web-interface.md @@ -0,0 +1,50 @@ +--- +title: Executing AQL queries in the ArangoDB web interface +menuTitle: with the Web Interface +weight: 10 +description: >- + You can run ad-hoc AQL queries using the query editor in the web interface +--- +In the **QUERIES** section of the web interface, type in a query in the main box +and execute it by clicking the **Execute** button. The query result is displayed +below the editor. + +The editor provides a few example queries that you can use as templates. +It also provides a feature to explain a query and inspect its execution plan +by clicking the **Explain** button. + +Bind parameters can be defined in the right-hand side pane. The format is the +same as used for bind parameters in the HTTP REST API and in (JavaScript) +application code. + +Here is an example: + +```aql +FOR doc IN @@collection + FILTER CONTAINS(LOWER(doc.author), @search, false) + RETURN { "name": doc.name, "descr": doc.description, "author": doc.author } +``` + +Bind parameters (table view mode): + +| Key | Value | +|-------------|--------| +| @collection | _apps | +| search | arango | + +Bind parameters (JSON view mode): + +```json +{ + "@collection": "_apps", + "search": "arango" +} +``` + +How bind parameters work can be found in [AQL Fundamentals](../fundamentals/bind-parameters.md). + +Queries can also be saved in the AQL editor along with their bind parameter values +for later reuse. This data is stored in the user profile in the current database +(in the `_users` system collection). + +Also see the detailed description of the [Web Interface](../../components/web-interface/_index.md). diff --git a/site/content/arangodb/oem/aql/operators.md b/site/content/arangodb/oem/aql/operators.md new file mode 100644 index 0000000000..d4c551fc29 --- /dev/null +++ b/site/content/arangodb/oem/aql/operators.md @@ -0,0 +1,816 @@ +--- +title: Operators +menuTitle: Operators +weight: 15 +description: >- + AQL supports a number of operators that can be used in expressions, + such as for arithmetic, comparing values, and logically combining conditions +--- +## Comparison operators + +Comparison (or relational) operators compare two operands. 
They can be used with +any input data types, and return a boolean result value. + +The following comparison operators are supported: + +| Operator | Description +|:-----------|:----------- +| `==` | equality +| `!=` | inequality +| `<` | less than +| `<=` | less or equal +| `>` | greater than +| `>=` | greater or equal +| `IN` | test if a value is contained in an array +| `NOT IN` | test if a value is not contained in an array +| `LIKE` | tests if a string value matches a pattern +| `NOT LIKE` | tests if a string value does not match a pattern +| `=~` | tests if a string value matches a regular expression +| `!~` | tests if a string value does not match a regular expression + +Each of the comparison operators returns a boolean value if the comparison can +be evaluated and returns *true* if the comparison evaluates to true, and *false* +otherwise. + +The comparison operators accept any data types for the first and second +operands. However, `IN` and `NOT IN` only return a meaningful result if +their right-hand operand is an array. `LIKE` and `NOT LIKE` only execute +if both operands are string values. All four operators do not perform +implicit type casts if the compared operands have different types, i.e. +they test for strict equality or inequality (`0` is different to `"0"`, +`[0]`, `false` and `null` for example). + +```aql + 0 == null // false + 1 > 0 // true + true != null // true + 45 <= "yikes!" // true + 65 != "65" // true + 65 == 65 // true + 1.23 > 1.32 // false + 1.5 IN [ 2, 3, 1.5 ] // true + "foo" IN null // false +42 NOT IN [ 17, 40, 50 ] // true + "abc" == "abc" // true + "abc" == "ABC" // false + "foo" LIKE "f%" // true + "foo" NOT LIKE "f%" // false + "foo" =~ "^f[o].$" // true + "foo" !~ "[a-z]+bar$" // true +``` + +The `LIKE` operator checks whether its left operand matches the pattern specified +in its right operand. The pattern can consist of regular characters and wildcards. +The supported wildcards are `_` to match a single arbitrary character, and `%` to +match any number of arbitrary characters. Literal `%` and `_` need to be escaped +with a backslash. Backslashes need to be escaped themselves, which effectively +means that two reverse solidus characters need to precede a literal percent sign +or underscore. In arangosh, additional escaping is required, making it four +backslashes in total preceding the to-be-escaped character. + +```aql + "abc" LIKE "a%" // true + "abc" LIKE "_bc" // true +"a_b_foo" LIKE "a\\_b\\_foo" // true +``` + +The pattern matching performed by the `LIKE` operator is case-sensitive. + +The `NOT LIKE` operator has the same characteristics as the `LIKE` operator +but with the result negated. It is thus identical to `NOT (… LIKE …)`. Note +the parentheses, which are necessary for certain expressions: + +```aql +FOR doc IN coll + RETURN NOT doc.attr LIKE "…" +``` + +The return expression gets transformed into `LIKE(!doc.attr, "…")`, leading +to unexpected results. `NOT(doc.attr LIKE "…")` gets transformed into the +more reasonable `! LIKE(doc.attr, "…")`. + +The regular expression operators `=~` and `!~` expect their left-hand operands to +be strings, and their right-hand operands to be strings containing valid regular +expressions as specified in the documentation for the AQL function +[`REGEX_TEST()`](functions/string.md#regex_test). + +## Array comparison operators + +Most comparison operators also exist as an *array variant*. 
In the array variant,
+a `==`, `!=`, `>`, `>=`, `<`, `<=`, `IN`, or `NOT IN` operator is prefixed with
+an `ALL`, `ANY`, or `NONE` keyword. This changes the operator's behavior to
+compare the individual array elements of the left-hand argument to the right-hand
+argument. Depending on the quantifying keyword, all, any, or none of these
+comparisons need to be satisfied to evaluate to `true` overall.
+
+You can also combine one of the supported comparison operators with the special
+`AT LEAST (<expression>)` operator to require an arbitrary number of elements
+to satisfy the condition to evaluate to `true`. You can use a static number or
+calculate it dynamically using an expression.
+
+```aql
+[ 1, 2, 3 ] ALL IN [ 2, 3, 4 ] // false
+[ 1, 2, 3 ] ALL IN [ 1, 2, 3 ] // true
+[ 1, 2, 3 ] NONE IN [ 3 ] // false
+[ 1, 2, 3 ] NONE IN [ 23, 42 ] // true
+[ 1, 2, 3 ] ANY IN [ 4, 5, 6 ] // false
+[ 1, 2, 3 ] ANY IN [ 1, 42 ] // true
+[ 1, 2, 3 ] ANY == 2 // true
+[ 1, 2, 3 ] ANY == 4 // false
+[ 1, 2, 3 ] ANY > 0 // true
+[ 1, 2, 3 ] ANY <= 1 // true
+[ 1, 2, 3 ] NONE < 99 // false
+[ 1, 2, 3 ] NONE > 10 // true
+[ 1, 2, 3 ] ALL > 2 // false
+[ 1, 2, 3 ] ALL > 0 // true
+[ 1, 2, 3 ] ALL >= 3 // false
+["foo", "bar"] ALL != "moo" // true
+["foo", "bar"] NONE == "bar" // false
+["foo", "bar"] ANY == "foo" // true
+
+[ 1, 2, 3 ] AT LEAST (2) IN [ 2, 3, 4 ] // true
+["foo", "bar"] AT LEAST (1+1) == "foo" // false
+```
+
+Note that these operators do not utilize indexes in regular queries.
+The operators are also supported in [SEARCH expressions](high-level-operations/search.md),
+where ArangoSearch's indexes can be utilized. The semantics differ, however; see
+[AQL `SEARCH` operation](high-level-operations/search.md#array-comparison-operators).
+
+## Logical operators
+
+The following logical operators are supported in AQL:
+
+- `&&` logical and operator
+- `||` logical or operator
+- `!` logical not/negation operator
+
+AQL also supports the following alternative forms for the logical operators:
+
+- `AND` logical and operator
+- `OR` logical or operator
+- `NOT` logical not/negation operator
+
+The alternative forms are aliases and functionally equivalent to the regular
+operators.
+
+The two-operand logical operators in AQL are executed with short-circuit
+evaluation (except if one of the operands is or includes a subquery; in this
+case, the subquery is pulled out and evaluated before the logical operator).
+
+The result of the logical operators in AQL is defined as follows:
+
+- `lhs && rhs` returns `lhs` if it is `false` or would be `false` when converted
+  to a boolean. If `lhs` is `true` or would be `true` when converted to a boolean,
+  `rhs` is returned.
+- `lhs || rhs` returns `lhs` if it is `true` or would be `true` when converted
+  to a boolean. If `lhs` is `false` or would be `false` when converted to a boolean,
+  `rhs` is returned.
+- `! value` returns the negated value of `value` converted to a boolean
+
+```aql
+u.age > 15 && u.address.city != ""
+true || false
+NOT u.isInvalid
+1 || ! 0
+```
+
+Passing non-boolean values to a logical operator is allowed. Any non-boolean operands
+are cast to boolean implicitly by the operator, without making the query abort.
+ +The *conversion to a boolean value* works as follows: +- `null` is converted to `false` +- boolean values remain unchanged +- all numbers unequal to zero are `true`, zero is `false` +- an empty string is `false`, all other strings are `true` +- arrays (`[ ]`) and objects / documents (`{ }`) are `true`, regardless of their contents + +The result of *logical and* and *logical or* operations can now have any data +type and is not necessarily a boolean value. + +For example, the following logical operations return boolean values: + +```aql +25 > 1 && 42 != 7 // true +22 IN [ 23, 42 ] || 23 NOT IN [ 22, 7 ] // true +25 != 25 // false +``` + +… whereas the following logical operations do not return boolean values: + +```aql + 1 || 7 // 1 +null || "foo" // "foo" +null && true // null +true && 23 // 23 +``` + +## Arithmetic operators + +Arithmetic operators perform an arithmetic operation on two numeric +operands. The result of an arithmetic operation is again a numeric value. + +AQL supports the following arithmetic operators: + +- `+` addition +- `-` subtraction +- `*` multiplication +- `/` division +- `%` modulus + +Unary plus and unary minus are supported as well: + +```aql +LET x = -5 +LET y = 1 +RETURN [-x, +y] +// [5, 1] +``` + +For exponentiation, there is a [numeric function](functions/numeric.md#pow) `POW()`. +The syntax `base ** exp` is not supported. + +For string concatenation, you must use the [`CONCAT()` string function](functions/string.md#concat). +Combining two strings with a plus operator (`"foo" + "bar"`) does not work! +Also see [Common Errors](common-errors.md). + +```aql +1 + 1 +33 - 99 +12.4 * 4.5 +13.0 / 0.1 +23 % 7 +-15 ++9.99 +``` + +The arithmetic operators accept operands of any type. Passing non-numeric values to an +arithmetic operator casts the operands to numbers using the type casting rules +applied by the [`TO_NUMBER()`](functions/type-check-and-cast.md#to_number) function: + +- `null` is converted to `0` +- `false` is converted to `0`, `true` is converted to `1` +- a valid numeric value remains unchanged, but NaN and Infinity are converted to `0` +- string values are converted to a number if they contain a valid string representation + of a number. Any whitespace at the start or the end of the string is ignored. Strings + with any other contents are converted to the number `0` +- an empty array is converted to `0`, an array with one member is converted to the numeric + representation of its sole member. Arrays with more members are converted to the number + `0`. +- objects / documents are converted to the number `0`. + +An arithmetic operation that produces an invalid value, such as `1 / 0` +(division by zero), produces a result value of `null`. The query is not +aborted, but you may see a warning. + +```aql + 1 + "a" // 1 + 1 + "99" // 100 + 1 + null // 1 +null + 1 // 1 + 3 + [ ] // 3 + 24 + [ 2 ] // 26 + 24 + [ 2, 4 ] // 24 + 25 - null // 25 + 17 - true // 16 + 23 * { } // 0 + 5 * [ 7 ] // 35 + 24 / "12" // 2 + 1 / 0 // null (with a 'division by zero' warning) +``` + +## Ternary operator + +AQL also supports a ternary operator that can be used for conditional +evaluation. The ternary operator expects a boolean condition as its first +operand, and it returns the result of the second operand if the condition +evaluates to true, and the third operand otherwise. +You may use [subqueries](fundamentals/subqueries.md) as operands. + +In the following example, the expression returns `u.userId` if `u.age` is +greater than 15 or if `u.active` is `true`. 
Otherwise it returns `null`: + +```aql +u.age > 15 || u.active == true ? u.userId : null +``` + +There is also a shortcut variant of the ternary operator with just two +operands. This variant can be used if the expression for the boolean +condition and the return value should be the same. + +In the following example, the expression evaluates to `u.value` if `u.value` is +truthy. Otherwise, a fixed string is given back: + +```aql +u.value ? : 'value is null, 0 or not present' +``` + +The condition (here just `u.value`) is only evaluated once if the second +operand between `?` and `:` is omitted, whereas it would be evaluated twice +in case of `u.value ? u.value : 'value is null'`. + +{{< info >}} +Subqueries that are used inside expressions are pulled out of these +expressions and executed beforehand. That means that subqueries do not +participate in lazy evaluation of operands, for example, in the +ternary operator. Also see +[evaluation of subqueries](fundamentals/subqueries.md#evaluation-of-subqueries). +{{< /info >}} + +## Range operator + +AQL supports expressing simple numeric ranges with the `..` operator. +This operator can be used to easily iterate over a sequence of numeric +values. + +The `..` operator produces an array of the integer values in the +defined range, with both bounding values included. + +```aql +2010..2013 +``` + +The above example produces the following result: + +```json +[ 2010, 2011, 2012, 2013 ] +``` + +Using the range operator is equivalent to writing an array with the integer +values in the range specified by the bounds of the range. If the bounds of +the range operator are non-integers, they are converted to integer values first. + +There is also a [`RANGE()` function](functions/numeric.md#range). + +## Array operators + +AQL provides different array operators: + +- `[n]` to [access the array element](#indexed-value-access) at index `n` +- `[*]` for [expanding array variables](#array-expansion) +- `[**]`, `[***]` etc. for [flattening arrays](#array-contraction) +- `[* ...]`, `[** ...]` etc. for filtering, limiting, and projecting arrays using + [inline expressions](#inline-expressions) +- `[? ...]` for nested search, known as the [question mark operator](#question-mark-operator) + +### Indexed value access + +You can access individual array elements by their position using the `[]` accessor. +The position is called the *index* and starts at `0`. + +When specifying an index, use a numeric integer value. You can use negative +index values to access array elements starting from the end of the array. +This is convenient if the length of the array is unknown and you want to access +elements at the end of the array. + +You can also use an expression and calculate the index of an element. + +{{< info >}} +If you try to access an array element with an out-of-bounds index (after the last +element or before the first element), the result is a `null` value without +raising an error or warning. +{{< /info >}} + +```aql +LET friends = [ "tina", "helga", "alfred" ] + +friends[0] // access 1st array element (elements start at index 0) +friends[2] // access 3rd array element + +friends[-1] // access last array element +friends[-2] // access second to last array element + +friends[LENGTH(friends) / 2] // access array element in the middle (floored) +``` + +### Array expansion + +In order to access a named attribute from all elements in an array easily, AQL +offers the shortcut operator `[*]` for array variable expansion. 
+ +Using the `[*]` operator with an array variable will iterate over all elements +in the array, thus allowing to access a particular attribute of each element. It is +required that the expanded variable is an array. The result of the `[*]` +operator is again an array. + +To demonstrate the array expansion operator, let's go on with the following three +example *users* documents: + +```json +[ + { + "name": "john", + "age": 35, + "friends": [ + { "name": "tina", "age": 43 }, + { "name": "helga", "age": 52 }, + { "name": "alfred", "age": 34 } + ] + }, + { + "name": "yves", + "age": 24, + "friends": [ + { "name": "sergei", "age": 27 }, + { "name": "tiffany", "age": 25 } + ] + }, + { + "name": "sandra", + "age": 40, + "friends": [ + { "name": "bob", "age": 32 }, + { "name": "elena", "age": 48 } + ] + } +] +``` + +With the `[*]` operator it becomes easy to query just the names of the +friends for each user: + +```aql +FOR u IN users + RETURN { name: u.name, friends: u.friends[*].name } +``` + +This will produce: + +```json +[ + { "name" : "john", "friends" : [ "tina", "helga", "alfred" ] }, + { "name" : "yves", "friends" : [ "sergei", "tiffany" ] }, + { "name" : "sandra", "friends" : [ "bob", "elena" ] } +] +``` + +This is a shortcut for the longer, semantically equivalent query: + +```aql +FOR u IN users + RETURN { name: u.name, friends: (FOR f IN u.friends RETURN f.name) } +``` + +### Array contraction + +In order to collapse (or flatten) results in nested arrays, AQL provides the `[**]` +operator. It works similar to the `[*]` operator, but additionally collapses nested +arrays. + +How many levels are collapsed is determined by the amount of asterisk characters used. +`[**]` collapses one level of nesting - just like `FLATTEN(array)` or `FLATTEN(array, 1)` +would do -, `[***]` collapses two levels - the equivalent to `FLATTEN(array, 2)` - and +so on. + +Let's compare the array expansion operator with an array contraction operator. +For example, the following query produces an array of friend names per user: + +```aql +FOR u IN users + RETURN u.friends[*].name +``` + +As we have multiple users, the overall result is a nested array: + +```json +[ + [ + "tina", + "helga", + "alfred" + ], + [ + "sergei", + "tiffany" + ], + [ + "bob", + "elena" + ] +] +``` + +If the goal is to get rid of the nested array, we can apply the `[**]` operator on the +result. But simply appending `[**]` to the query won't help, because *u.friends* +is not a nested (multi-dimensional) array, but a simple (one-dimensional) array. Still, +the `[**]` can be used if it has access to a multi-dimensional nested result. + +We can extend above query as follows and still create the same nested result: + +```aql +RETURN ( + FOR u IN users RETURN u.friends[*].name +) +``` + +By now appending the `[**]` operator at the end of the query... + +```aql +RETURN ( + FOR u IN users RETURN u.friends[*].name +)[**] +``` + +... the query result becomes: + +```json +[ + [ + "tina", + "helga", + "alfred", + "sergei", + "tiffany", + "bob", + "elena" + ] +] +``` + +Note that the elements are not de-duplicated. For a flat array with only unique +elements, a combination of [`UNIQUE()`](functions/array.md#unique) and +[`FLATTEN()`](functions/array.md#flatten) is advisable. + +### Inline expressions + +It is possible to filter elements while iterating over an array, to limit the amount +of returned elements and to create a projection using the current array element. +Sorting is not supported by this shorthand form. 
+ +These inline expressions can follow array expansion and contraction operators +`[* ...]`, `[** ...]` etc. The keywords `FILTER`, `LIMIT` and `RETURN` +must occur in this order if they are used in combination, and can only occur once: + +anyArray[* FILTER conditions LIMIT skip,limit RETURN projection] + +Example with nested numbers and array contraction: + +```aql +LET arr = [ [ 1, 2 ], 3, [ 4, 5 ], 6 ] +RETURN arr[** FILTER CURRENT % 2 == 0] +``` + +All even numbers are returned in a flat array: + +```json +[ + [ 2, 4, 6 ] +] +``` + +Complex example with multiple conditions, limit and projection: + +```aql +FOR u IN users + RETURN { + name: u.name, + friends: u.friends[* FILTER CONTAINS(CURRENT.name, "a") AND CURRENT.age > 40 + LIMIT 2 + RETURN CONCAT(CURRENT.name, " is ", CURRENT.age) + ] + } +``` + +No more than two computed strings based on *friends* with an `a` in their name and +older than 40 years are returned per user: + +```json +[ + { + "name": "john", + "friends": [ + "tina is 43", + "helga is 52" + ] + }, + { + "name": "sandra", + "friends": [ + "elena is 48" + ] + }, + { + "name": "yves", + "friends": [] + } +] +``` + +#### Inline filter + +To return only the names of friends that have an *age* value +higher than the user herself, an inline `FILTER` can be used: + +```aql +FOR u IN users + RETURN { name: u.name, friends: u.friends[* FILTER CURRENT.age > u.age].name } +``` + +The pseudo-variable *CURRENT* can be used to access the current array element. +The `FILTER` condition can refer to *CURRENT* or any variables valid in the +outer scope. + +#### Inline limit + +The number of elements returned can be restricted with `LIMIT`. It works the same +as the [limit operation](high-level-operations/limit.md). `LIMIT` must come after `FILTER` +and before `RETURN`, if they are present. + +```aql +FOR u IN users + RETURN { name: u.name, friends: u.friends[* LIMIT 1].name } +``` + +Above example returns one friend each: + +```json +[ + { "name": "john", "friends": [ "tina" ] }, + { "name": "sandra", "friends": [ "bob" ] }, + { "name": "yves", "friends": [ "sergei" ] } +] +``` + +A number of elements can also be skipped and up to *n* returned: + +```aql +FOR u IN users + RETURN { name: u.name, friends: u.friends[* LIMIT 1,2].name } +``` + +The example query skips the first friend and returns two friends at most +per user: + +```json +[ + { "name": "john", "friends": [ "helga", "alfred" ] }, + { "name": "sandra", "friends": [ "elena" ] }, + { "name": "yves", "friends": [ "tiffany" ] } +] +``` + +#### Inline projection + +To return a projection of the current element, use `RETURN`. If a `FILTER` is +also present, `RETURN` must come later. + +```aql +FOR u IN users + RETURN u.friends[* RETURN CONCAT(CURRENT.name, " is a friend of ", u.name)] +``` + +The above will return: + +```json +[ + [ + "tina is a friend of john", + "helga is a friend of john", + "alfred is a friend of john" + ], + [ + "sergei is a friend of yves", + "tiffany is a friend of yves" + ], + [ + "bob is a friend of sandra", + "elena is a friend of sandra" + ] +] +``` + +### Question mark operator + +You can use the `[? ... ]` operator on arrays to check whether the elements +fulfill certain criteria, and you can specify how often they should be satisfied. +The operator is similar to an inline filter but with an additional length check +and it evaluates to `true` or `false`. + +The following example shows how to check whether two of numbers in the array +are even: + +```aql +LET arr = [ 1, 2, 3, 4 ] +RETURN arr[? 
2 FILTER CURRENT % 2 == 0] // true
+```
+
+The number `2` after the `?` is the quantifier. It is optional and defaults to
+`ANY`. The following quantifiers are supported:
+
+- Integer numbers for exact quantities (e.g. `2`)
+- Number ranges for a quantity between the two values (e.g. `2..3`)
+- `NONE` (equivalent to `0`)
+- `ANY`
+- `ALL`
+- `AT LEAST`
+
+The quantifier needs to be followed by a `FILTER` operation if you want to specify
+conditions. You can refer to the current array element via the `CURRENT`
+pseudo-variable in the filter expression. If you leave out the quantifier and
+`FILTER` operation (only `arr[?]`), then it is only checked whether `arr` is an
+array and has at least one element.
+
+The question mark operator is a shorthand for an inline filter with a
+surrounding length check. The following table compares both variants:
+
+| Question mark operator | Inline filter with length check |
+|:-----------------------|:--------------------------------|
+| `arr[? <number> FILTER <conditions>]` | `LENGTH(arr[* FILTER <conditions>]) == <number>`
+| `arr[? <min>..<max> FILTER <conditions>]` | `IN_RANGE(LENGTH(arr[* FILTER <conditions>]), <min>, <max>, true, true)`
+| `arr[? NONE FILTER <conditions>]` | `LENGTH(arr[* FILTER <conditions>]) == 0`
+| `arr[? ANY FILTER <conditions>]` | `LENGTH(arr[* FILTER <conditions>]) > 0`
+| `arr[? ALL FILTER <conditions>]` | `LENGTH(arr[* FILTER <conditions>]) == LENGTH(arr)`
+| `arr[? AT LEAST (<number>) FILTER <conditions>]` | `LENGTH(arr[* FILTER <conditions>]) >= <number>`
+| `arr[?]` | `LENGTH(arr[*]) > 0`
+{.fixed}
+
+The question mark operator can be used for nested search (Enterprise Edition only):
+- [Nested search with ArangoSearch](../indexes-and-search/arangosearch/nested-search.md) using Views
+- Nested search using [Inverted indexes](../indexes-and-search/indexing/working-with-indexes/inverted-indexes.md#nested-search-enterprise-edition)
+
+## Object operators
+
+- `.` and `[expr]` for [accessing an object attribute](#attribute-access)
+
+### Attribute access
+
+You can access individual object attributes by their names using the
+dot accessor `.` and the square bracket accessor `[]`.
+
+The dot accessor lets you specify the attribute name as an unquoted string.
+This is only possible if the attribute name would be valid as a
+[variable name](fundamentals/syntax.md#variable-names). Otherwise, you need to
+quote the name with backticks or forward ticks, or use the square bracket accessor.
+
+You can also use the dot accessor together with a [bind parameter](fundamentals/bind-parameters.md)
+to select an attribute or sub-attribute.
+
+```aql
+LET ob = { name: "sandra", "with space": true }
+
+LET unquoted = ob.name
+
+LET quoted_1 = ob.`with space`
+LET quoted_2 = ob.´with space´
+
+LET bindvar = ob.@attr
+```
+
+The square bracket accessor lets you specify an expression to select an attribute.
+This is usually a quoted string literal but you can also calculate the name
+dynamically using an arbitrary expression.
+
+You can also use the square bracket accessor together with a
+[bind parameter](fundamentals/bind-parameters.md) to select an attribute.
+
+```aql
+LET ob = { name: "sandra", "with 2 spaces": true }
+
+LET literal_1 = ob["name"]
+LET literal_2 = ob["with 2 spaces"]
+
+LET attribute = "name"
+LET variable = ob[attribute]
+
+LET expression = ob[CONCAT_SEPARATOR(" ", "with", 1+1, "spaces")]
+
+LET bindvar = ob[@attr]
+```
+
+{{< info >}}
+If you try to access a non-existing attribute in one way or another, the result
+is a `null` value without raising an error or warning.
+{{< /info >}} + +## Operator precedence + +The operator precedence in AQL is similar as in other familiar languages +(highest precedence first): + +| Operator(s) | Description +|:---------------------|:----------- +| `::` | scope (user-defined AQL functions) +| `[*]` | array expansion +| `[]` | indexed value access (arrays), attribute access (objects) +| `.` | attribute access (objects) +| `()` | function call +| `!`, `NOT`, `+`, `-` | unary not (logical negation), unary plus, unary minus +| `*`, `/`, `%` | multiplication, division, modulus +| `+`, `-` | addition, subtraction +| `..` | range operator +| `<`, `<=`, `>=`, `>` | less than, less equal, greater equal, greater than +| `IN`, `NOT IN` | in operator, not in operator +| `==`, `!=`, `LIKE`, `NOT LIKE`, `=~`, `!~` | equality, inequality, wildcard match, wildcard non-match, regex match, regex non-match +| `AT LEAST` | at least modifier (array comparison operator, question mark operator) +| `OUTBOUND`, `INBOUND`, `ANY`, `ALL`, `NONE` | graph traversal directions, array comparison operators, question mark operator +| `&&`, `AND` | logical and +| `\|\|`, `OR` | logical or +| `INTO` | into operator (INSERT / UPDATE / REPLACE / REMOVE / COLLECT operations) +| `WITH` | with operator (WITH / UPDATE / REPLACE / COLLECT operations) +| `=` | variable assignment (LET / COLLECT operations, AGGREGATE / PRUNE clauses) +| `?`, `:` | ternary operator, object literals +| `DISTINCT` | distinct modifier (RETURN operations) +| `,` | comma separator + +The parentheses `(` and `)` can be used to enforce a different operator +evaluation order. diff --git a/site/content/arangodb/oem/aql/user-defined-functions.md b/site/content/arangodb/oem/aql/user-defined-functions.md new file mode 100644 index 0000000000..78376f0d24 --- /dev/null +++ b/site/content/arangodb/oem/aql/user-defined-functions.md @@ -0,0 +1,405 @@ +--- +title: Extending AQL with user-defined functions +menuTitle: User-defined Functions +weight: 45 +description: >- + You can write UDFs in JavaScript to extend AQL or to simplify queries +--- +AQL comes with a [built-in set of functions](functions/_index.md), but it is +not a fully-featured programming language. To add missing functionality or to +simplify queries, you may write your own user-defined functions (**UDFs**) in +JavaScript and make them available in AQL. + +## Known Limitations + +{{< warning >}} +UDFs can have serious effects on the performance of your queries and the resource +usage in ArangoDB. Especially in cluster setups they should not be used against +much data, because this data will need to be sent over the network back and forth +between _DB-Servers_ and _Coordinators_, potentially adding a lot of latency. +This can be mitigated by very selective `FILTER`s before calls to UDFs. +{{< /warning >}} + +Since the optimizer doesn't know anything about the nature of your function, +**the optimizer can't use indexes for UDFs**. So you should never lean on a UDF +as the primary criterion for a `FILTER` statement to reduce your query result set. +Instead, put a another `FILTER` statement in front of it. You should make sure +that this [**`FILTER` statement** is effective](execution-and-performance/query-optimization.md) +to reduce the query result before passing it to your UDF. + +Rule of thumb is, the closer the UDF is to your final `RETURN` statement +(or maybe even inside it), the better. + +When used in clusters, UDFs are always executed on a +[Coordinator](../deploy/cluster/_index.md). 
+It is not possible to execute UDFs on DB-Servers, as no JavaScript execution +engine is available on DB-Servers. Queries that would push UDF execution to +DB-Servers are aborted with a parse error. This includes using UDFs in traversal +`PRUNE` conditions, as well as `FILTER` conditions that can be moved into the +traversal execution on a DB-Server. These limitations also apply to the +single server deployment mode to keep the differences to cluster deployments minimal. + +As UDFs are written in JavaScript, each query that executes a UDF will acquire +one V8 context to execute the UDFs in it. V8 contexts can be re-used across subsequent +queries, but when UDF-invoking queries run in parallel, they will each require a +dedicated V8 context. + +Because UDFs use the V8 JavaScript engine, the engine's default memory limit of 512 MB is applied. + +Using UDFs in clusters may thus result in a higher resource allocation +in terms of used V8 contexts and server threads. If you run out +of these resources, your query may abort with a +[**cluster backend unavailable**](../develop/error-codes.md) error. + +To overcome these mentioned limitations, you may want to increase the +[number of available V8 contexts](../components/arangodb-server/options.md#--javascriptv8-contexts) +(at the expense of increased memory usage), and the +[number of available server threads](../components/arangodb-server/options.md#--servermaximal-threads). + +In addition, modification of global JavaScript variables from inside UDFs is +unsupported, as is reading or changing the data of any collection or running +queries from inside an AQL user function. + +## Naming + +AQL functions that are implemented with JavaScript are always in a namespace. +To register a user-defined AQL function, you need to give it a name with a +namespace. The `::` symbol is used as the namespace separator, for example, +`MYGROUP::MYFUNC`. You can use one or multiple levels of namespaces to create +meaningful function groups. + +The names of user-defined functions are case-insensitive, like all function +names in AQL. + +To refer to and call user-defined functions in AQL queries, you need to use the +fully qualified name with the namespaces: + +```aql +MYGROUP::MYFUNC() +MYFUNCTIONS::MATH::RANDOM() +``` + +ArangoDB's built-in AQL functions are all implemented in C++ and are not in a +namespace, except for the internal `V8()` function, which resides in the `_aql` +namespace. It is the default namespace, which means that you can use the +unqualified name of the function (without `_aql::`) to refer to it. Note that +you cannot add own functions to this namespace. + +## Variables and side effects + +User functions can take any number of input arguments and should +provide one result via a `return` statement. User functions should be kept +purely functional and thus free of side effects and state, and state modification. + +{{< warning >}} +Modification of global variables is unsupported, as is reading or changing +the data of any collection or running queries from inside an AQL user function. +{{< /warning >}} + +User function code is late-bound, and may thus not rely on any variables +that existed at the time of declaration. If user function code requires +access to any external data, it must take care to set up the data by +itself. + +All AQL user function-specific variables should be introduced with the `var`, +`let`, or `const` keywords in order to not accidentally access already defined +variables from outer scopes. 
Not using a declaration keyword for own variables +may cause side effects when executing the function. + +Here is an example that may modify outer scope variables `i` and `name`, +making the function **not** side-effect free: + +```js +function (values) { + for (i = 0; i < values.length; ++i) { + name = values[i]; + if (name === "foo") { + return i; + } + } + return null; +} +``` + +The above function can be made free of side effects by using the `var`, `let`, +or `const` keywords, so the variables become function-local variables: + +```js +function (values) { + for (let i = 0; i < values.length; ++i) { + let name = values[i]; + if (name === "foo") { + return i; + } + } + return null; +} +``` + +## Input parameters + +In order to return a result, a user function should use a `return` instruction +rather than modifying its input parameters. + +AQL user functions are allowed to modify their input parameters for input +parameters that are null, boolean, numeric or string values. Modifying these +input parameter types inside a user function should be free of side effects. +However, user functions should not modify input parameters if the parameters are +arrays or objects and as such passed by reference, as that may modify variables +and state outside of the user function itself. + +## Return values + +User functions must only return primitive types (i.e. `null`, boolean +values, numeric values, string values) or aggregate types (arrays or +objects) composed of these types. +Returning any other JavaScript object type (Function, Date, RegExp etc.) from +a user function may lead to undefined behavior and should be avoided. + +## Enforcing strict mode + +By default, any user function code is executed in *sloppy mode*. In order to +make a user function run in strict mode, use `"use strict"` explicitly inside +the user function: + +```js +function (values) { + "use strict" + + for (let i = 0; i < values.length; ++i) { + let name = values[i]; + if (name === "foo") { + return i; + } + } + return null; +} +``` + +Any violation of the strict mode triggers a runtime error. + +## Registering and unregistering user functions + +User-defined functions (UDFs) can be registered in the selected database +using the `@arangodb/aql/functions` module as follows: + +```js +var aqlfunctions = require("@arangodb/aql/functions"); +``` + +To register a function, the fully qualified function name plus the +function code must be specified. This can easily be done in +[arangosh](../components/tools/arangodb-shell/_index.md). The +[HTTP Interface](../develop/http-api/queries/user-defined-aql-functions.md) also offers +User Functions management. + +In a cluster setup, make sure to connect to a Coordinator to manage the UDFs. + +Documents in the `_aqlfunctions` collection (or any other system collection) +should not be accessed directly, but only via the dedicated interfaces. +Otherwise you might see caching issues or accidentally break something. +The interfaces ensure the correct format of the documents and invalidate +the UDF cache. + +### Registering an AQL user function + +For testing, it may be sufficient to directly type the function code in the shell. +To manage more complex code, you may write it in the code editor of your choice +and save it as file. 
For example: + +```js +/* path/to/file.js */ +'use strict'; + +function greeting(name) { + if (name === undefined) { + name = "World"; + } + return `Hello ${name}!`; +} + +module.exports = greeting; +``` + +Then require it in the shell in order to register a user-defined function: + +```js +arangosh> var func = require("path/to/file.js"); +arangosh> aqlfunctions.register("HUMAN::GREETING", func, true); +``` + +Note that a return value of `false` means that the function `HUMAN::GREETING` +was newly created, and not that it failed to register. `true` is returned +if a function of that name existed before and was just updated. + +`aqlfunctions.register(name, code, isDeterministic)` + +Registers an AQL user function, identified by a fully qualified function +name. The function code in `code` must be specified as a JavaScript +function or a string representation of a JavaScript function. +If the function code in `code` is passed as a string, it is required that +the string evaluates to a JavaScript function definition. + +If a function identified by `name` already exists, the previous function +definition is updated. Please also make sure that the function code +does not violate the conventions for AQL functions, in particular with regards +to the [naming](#naming) and [side-effects](#variables-and-side-effects). + +The `isDeterministic` attribute can be used to specify whether the +function results are fully deterministic (i.e. depend solely on the input +and are the same for repeated calls with the same input values). It is not +used at the moment but may be used for optimizations later. + +The registered function is stored in the selected database's system +collection `_aqlfunctions`. + +The function returns `true` when it updates/replaces an existing AQL +function of the same name, and `false` otherwise. It throws an exception +if it detects syntactically invalid function code. + +**Examples** + +```js +require("@arangodb/aql/functions").register("MYFUNCTIONS::TEMPERATURE::CELSIUSTOFAHRENHEIT", +function (celsius) { + return celsius * 1.8 + 32; +}); +``` + +The function code is not executed in *strict mode* or *strong mode* by +default. In order to make a user function being run in strict mode, use +`use strict` explicitly, e.g.: + +```js +require("@arangodb/aql/functions").register("MYFUNCTIONS::TEMPERATURE::CELSIUSTOFAHRENHEIT", +function (celsius) { + "use strict"; + return celsius * 1.8 + 32; +}); +``` + +You can access the name under which the AQL function is registered by accessing +the `name` property of `this` inside the JavaScript code: + +```js +require("@arangodb/aql/functions").register("MYFUNCTIONS::TEMPERATURE::CELSIUSTOFAHRENHEIT", +function (celsius) { + "use strict"; + if (typeof celsius === "undefined") { + const error = require("@arangodb").errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH; + AQL_WARNING(error.code, require("util").format(error.message, this.name, 1, 1)); + } + return celsius * 1.8 + 32; +}); +``` + +`AQL_WARNING()` is automatically available to the code of user-defined +functions. The error code and message is retrieved via `@arangodb` module. +The *argument number mismatch* message has placeholders, which we can substitute +using [format()](http://nodejs.org/api/util.html): + +``` +invalid number of arguments for function '%s()', expected number of arguments: minimum: %d, maximum: %d +``` + +In the example above, `%s` is replaced by `this.name` (the AQL function name), +and both `%d` placeholders by `1` (number of expected arguments). 
If you call +the function without an argument, you see this: + +```js +arangosh> db._query("RETURN MYFUNCTIONS::TEMPERATURE::CELSIUSTOFAHRENHEIT()") +[object ArangoQueryCursor, count: 1, hasMore: false, warning: 1541 - invalid +number of arguments for function 'MYFUNCTIONS::TEMPERATURE::CELSIUSTOFAHRENHEIT()', +expected number of arguments: minimum: 1, maximum: 1] + +[ + null +] +``` + +### Deleting an existing AQL user function + +`aqlfunctions.unregister(name)` + +Unregisters an existing AQL user function, identified by the fully qualified +function name. + +Trying to unregister a function that does not exist results in an +exception. + +**Examples** + +```js +require("@arangodb/aql/functions").unregister("MYFUNCTIONS::TEMPERATURE::CELSIUSTOFAHRENHEIT"); +``` + +### Unregister group + +Delete a group of AQL user functions: + +`aqlfunctions.unregisterGroup(prefix)` + +Unregisters a group of AQL user function, identified by a common function +group prefix. + +This returns the number of functions unregistered. + +**Examples** + +```js +require("@arangodb/aql/functions").unregisterGroup("MYFUNCTIONS::TEMPERATURE"); + +require("@arangodb/aql/functions").unregisterGroup("MYFUNCTIONS"); +``` + +### Listing all AQL user functions + +`aqlfunctions.toArray()` + +Returns all previously registered AQL user functions, with their fully +qualified names and function code. + +--- + +`aqlfunctions.toArray(prefix)` + +Returns all previously registered AQL user functions, restricted to a specified +group of functions by specifying a group prefix. + +**Examples** + +To list all available user functions: + +```js +require("@arangodb/aql/functions").toArray(); +``` + +To list all available user functions in the *MYFUNCTIONS* namespace: + +```js +require("@arangodb/aql/functions").toArray("MYFUNCTIONS"); +``` + +To list all available user functions in the *MYFUNCTIONS::TEMPERATURE* namespace: + +```js +require("@arangodb/aql/functions").toArray("MYFUNCTIONS::TEMPERATURE"); +``` + +## Deployment Details + +Internally, UDFs are stored in a system collection named `_aqlfunctions` +of the selected database. When an AQL statement refers to such a UDF, +it is loaded from that collection. The UDFs will be exclusively +available for queries in that particular database. + +Since the Coordinator doesn't have own local collections, the `_aqlfunctions` +collection is sharded across the cluster. Therefore (as usual), it has to be +accessed through a Coordinator - you mustn't talk to the shards directly. +Once it is in the `_aqlfunctions` collection, it is available on all +Coordinators without additional effort. + +Keep in mind that system collections are excluded from dumps created with +[arangodump](../components/tools/arangodump/_index.md) by default. +To include AQL UDF in a dump, the dump needs to be started with +the option *--include-system-collections true*. 
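+
+For example, a dump that keeps the `_aqlfunctions` system collection could be
+created like this (the output directory is only a placeholder):
+
+```
+arangodump --include-system-collections true --output-directory "dump"
+```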
diff --git a/site/content/arangodb/oem/components/_index.md b/site/content/arangodb/oem/components/_index.md new file mode 100644 index 0000000000..e5da4f23ad --- /dev/null +++ b/site/content/arangodb/oem/components/_index.md @@ -0,0 +1,6 @@ +--- +title: Components +menuTitle: Components +weight: 165 +description: '' +--- diff --git a/site/content/arangodb/oem/components/arangodb-server/_index.md b/site/content/arangodb/oem/components/arangodb-server/_index.md new file mode 100644 index 0000000000..23992ba2e3 --- /dev/null +++ b/site/content/arangodb/oem/components/arangodb-server/_index.md @@ -0,0 +1,21 @@ +--- +title: ArangoDB Server +menuTitle: ArangoDB Server +weight: 170 +description: >- + The ArangoDB daemon (arangod) is the central server binary that can run in + different modes for a variety of setups like single server and clusters +--- +The ArangoDB server is the core component of ArangoDB. The executable file to +run it is named `arangod`. The `d` stands for daemon. A daemon is a long-running +background process that answers requests for services. + +The server process serves the various client connections to the server via the +TCP/HTTP protocol. It also provides a [web interface](../web-interface/_index.md). + +_arangod_ can run in different modes for a variety of setups like single server +and clusters. It differs between the [Community Edition](../../features/community-edition.md) +and [Enterprise Edition](../../features/enterprise-edition.md). + +See [Administration](../../operations/administration/_index.md) for server configuration +and [Deploy](../../deploy/_index.md) for operation mode details. diff --git a/site/content/arangodb/oem/components/arangodb-server/environment-variables.md b/site/content/arangodb/oem/components/arangodb-server/environment-variables.md new file mode 100644 index 0000000000..4188515b33 --- /dev/null +++ b/site/content/arangodb/oem/components/arangodb-server/environment-variables.md @@ -0,0 +1,108 @@ +--- +title: ArangoDB Server environment variables +menuTitle: Environment variables +weight: 15 +description: >- + Environment variables used by `arangod` +--- +`arangod` inspects the following list of environment variables: + + - `ARANGODB_OVERRIDE_DETECTED_TOTAL_MEMORY` + + This variable can be used to override the automatic detection of the total + amount of RAM present on the system. One can specify a decimal number + (in bytes). Furthermore, numbers can have the following suffixes: + + - `TB`, `T`, `tb`, `t`: the number is multiplied by 1,000,000,000,000 (terabytes). + - `GB`, `G`, `gb`, `g`: the number is multiplied by 1,000,000,000 (gigabytes). + - `MB`, `M`, `mb`, `m`: the number is multiplied by 1,000,000 (megabytes). + - `KB`, `K`, `kb`, `k`: the number is multiplied by 1,000 (kilobytes). + - `TIB`, `TiB`, `tib`: the number is multiplied by 1,099,511,627,776 (tebibytes). + - `GIB`, `GiB`, `gib`: the number is multiplied by 1,073,741,824 (gibibytes). + - `MIB`, `MiB`, `mib`: the number is multiplied by 1,048,576 (mebibytes). + - `KIB`, `KiB`, `kib`: the number is multiplied by 1,024 (kibibytes). + - `B`, `b`: bytes + + The total amount of RAM detected is logged as an INFO message at + server start. If the variable is set, the overridden value is shown. + Various default sizes are calculated based on this value (e.g. the + RocksDB buffer cache size). + + Setting this option can in particular be useful in two cases: + + 1. 
If `arangod` is running in a container and its cgroup has a RAM + limitation, then one should specify this limitation in this + environment variable, since it is currently not automatically + detected. + 2. If `arangod` is running alongside other services on the same + machine and thus sharing the RAM with them, one should limit the + amount of memory using this environment variable. + + Note that setting this environment variable mainly affects the default + values of startup options that have to do with memory usage. + If the values of these startup options are explicitly set anyway, then + setting the environment variable has no effect. + + For example, the default value for the RocksDB block cache size + (`--rocksdb.block-cache-size` startup option) depends on the amount of + available memory. If you set `ARANGODB_OVERRIDE_DETECTED_TOTAL_MEMORY=32GB`, + the default value for the block cache size is `(32GB - 2GB) * 0.3 = 9GB`. + However, if you set the `--rocksdb.block-cache-size` startup option explicitly + via a configuration file or via the command-line, then the latter value is + used, and not the option's default value based on the + `ARANGODB_OVERRIDE_DETECTED_TOTAL_MEMORY` environment variable. + + - `ARANGODB_OVERRIDE_DETECTED_NUMBER_OF_CORES` + + This variable can be used to override the automatic detection of the + number of CPU cores present on the system. + + The number of CPU cores detected is logged as an INFO message at + server start. If the variable is set, the overridden value is shown. + Various default values for threading are calculated based on this value. + + Setting this option is useful if `arangod` is running in a container + or alongside other services on the same machine and shall not use + all available CPUs. + + - `ARANGODB_OVERRIDE_CRASH_HANDLER` + + This variable can be used to toggle the built-in crash handler in the + Linux builds of `arangod`. The crash handler is turned on by default + for Linux builds, and it can be turned off by setting this environment + variable to an empty string, the value of `0` or `off`. + +- `CACHE_OBLIVIOUS` _(introduced in v3.9.7, v3.10.3)_ + + If set to the string `true`, jemalloc allocates one additional page + (4096 bytes) for every allocation of 16384 or more bytes to change the + base address if it is not divisible by 4096. This can help the CPU caches if + the beginning of such blocks are accessed a lot. + + On the other hand, it increases the memory usage because of the page alignment. + The RocksDB buffer cache does most of its allocations for 16384 bytes, + increasing the RAM usage by 25%. Setting the option to `false` disables the + optimization but the performance is expected to be the same for ArangoDB. + + The default is `true` in 3.9 and 3.10 up to v3.10.3. From v3.10.4 onwards, + the default is `false`. + + Also see the [jemalloc documentation](http://jemalloc.net/jemalloc.3.html#opt.cache_oblivious). + +- `TZ_DATA` _(introduced in v3.8.0)_ + + This variable can be used to specify the path to the directory containing + the timezone information database for ArangoDB. That directory is normally + named `tzdata` and is shipped with ArangoDB releases. It is normally not + required to set this environment variable, but it may be necessary in + unusual setups with non-conventional directory layouts and paths. + +- `IRESEARCH_TEXT_STOPWORD_PATH` + + Path to a directory with stop word files for + [ArangoSearch Text Analyzers](../../indexes-and-search/analyzers.md#text). 
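+
+As an illustrative sketch (the values are arbitrary and any other startup
+options are omitted), the detection overrides described above can be set in the
+environment when starting the server:
+
+```
+ARANGODB_OVERRIDE_DETECTED_TOTAL_MEMORY=32GB \
+ARANGODB_OVERRIDE_DETECTED_NUMBER_OF_CORES=4 \
+arangod
+```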
+ + + +For Docker specific environment variables please refer to +[Docker Hub](https://hub.docker.com/_/arangodb) diff --git a/site/content/arangodb/oem/components/arangodb-server/ldap.md b/site/content/arangodb/oem/components/arangodb-server/ldap.md new file mode 100644 index 0000000000..2fde26e69f --- /dev/null +++ b/site/content/arangodb/oem/components/arangodb-server/ldap.md @@ -0,0 +1,563 @@ +--- +title: ArangoDB Server LDAP Options +menuTitle: LDAP +weight: 10 +description: >- + LDAP authentication options in the ArangoDB server +--- +{{< tag "ArangoDB Enterprise Edition" "AMP" >}} + +## Basics Concepts + +The basic idea is that one can keep the user authentication setup for +an ArangoDB instance (single or cluster) outside of ArangoDB in an LDAP +server. A crucial feature of this is that one can add and withdraw users +and permissions by only changing the LDAP server and in particular +without touching the ArangoDB instance. Changes are effective in +ArangoDB within a few minutes. + +Since there are many different possible LDAP setups, we must support a +variety of possibilities for authentication and authorization. Here is +a short overview: + +To map ArangoDB user names to LDAP users there are two authentication +methods called "simple" and "search". In the "simple" method the LDAP bind +user is derived from the ArangoDB user name by prepending a prefix and +appending a suffix. For example, a user "alice" could be mapped to the +distinguished name `uid=alice,dc=arangodb,dc=com` to perform the LDAP +bind and authentication. +See [Simple authentication method](#simple-authentication-method) +below for details and configuration options. + +In the "search" method there are two phases. In Phase 1 a generic +read-only admin LDAP user account is used to bind to the LDAP server +first and search for an LDAP user matching the ArangoDB user name. In +Phase 2, the actual authentication is then performed against the LDAP +user that was found in phase 1. Both methods are sensible and are +recommended to use in production. +See [Search authentication method](#search-authentication-method) +below for details and configuration options. + +Once the user is authenticated, there are now two methods for +authorization: (a) "roles attribute" and (b) "roles search". + +In method (a) ArangoDB acquires a list of roles the authenticated LDAP +user has from the LDAP server. The actual access rights to databases +and collections for these roles are configured in ArangoDB itself. +Users effectively have the union of all access rights of all roles +they have. This method is probably the most common one for production use +cases. It combines the advantages of managing users and roles outside of +ArangoDB in the LDAP server with the fine grained access control within +ArangoDB for the individual roles. See [Roles attribute](#roles-attribute) +below for details about method (a) and for the associated configuration +options. + +Method (b) is very similar and only differs from (a) in the way the +actual list of roles of a user is derived from the LDAP server. +See [Roles search](#roles-search) below for details about method (b) +and for the associated configuration options. 
+
+## Fundamental options
+
+The fundamental options for specifying how to access the LDAP server are
+the following:
+
+  - `--ldap.enabled` is a boolean option which must be set to
+    `true` to activate the LDAP feature.
+  - `--ldap.server` is a string specifying the host name or IP address
+    of the LDAP server.
+  - `--ldap.port` is an integer specifying the port the LDAP server is
+    running on; the default is `389`.
+  - `--ldap.basedn` specifies the base distinguished name under which
+    the search takes place (can alternatively be set via `--ldap.url`).
+  - `--ldap.binddn` and `--ldap.bindpasswd` are the distinguished name and
+    password of a read-only LDAP user that ArangoDB can bind to in order to
+    search the LDAP server. Note that it is necessary to configure these
+    for both the "simple" and "search" authentication methods, since
+    even in the "simple" method, ArangoDB occasionally has to refresh
+    the authorization information from the LDAP server,
+    even if the user session persists and no new authentication is
+    needed. It is, however, allowed to leave both empty, but then the
+    LDAP server must be readable with anonymous access.
+  - `--ldap.refresh-rate` is a floating point value in seconds. The
+    default is 300, which means that ArangoDB refreshes the
+    authorization information for authenticated users after at most 5
+    minutes. This means that changes in the LDAP server, like removed
+    users or added or removed roles for a user, become effective after
+    at most 5 minutes.
+
+Note that the `--ldap.server` and `--ldap.port` options can
+alternatively be specified in the `--ldap.url` string together with
+other configuration options. For details, see the
+[LDAP URLs](#ldap-urls) section below.
+
+Here is an example of how to configure the connection to the LDAP server
+with anonymous bind:
+
+```
+--ldap.enabled=true \
+--ldap.server=ldap.arangodb.com \
+--ldap.basedn=dc=arangodb,dc=com
+```
+
+With this configuration, ArangoDB binds anonymously to the LDAP server
+on host `ldap.arangodb.com` on the default port 389 and executes all searches
+under the base distinguished name `dc=arangodb,dc=com`.
+
+If a dedicated user is required to read from the LDAP server, it can be
+configured as follows:
+
+```
+--ldap.enabled=true \
+--ldap.server=ldap.arangodb.com \
+--ldap.basedn=dc=arangodb,dc=com \
+--ldap.binddn=uid=arangoadmin,dc=arangodb,dc=com \
+--ldap.bindpasswd=supersecretpassword
+```
+
+The connection is identical, but the searches are executed with the
+given distinguished name in `binddn`.
+
+Note that the given user (or the anonymous one) needs at least read access
+on all user objects to find them and, in the case of the roles search
+method, also read access on the objects storing the roles.
+
+At this point, ArangoDB can connect to a given LDAP server
+but is not yet able to authenticate users properly with it.
+For this, pick one of the following two authentication methods.
+
+### LDAP URLs
+
+As an alternative, one can specify the values of multiple LDAP-related
+configuration options via a single LDAP URL. Here is an example:
+
+```
+--ldap.url ldap://ldap.arangodb.com:1234/dc=arangodb,dc=com?uid?sub
+```
+
+This single option has the combined effect of setting the following:
+
+```
+--ldap.server=ldap.arangodb.com \
+--ldap.port=1234 \
+--ldap.basedn=dc=arangodb,dc=com \
+--ldap.searchAttribute=uid \
+--ldap.searchScope=sub
+```
+
+That is, the LDAP URL consists of the LDAP `server` and `port`, a `basedn`, a
+`searchAttribute`, and a `searchScope`, which can be one of `base`, `one`, or
+`sub`.
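+
+Schematically, the URL is composed as follows. This is only a sketch of the
+structure described above, with placeholders in angle brackets standing for
+the values of the corresponding options:
+
+```
+ldap://<server>:<port>/<basedn>?<searchAttribute>?<searchScope>
+```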
+
+There is also the possibility to use the `ldaps` protocol, as in:
+
+```
+--ldap.url ldaps://ldap.arangodb.com:636/dc=arangodb,dc=com?uid?sub
+```
+
+This does exactly the same as the example above, except that it uses the
+LDAP over TLS protocol. This is a non-standard method which does not
+involve using the STARTTLS protocol. Note that this does not work in the
+Windows version! We suggest using the `ldap` protocol and STARTTLS
+as described in the next section.
+
+### TLS options
+
+{{< warning >}}
+TLS is not supported in the Windows version of ArangoDB!
+{{< /warning >}}
+
+To configure the usage of encrypted TLS to communicate with the LDAP server,
+the following options are available:
+
+- `--ldap.tls`: the main switch to activate TLS. It can either be
+  `true` (use TLS) or `false` (do not use TLS). It is switched
+  off by default. If you switch this on and do not use the `ldaps`
+  protocol via the [LDAP URL](#ldap-urls), then ArangoDB
+  uses the `STARTTLS` protocol to initiate TLS. This is the
+  recommended approach.
+- `--ldap.tls-version`: the minimal TLS version that ArangoDB should accept.
+  Available versions are `1.0`, `1.1`, and `1.2`. The default is `1.2`. If
+  your LDAP server does not support version 1.2, you have to change
+  this setting.
+- `--ldap.tls-cert-check-strategy`: the strategy to validate the LDAP server
+  certificate. Available strategies are `never`, `hard`,
+  `demand`, `allow`, and `try`. The default is `hard`.
+- `--ldap.tls-cacert-file`: a file path to one or more (concatenated)
+  certificate authority certificates in PEM format.
+  By default, no file path is configured. This certificate
+  is used to validate the server response.
+- `--ldap.tls-cacert-dir`: a directory path to certificate authority certificates in
+  [c_rehash](https://www.openssl.org/docs/man3.0/man1/c_rehash.html)
+  format. By default, no directory path is configured.
+
+Assuming the CA certificate file that is given to the server is available at
+`/path/to/certificate.pem`, here is an example of how to configure TLS:
+
+```
+--ldap.tls true \
+--ldap.tls-cacert-file /path/to/certificate.pem
+```
+
+You can use TLS with any of the following authentication mechanisms.
+
+### Secondary server options (`ldap2`)
+
+The `ldap.*` options configure the primary LDAP server. It is possible to
+configure a secondary server with the `ldap2.*` options, to use it as a
+fail-over in case the primary server is not reachable, but also to
+let the primary server handle some users and the secondary one others.
+
+Instead of `--ldap.