From 52a4f566ca583e05d5404c5b34a63fb81324fdc9 Mon Sep 17 00:00:00 2001 From: Revanthathreya <143704329+Revanthathreya@users.noreply.github.com> Date: Mon, 24 Feb 2025 15:34:22 +0530 Subject: [PATCH 01/42] SC-5131:Update semgrep.yml (#159) --- .github/workflows/semgrep.yml | 63 +++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 17 deletions(-) diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index bce5fc8e..019095aa 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -1,35 +1,64 @@ -name: Semgrep +name: Semgrep Scan -# Run workflow each time code is pushed to your repository. on: - push: - branches: - - main pull_request: branches: - main + jobs: build: runs-on: ubuntu-latest + permissions: + pull-requests: write # Give write permission to PRs + issues: write steps: - name: Checkout code uses: actions/checkout@v3 - - name: Install Semgrep - run: pip install semgrep + - name: Install Semgrep and jq + run: | + sudo apt install python3-venv jq + python3 -m venv .venv + .venv/bin/pip install semgrep - name: Run Semgrep run: | - semgrep --config .semgreprules/customRule.yml --config auto --severity ERROR --sarif . > results.sarif + source .venv/bin/activate + semgrep --config auto --severity ERROR --json-output=results.json --no-error + cat results.json | jq .results > pretty-results.json - - name: Upload SARIF file - uses: github/codeql-action/upload-sarif@v3 + - name: Display Raw Semgrep JSON Output + run: | + echo "Displaying raw Semgrep results..." + cat pretty-results.json + + - name: Add comment on PR if findings are found + uses: actions/github-script@v6 with: - # Path to SARIF file relative to the root of the repository - sarif_file: results.sarif + script: | + // Ensure the context has a pull_request + if (context.payload.pull_request) { + const prNumber = context.payload.pull_request.number; + const fs = require('fs'); + const results = JSON.parse(fs.readFileSync('pretty-results.json', 'utf8')); + const highFindings = results.filter(result => result.extra && result.extra.severity === 'ERROR'); - - name: Upload results - uses: actions/upload-artifact@v4 - with: - name: semgrep-results - path: results.sarif + // Comment if findings exist + if (highFindings.length > 0) { + const comment = `**Semgrep Findings:** Issues with Error level severity are found (Error is Highest severity in Semgrep), Please resolve the issues before merging.`; + await github.rest.issues.createComment({ + ...context.repo, + issue_number: prNumber, + body: comment + }); + } else { + const noIssuesComment = "**Semgrep findings:** No issues found, Good to merge."; + await github.rest.issues.createComment({ + ...context.repo, + issue_number: prNumber, + body: noIssuesComment + }); + } + } else { + console.log("This workflow wasn't triggered by a pull request, so no comment will be added."); + } From 60c46c1aa908cb563d676dd4554e0c5e248ef3b1 Mon Sep 17 00:00:00 2001 From: Revanthathreya <143704329+Revanthathreya@users.noreply.github.com> Date: Tue, 10 Jun 2025 11:18:01 +0530 Subject: [PATCH 02/42] SC-5790:Create Gitleaks.yml --- .github/workflows/Gitleaks.yml | 97 ++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 .github/workflows/Gitleaks.yml diff --git a/.github/workflows/Gitleaks.yml b/.github/workflows/Gitleaks.yml new file mode 100644 index 00000000..9807b5e9 --- /dev/null +++ b/.github/workflows/Gitleaks.yml @@ -0,0 +1,97 @@ +name: Gitleaks secrets scan + +on: + pull_request: + branches: + - main + + 
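+# The permissions block below follows a least-privilege model for the
+# GITHUB_TOKEN: read access to repository contents, plus write access to
+# issues and pull requests so the job can post scan results as PR comments.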
+permissions: + issues: write + pull-requests: write + contents: read + +jobs: + gitleaks: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Required to get full commit history for diffing + + + - name: Get base and head commit SHAs + run: | + echo "BASE_SHA=${{ github.event.pull_request.base.sha }}" >> $GITHUB_ENV + echo "HEAD_SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + + + - name: Run Gitleaks on PR changes via Docker + run: | + docker run --rm -v $(pwd):/repo -w /repo zricethezav/gitleaks:latest detect \ + --config="/repo/Rule/gitleaks.toml" \ + --log-opts="--no-merges $BASE_SHA..$HEAD_SHA" \ + --verbose \ + --exit-code=0 \ + --report-format=json \ + --report-path="/repo/gitleaks-report.json" \ + --redact + + - name: Upload Gitleaks report + uses: actions/upload-artifact@v4 + with: + name: gitleaks-report + path: gitleaks-report.json + + - name: Format and comment findings on PR + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + if [ ! -f gitleaks-report.json ]; then + echo "Report file not found!" + exit 1 + fi + + FINDINGS_JSON=$(cat gitleaks-report.json) + COUNT=$(echo "$FINDINGS_JSON" | jq 'length') + SHA="${{ github.event.pull_request.head.sha }}" + REPO="${{ github.repository }}" + PR_NUMBER="${{ github.event.pull_request.number }}" + MAX=10 + + if [ "$COUNT" -gt 0 ]; then + COMMENT="**🔐 Gitleaks Findings: $COUNT issue(s) detected**\n\n" + i=0 + while [ "$i" -lt "$COUNT" ] && [ "$i" -lt "$MAX" ]; do + ITEM=$(echo "$FINDINGS_JSON" | jq ".[$i]") + RULE=$(echo "$ITEM" | jq -r '.RuleID') + DESC=$(echo "$ITEM" | jq -r '.Description') + FILE=$(echo "$ITEM" | jq -r '.File') + LINE=$(echo "$ITEM" | jq -r '.Line') + LINK="https://github.com/$REPO/blob/$SHA/$FILE#L$LINE" + SECRET_MASKED="**********" + COMMENT+="🔸 **Rule**: \`$RULE\`\n" + COMMENT+="📄 **File**: \`$FILE:$LINE\`\n" + COMMENT+="📝 **Description**: $DESC\n" + COMMENT+="🔑 **Secret**: \`$SECRET_MASKED\`\n" + COMMENT+="🔗 **Path**: [$FILE:$LINE]($LINK)\n\n" + i=$((i + 1)) + done + + if [ "$COUNT" -gt "$MAX" ]; then + COMMENT+="...and more. Only showing first $MAX findings.\n" + fi + else + COMMENT="✅ **Gitleaks Findings:** No secrets detected. Safe to proceed!" 
+ fi + + # Escape newlines for GitHub API + COMMENT=$(echo "$COMMENT" | sed ':a;N;$!ba;s/\n/\\n/g') + + curl -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + -H "Accept: application/vnd.github.v3+json" \ + -d "{\"body\":\"$COMMENT\"}" \ + "https://api.github.com/repos/${REPO}/issues/${PR_NUMBER}/comments" From ed57ecb376f8be4bfad80ca46b3e37080940f3bc Mon Sep 17 00:00:00 2001 From: Revanthathreya <143704329+Revanthathreya@users.noreply.github.com> Date: Tue, 10 Jun 2025 11:19:06 +0530 Subject: [PATCH 03/42] SC-5790:Update and rename customRule.yml to gitleaks.toml --- .semgreprules/customRule.yml | 30 - Rule/gitleaks.toml | 3137 ++++++++++++++++++++++++++++++++++ 2 files changed, 3137 insertions(+), 30 deletions(-) delete mode 100644 .semgreprules/customRule.yml create mode 100644 Rule/gitleaks.toml diff --git a/.semgreprules/customRule.yml b/.semgreprules/customRule.yml deleted file mode 100644 index b275e280..00000000 --- a/.semgreprules/customRule.yml +++ /dev/null @@ -1,30 +0,0 @@ -rules: -- id: check-sensitive-info - message: >- - Potential sensitive information found: $1 - severity: ERROR - languages: - - yaml - - go - - javascript - - java - - python - - golang - - docker - patterns: - - pattern-regex: (?i)\b(api[_-]key|api[_-]token|api[_-]secret[_-]key|api[_-]password|token|secret[_-]key|password|auth[_-]key|auth[_-]token|AUTH_PASSWORD)\s*[:=]\s*(['"]?)((?!YOUR_EXCLUSION_PATTERN_HERE)[A-Z]+.*?)\2 - -- id: check-logger-appconfig - message: >- - Potential Logging configuration found: $1 - severity: ERROR - languages: - - yaml - - go - - javascript - - java - - python - - golang - - docker - patterns: - - pattern-regex: log\.Logger\(\).*(appConfig).* diff --git a/Rule/gitleaks.toml b/Rule/gitleaks.toml new file mode 100644 index 00000000..e127f827 --- /dev/null +++ b/Rule/gitleaks.toml @@ -0,0 +1,3137 @@ +# This file has been auto-generated. Do not edit manually. +# If you would like to contribute new rules, please use +# cmd/generate/config/main.go and follow the contributing guidelines +# at https://github.com/gitleaks/gitleaks/blob/master/CONTRIBUTING.md +# +# How the hell does secret scanning work? Read this: +# https://lookingatcomputer.substack.com/p/regex-is-almost-all-you-need +# +# This is the default gitleaks configuration file. +# Rules and allowlists are defined within this file. +# Rules instruct gitleaks on what should be considered a secret. +# Allowlists instruct gitleaks on what is allowed, i.e. not a secret. 
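+#
+# Layout of this file: a single global [allowlist] (regexes, paths and
+# stopwords that are never reported) is followed by one [[rules]] table per
+# secret type. Each rule carries an id, a description, a detection regex,
+# an optional Shannon-entropy threshold and a keyword prefilter, and may
+# define its own [[rules.allowlists]] exceptions.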
+ +title = "gitleaks config" + +[allowlist] +description = "global allow lists" +regexes = [ + '''(?i)^true|false|null$''', + '''^(?i:a+|b+|c+|d+|e+|f+|g+|h+|i+|j+|k+|l+|m+|n+|o+|p+|q+|r+|s+|t+|u+|v+|w+|x+|y+|z+|\*+|\.+)$''', + '''^\$(?:\d+|{\d+})$''', + '''^\$(?:[A-Z_]+|[a-z_]+)$''', + '''^\${(?:[A-Z_]+|[a-z_]+)}$''', + '''^\{\{[ \t]*[\w ().|]+[ \t]*}}$''', + '''^\$\{\{[ \t]*(?:(?:env|github|secrets|vars)(?:\.[A-Za-z]\w+)+[\w "'&./=|]*)[ \t]*}}$''', + '''^%(?:[A-Z_]+|[a-z_]+)%$''', + '''^%[+\-# 0]?[bcdeEfFgGoOpqstTUvxX]$''', + '''^\{\d{0,2}}$''', + '''^@(?:[A-Z_]+|[a-z_]+)@$''', + '''^/Users/(?i)[a-z0-9]+/[\w .-/]+$''', + '''^/(?:bin|etc|home|opt|tmp|usr|var)/[\w ./-]+$''', +] +paths = [ + '''gitleaks\.toml''', + '''(?i)\.(?:bmp|gif|jpe?g|png|svg|tiff?)$''', + '''(?i)\.(?:eot|[ot]tf|woff2?)$''', + '''(?i)\.(?:docx?|xlsx?|pdf|bin|socket|vsidx|v2|suo|wsuo|.dll|pdb|exe|gltf|zip)$''', + '''go\.(?:mod|sum|work(?:\.sum)?)$''', + '''(?:^|/)vendor/modules\.txt$''', + '''(?:^|/)vendor/(?:github\.com|golang\.org/x|google\.golang\.org|gopkg\.in|istio\.io|k8s\.io|sigs\.k8s\.io)(?:/.*)?$''', + '''(?:^|/)gradlew(?:\.bat)?$''', + '''(?:^|/)gradle\.lockfile$''', + '''(?:^|/)mvnw(?:\.cmd)?$''', + '''(?:^|/)\.mvn/wrapper/MavenWrapperDownloader\.java$''', + '''(?:^|/)node_modules(?:/.*)?$''', + '''(?:^|/)(?:deno\.lock|npm-shrinkwrap\.json|package-lock\.json|pnpm-lock\.yaml|yarn\.lock)$''', + '''(?:^|/)bower_components(?:/.*)?$''', + '''(?:^|/)(?:angular|bootstrap|jquery(?:-?ui)?|plotly|swagger-?ui)[a-zA-Z0-9.-]*(?:\.min)?\.js(?:\.map)?$''', + '''(?:^|/)javascript\.json$''', + '''(?:^|/)(?:Pipfile|poetry)\.lock$''', + '''(?i)(?:^|/)(?:v?env|virtualenv)/lib(?:64)?(?:/.*)?$''', + '''(?i)(?:^|/)(?:lib(?:64)?/python[23](?:\.\d{1,2})+|python/[23](?:\.\d{1,2})+/lib(?:64)?)(?:/.*)?$''', + '''(?i)(?:^|/)[a-z0-9_.]+-[0-9.]+\.dist-info(?:/.+)?$''', + '''(?:^|/)vendor/(?:bundle|ruby)(?:/.*?)?$''', + '''\.gem$''', + '''verification-metadata\.xml''', + '''Database.refactorlog''', +] +stopwords = [ + "abcdefghijklmnopqrstuvwxyz", + "014df517-39d1-4453-b7b3-9930c563627c", +] + +[[rules]] +id = "1password-service-account-token" +description = "Uncovered a possible 1Password service account token, potentially compromising access to secrets in vaults." +regex = '''ops_eyJ[a-zA-Z0-9+/]{250,}={0,3}''' +entropy = 4 +keywords = ["ops_"] + +[[rules]] +id = "adafruit-api-key" +description = "Identified a potential Adafruit API Key, which could lead to unauthorized access to Adafruit services and sensitive data exposure." +regex = '''(?i)[\w.-]{0,50}?(?:adafruit)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9_-]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["adafruit"] + +[[rules]] +id = "adobe-client-id" +description = "Detected a pattern that resembles an Adobe OAuth Web Client ID, posing a risk of compromised Adobe integrations and data breaches." +regex = '''(?i)[\w.-]{0,50}?(?:adobe)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["adobe"] + +[[rules]] +id = "adobe-client-secret" +description = "Discovered a potential Adobe Client Secret, which, if exposed, could allow unauthorized Adobe service access and data manipulation." 
+regex = '''\b(p8e-(?i)[a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["p8e-"] + +[[rules]] +id = "age-secret-key" +description = "Discovered a potential Age encryption tool secret key, risking data decryption and unauthorized access to sensitive information." +regex = '''AGE-SECRET-KEY-1[QPZRY9X8GF2TVDW0S3JN54KHCE6MUA7L]{58}''' +keywords = ["age-secret-key-1"] + +[[rules]] +id = "airtable-api-key" +description = "Uncovered a possible Airtable API Key, potentially compromising database access and leading to data leakage or alteration." +regex = '''(?i)[\w.-]{0,50}?(?:airtable)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{17})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["airtable"] + +[[rules]] +id = "algolia-api-key" +description = "Identified an Algolia API Key, which could result in unauthorized search operations and data exposure on Algolia-managed platforms." +regex = '''(?i)[\w.-]{0,50}?(?:algolia)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["algolia"] + +[[rules]] +id = "alibaba-access-key-id" +description = "Detected an Alibaba Cloud AccessKey ID, posing a risk of unauthorized cloud resource access and potential data compromise." +regex = '''\b(LTAI(?i)[a-z0-9]{20})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["ltai"] + +[[rules]] +id = "alibaba-secret-key" +description = "Discovered a potential Alibaba Cloud Secret Key, potentially allowing unauthorized operations and data access within Alibaba Cloud." +regex = '''(?i)[\w.-]{0,50}?(?:alibaba)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{30})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["alibaba"] + +[[rules]] +id = "asana-client-id" +description = "Discovered a potential Asana Client ID, risking unauthorized access to Asana projects and sensitive task information." +regex = '''(?i)[\w.-]{0,50}?(?:asana)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9]{16})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["asana"] + +[[rules]] +id = "asana-client-secret" +description = "Identified an Asana Client Secret, which could lead to compromised project management integrity and unauthorized access." +regex = '''(?i)[\w.-]{0,50}?(?:asana)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["asana"] + +[[rules]] +id = "atlassian-api-token" +description = "Detected an Atlassian API token, posing a threat to project management and collaboration tool security and data confidentiality." +regex = '''[\w.-]{0,50}?(?i:[\w.-]{0,50}?(?:atlassian|confluence|jira)(?:[ \t\w.-]{0,20})[\s'"]{0,3})(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-zA-Z0-9]{24})(?:[\x60'"\s;]|\\[nr]|$)|\b(ATATT3[A-Za-z0-9_\-=]{186})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3.5 +keywords = [ + "atlassian", + "confluence", + "jira", + "atatt3", +] + +[[rules]] +id = "authress-service-client-access-key" +description = "Uncovered a possible Authress Service Client Access Key, which may compromise access control services and sensitive data." 
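+# The regex below expects four dot-separated segments: a known prefix
+# (sc_/ext_/scauth_/authress_) followed by an identifier, a short middle
+# segment, an "acc"-tagged account part, and a longer base64-like tail.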
+regex = '''\b((?:sc|ext|scauth|authress)_(?i)[a-z0-9]{5,30}\.[a-z0-9]{4,6}\.(?-i:acc)[_-][a-z0-9-]{10,32}\.[a-z0-9+/_=-]{30,120})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = [ + "sc_", + "ext_", + "scauth_", + "authress_", +] + +[[rules]] +id = "aws-access-token" +description = "Identified a pattern that may indicate AWS credentials, risking unauthorized cloud resource access and data breaches on AWS platforms." +regex = '''\b((?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16})\b''' +entropy = 3 +keywords = [ + "a3t", + "akia", + "asia", + "abia", + "acca", +] +[[rules.allowlists]] +regexes = [ + '''.+EXAMPLE$''', +] + +[[rules]] +id = "azure-ad-client-secret" +description = "Azure AD Client Secret" +regex = '''(?:^|[\\'"\x60\s>=:(,)])([a-zA-Z0-9_~.]{3}\dQ~[a-zA-Z0-9_~.-]{31,34})(?:$|[\\'"\x60\s<),])''' +entropy = 3 +keywords = ["q~"] + +[[rules]] +id = "beamer-api-token" +description = "Detected a Beamer API token, potentially compromising content management and exposing sensitive notifications and updates." +regex = '''(?i)[\w.-]{0,50}?(?:beamer)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(b_[a-z0-9=_\-]{44})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["beamer"] + +[[rules]] +id = "bitbucket-client-id" +description = "Discovered a potential Bitbucket Client ID, risking unauthorized repository access and potential codebase exposure." +regex = '''(?i)[\w.-]{0,50}?(?:bitbucket)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["bitbucket"] + +[[rules]] +id = "bitbucket-client-secret" +description = "Discovered a potential Bitbucket Client Secret, posing a risk of compromised code repositories and unauthorized access." +regex = '''(?i)[\w.-]{0,50}?(?:bitbucket)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9=_\-]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["bitbucket"] + +[[rules]] +id = "bittrex-access-key" +description = "Identified a Bittrex Access Key, which could lead to unauthorized access to cryptocurrency trading accounts and financial loss." +regex = '''(?i)[\w.-]{0,50}?(?:bittrex)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["bittrex"] + +[[rules]] +id = "bittrex-secret-key" +description = "Detected a Bittrex Secret Key, potentially compromising cryptocurrency transactions and financial security." +regex = '''(?i)[\w.-]{0,50}?(?:bittrex)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["bittrex"] + +[[rules]] +id = "cisco-meraki-api-key" +description = "Cisco Meraki is a cloud-managed IT solution that provides networking, security, and device management through an easy-to-use interface." +regex = '''[\w.-]{0,50}?(?i:[\w.-]{0,50}?(?:(?-i:[Mm]eraki|MERAKI))(?:[ \t\w.-]{0,20})[\s'"]{0,3})(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9a-f]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["meraki"] + +[[rules]] +id = "clojars-api-token" +description = "Uncovered a possible Clojars API token, risking unauthorized access to Clojure libraries and potential code manipulation." +regex = '''(?i)CLOJARS_[a-z0-9]{60}''' +entropy = 2 +keywords = ["clojars_"] + +[[rules]] +id = "cloudflare-api-key" +description = "Detected a Cloudflare API Key, potentially compromising cloud application deployments and operational security." 
+regex = '''(?i)[\w.-]{0,50}?(?:cloudflare)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9_-]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["cloudflare"] + +[[rules]] +id = "cloudflare-global-api-key" +description = "Detected a Cloudflare Global API Key, potentially compromising cloud application deployments and operational security." +regex = '''(?i)[\w.-]{0,50}?(?:cloudflare)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{37})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["cloudflare"] + +[[rules]] +id = "cloudflare-origin-ca-key" +description = "Detected a Cloudflare Origin CA Key, potentially compromising cloud application deployments and operational security." +regex = '''\b(v1\.0-[a-f0-9]{24}-[a-f0-9]{146})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = [ + "cloudflare", + "v1.0-", +] + +[[rules]] +id = "codecov-access-token" +description = "Found a pattern resembling a Codecov Access Token, posing a risk of unauthorized access to code coverage reports and sensitive data." +regex = '''(?i)[\w.-]{0,50}?(?:codecov)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["codecov"] + +[[rules]] +id = "cohere-api-token" +description = "Identified a Cohere Token, posing a risk of unauthorized access to AI services and data manipulation." +regex = '''[\w.-]{0,50}?(?i:[\w.-]{0,50}?(?:cohere|CO_API_KEY)(?:[ \t\w.-]{0,20})[\s'"]{0,3})(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-zA-Z0-9]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 4 +keywords = [ + "cohere", + "co_api_key", +] + +[[rules]] +id = "coinbase-access-token" +description = "Detected a Coinbase Access Token, posing a risk of unauthorized access to cryptocurrency accounts and financial transactions." +regex = '''(?i)[\w.-]{0,50}?(?:coinbase)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9_-]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["coinbase"] + +[[rules]] +id = "confluent-access-token" +description = "Identified a Confluent Access Token, which could compromise access to streaming data platforms and sensitive data flow." +regex = '''(?i)[\w.-]{0,50}?(?:confluent)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{16})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["confluent"] + +[[rules]] +id = "confluent-secret-key" +description = "Found a Confluent Secret Key, potentially risking unauthorized operations and data access within Confluent services." +regex = '''(?i)[\w.-]{0,50}?(?:confluent)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["confluent"] + +[[rules]] +id = "contentful-delivery-api-token" +description = "Discovered a Contentful delivery API token, posing a risk to content management systems and data integrity." +regex = '''(?i)[\w.-]{0,50}?(?:contentful)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9=_\-]{43})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["contentful"] + +[[rules]] +id = "curl-auth-header" +description = "Discovered a potential authorization token provided in a curl command header, which could compromise the curl accessed resource." 
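+# The deliberately permissive regex below looks for a curl invocation,
+# possibly wrapped across a few continuation lines, followed by a -H/--header
+# argument whose quoted value is either an Authorization header (Basic,
+# Bearer, token or a raw credential) or an X-Api-Key / Api-Token style header,
+# and captures the credential itself; the 2.75 entropy floor drops trivial
+# placeholder values.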
+regex = '''\bcurl\b(?:.*?|.*?(?:[\r\n]{1,2}.*?){1,5})[ \t\n\r](?:-H|--header)(?:=|[ \t]{0,5})(?:"(?i)(?:Authorization:[ \t]{0,5}(?:Basic[ \t]([a-z0-9+/]{8,}={0,3})|(?:Bearer|(?:Api-)?Token)[ \t]([\w=~@.+/-]{8,})|([\w=~@.+/-]{8,}))|(?:(?:X-(?:[a-z]+-)?)?(?:Api-?)?(?:Key|Token)):[ \t]{0,5}([\w=~@.+/-]{8,}))"|'(?i)(?:Authorization:[ \t]{0,5}(?:Basic[ \t]([a-z0-9+/]{8,}={0,3})|(?:Bearer|(?:Api-)?Token)[ \t]([\w=~@.+/-]{8,})|([\w=~@.+/-]{8,}))|(?:(?:X-(?:[a-z]+-)?)?(?:Api-?)?(?:Key|Token)):[ \t]{0,5}([\w=~@.+/-]{8,}))')(?:\B|\s|\z)''' +entropy = 2.75 +keywords = ["curl"] + +[[rules]] +id = "curl-auth-user" +description = "Discovered a potential basic authorization token provided in a curl command, which could compromise the curl accessed resource." +regex = '''\bcurl\b(?:.*|.*(?:[\r\n]{1,2}.*){1,5})[ \t\n\r](?:-u|--user)(?:=|[ \t]{0,5})("(:[^"]{3,}|[^:"]{3,}:|[^:"]{3,}:[^"]{3,})"|'([^:']{3,}:[^']{3,})'|((?:"[^"]{3,}"|'[^']{3,}'|[\w$@.-]+):(?:"[^"]{3,}"|'[^']{3,}'|[\w${}@.-]+)))(?:\s|\z)''' +entropy = 2 +keywords = ["curl"] +[[rules.allowlists]] +regexes = [ + '''[^:]+:(?:change(?:it|me)|pass(?:word)?|pwd|test|token|\*+|x+)''', + '''['"]?<[^>]+>['"]?:['"]?<[^>]+>|<[^:]+:[^>]+>['"]?''', + '''[^:]+:\[[^]]+]''', + '''['"]?[^:]+['"]?:['"]?\$(?:\d|\w+|\{(?:\d|\w+)})['"]?''', + '''\$\([^)]+\):\$\([^)]+\)''', + '''['"]?\$?{{[^}]+}}['"]?:['"]?\$?{{[^}]+}}['"]?''', +] + +[[rules]] +id = "databricks-api-token" +description = "Uncovered a Databricks API token, which may compromise big data analytics platforms and sensitive data processing." +regex = '''\b(dapi[a-f0-9]{32}(?:-\d)?)(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["dapi"] + +[[rules]] +id = "datadog-access-token" +description = "Detected a Datadog Access Token, potentially risking monitoring and analytics data exposure and manipulation." +regex = '''(?i)[\w.-]{0,50}?(?:datadog)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["datadog"] + +[[rules]] +id = "defined-networking-api-token" +description = "Identified a Defined Networking API token, which could lead to unauthorized network operations and data breaches." +regex = '''(?i)[\w.-]{0,50}?(?:dnkey)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(dnkey-[a-z0-9=_\-]{26}-[a-z0-9=_\-]{52})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["dnkey"] + +[[rules]] +id = "digitalocean-access-token" +description = "Found a DigitalOcean OAuth Access Token, risking unauthorized cloud resource access and data compromise." +regex = '''\b(doo_v1_[a-f0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["doo_v1_"] + +[[rules]] +id = "digitalocean-pat" +description = "Discovered a DigitalOcean Personal Access Token, posing a threat to cloud infrastructure security and data privacy." +regex = '''\b(dop_v1_[a-f0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["dop_v1_"] + +[[rules]] +id = "digitalocean-refresh-token" +description = "Uncovered a DigitalOcean OAuth Refresh Token, which could allow prolonged unauthorized access and resource manipulation." +regex = '''(?i)\b(dor_v1_[a-f0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["dor_v1_"] + +[[rules]] +id = "discord-api-token" +description = "Detected a Discord API key, potentially compromising communication channels and user data privacy on Discord." 
+regex = '''(?i)[\w.-]{0,50}?(?:discord)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["discord"] + +[[rules]] +id = "discord-client-id" +description = "Identified a Discord client ID, which may lead to unauthorized integrations and data exposure in Discord applications." +regex = '''(?i)[\w.-]{0,50}?(?:discord)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9]{18})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["discord"] + +[[rules]] +id = "discord-client-secret" +description = "Discovered a potential Discord client secret, risking compromised Discord bot integrations and data leaks." +regex = '''(?i)[\w.-]{0,50}?(?:discord)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9=_\-]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["discord"] + +[[rules]] +id = "doppler-api-token" +description = "Discovered a Doppler API token, posing a risk to environment and secrets management security." +regex = '''dp\.pt\.(?i)[a-z0-9]{43}''' +entropy = 2 +keywords = ["dp.pt."] + +[[rules]] +id = "droneci-access-token" +description = "Detected a Droneci Access Token, potentially compromising continuous integration and deployment workflows." +regex = '''(?i)[\w.-]{0,50}?(?:droneci)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["droneci"] + +[[rules]] +id = "dropbox-api-token" +description = "Identified a Dropbox API secret, which could lead to unauthorized file access and data breaches in Dropbox storage." +regex = '''(?i)[\w.-]{0,50}?(?:dropbox)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{15})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["dropbox"] + +[[rules]] +id = "dropbox-long-lived-api-token" +description = "Found a Dropbox long-lived API token, risking prolonged unauthorized access to cloud storage and sensitive data." +regex = '''(?i)[\w.-]{0,50}?(?:dropbox)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{11}(AAAAAAAAAA)[a-z0-9\-_=]{43})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["dropbox"] + +[[rules]] +id = "dropbox-short-lived-api-token" +description = "Discovered a Dropbox short-lived API token, posing a risk of temporary but potentially harmful data access and manipulation." +regex = '''(?i)[\w.-]{0,50}?(?:dropbox)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(sl\.[a-z0-9\-=_]{135})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["dropbox"] + +[[rules]] +id = "duffel-api-token" +description = "Uncovered a Duffel API token, which may compromise travel platform integrations and sensitive customer data." +regex = '''duffel_(?:test|live)_(?i)[a-z0-9_\-=]{43}''' +entropy = 2 +keywords = ["duffel_"] + +[[rules]] +id = "dynatrace-api-token" +description = "Detected a Dynatrace API token, potentially risking application performance monitoring and data exposure." +regex = '''dt0c01\.(?i)[a-z0-9]{24}\.[a-z0-9]{64}''' +entropy = 4 +keywords = ["dt0c01."] + +[[rules]] +id = "easypost-api-token" +description = "Identified an EasyPost API token, which could lead to unauthorized postal and shipment service access and data exposure." 
+regex = '''\bEZAK(?i)[a-z0-9]{54}\b''' +entropy = 2 +keywords = ["ezak"] + +[[rules]] +id = "easypost-test-api-token" +description = "Detected an EasyPost test API token, risking exposure of test environments and potentially sensitive shipment data." +regex = '''\bEZTK(?i)[a-z0-9]{54}\b''' +entropy = 2 +keywords = ["eztk"] + +[[rules]] +id = "etsy-access-token" +description = "Found an Etsy Access Token, potentially compromising Etsy shop management and customer data." +regex = '''(?i)[\w.-]{0,50}?(?:(?-i:ETSY|[Ee]tsy))(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{24})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["etsy"] + +[[rules]] +id = "facebook-access-token" +description = "Discovered a Facebook Access Token, posing a risk of unauthorized access to Facebook accounts and personal data exposure." +regex = '''(?i)\b(\d{15,16}(\||%)[0-9a-z\-_]{27,40})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["facebook"] + +[[rules]] +id = "facebook-page-access-token" +description = "Discovered a Facebook Page Access Token, posing a risk of unauthorized access to Facebook accounts and personal data exposure." +regex = '''\b(EAA[MC](?i)[a-z0-9]{100,})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 4 +keywords = [ + "eaam", + "eaac", +] + +[[rules]] +id = "facebook-secret" +description = "Discovered a Facebook Application secret, posing a risk of unauthorized access to Facebook accounts and personal data exposure." +regex = '''(?i)[\w.-]{0,50}?(?:facebook)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["facebook"] + +[[rules]] +id = "fastly-api-token" +description = "Uncovered a Fastly API key, which may compromise CDN and edge cloud services, leading to content delivery and security issues." +regex = '''(?i)[\w.-]{0,50}?(?:fastly)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9=_\-]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["fastly"] + +[[rules]] +id = "finicity-api-token" +description = "Detected a Finicity API token, potentially risking financial data access and unauthorized financial operations." +regex = '''(?i)[\w.-]{0,50}?(?:finicity)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["finicity"] + +[[rules]] +id = "finicity-client-secret" +description = "Identified a Finicity Client Secret, which could lead to compromised financial service integrations and data breaches." +regex = '''(?i)[\w.-]{0,50}?(?:finicity)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{20})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["finicity"] + +[[rules]] +id = "finnhub-access-token" +description = "Found a Finnhub Access Token, risking unauthorized access to financial market data and analytics." +regex = '''(?i)[\w.-]{0,50}?(?:finnhub)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{20})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["finnhub"] + +[[rules]] +id = "flickr-access-token" +description = "Discovered a Flickr Access Token, posing a risk of unauthorized photo management and potential data leakage." 
+regex = '''(?i)[\w.-]{0,50}?(?:flickr)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["flickr"] + +[[rules]] +id = "flutterwave-encryption-key" +description = "Uncovered a Flutterwave Encryption Key, which may compromise payment processing and sensitive financial information." +regex = '''FLWSECK_TEST-(?i)[a-h0-9]{12}''' +entropy = 2 +keywords = ["flwseck_test"] + +[[rules]] +id = "flutterwave-public-key" +description = "Detected a Finicity Public Key, potentially exposing public cryptographic operations and integrations." +regex = '''FLWPUBK_TEST-(?i)[a-h0-9]{32}-X''' +entropy = 2 +keywords = ["flwpubk_test"] + +[[rules]] +id = "flutterwave-secret-key" +description = "Identified a Flutterwave Secret Key, risking unauthorized financial transactions and data breaches." +regex = '''FLWSECK_TEST-(?i)[a-h0-9]{32}-X''' +entropy = 2 +keywords = ["flwseck_test"] + +[[rules]] +id = "flyio-access-token" +description = "Uncovered a Fly.io API key" +regex = '''\b((?:fo1_[\w-]{43}|fm1[ar]_[a-zA-Z0-9+\/]{100,}={0,3}|fm2_[a-zA-Z0-9+\/]{100,}={0,3}))(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 4 +keywords = [ + "fo1_", + "fm1", + "fm2_", +] + +[[rules]] +id = "frameio-api-token" +description = "Found a Frame.io API token, potentially compromising video collaboration and project management." +regex = '''fio-u-(?i)[a-z0-9\-_=]{64}''' +keywords = ["fio-u-"] + +[[rules]] +id = "freemius-secret-key" +description = "Detected a Freemius secret key, potentially exposing sensitive information." +regex = '''(?i)["']secret_key["']\s*=>\s*["'](sk_[\S]{29})["']''' +path = '''(?i)\.php$''' +keywords = ["secret_key"] + +[[rules]] +id = "freshbooks-access-token" +description = "Discovered a Freshbooks Access Token, posing a risk to accounting software access and sensitive financial data exposure." +regex = '''(?i)[\w.-]{0,50}?(?:freshbooks)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["freshbooks"] + +[[rules]] +id = "gcp-api-key" +description = "Uncovered a GCP API key, which could lead to unauthorized access to Google Cloud services and data breaches." +regex = '''\b(AIza[\w-]{35})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["aiza"] +[[rules.allowlists]] +regexes = [ + '''AIzaSyabcdefghijklmnopqrstuvwxyz1234567''', + '''AIzaSyAnLA7NfeLquW1tJFpx_eQCxoX-oo6YyIs''', + '''AIzaSyCkEhVjf3pduRDt6d1yKOMitrUEke8agEM''', + '''AIzaSyDMAScliyLx7F0NPDEJi1QmyCgHIAODrlU''', + '''AIzaSyD3asb-2pEZVqMkmL6M9N6nHZRR_znhrh0''', + '''AIzayDNSXIbFmlXbIE6mCzDLQAqITYefhixbX4A''', + '''AIzaSyAdOS2zB6NCsk1pCdZ4-P6GBdi_UUPwX7c''', + '''AIzaSyASWm6HmTMdYWpgMnjRBjxcQ9CKctWmLd4''', + '''AIzaSyANUvH9H9BsUccjsu2pCmEkOPjjaXeDQgY''', + '''AIzaSyA5_iVawFQ8ABuTZNUdcwERLJv_a_p4wtM''', + '''AIzaSyA4UrcGxgwQFTfaI3no3t7Lt1sjmdnP5sQ''', + '''AIzaSyDSb51JiIcB6OJpwwMicseKRhhrOq1cS7g''', + '''AIzaSyBF2RrAIm4a0mO64EShQfqfd2AFnzAvvuU''', + '''AIzaSyBcE-OOIbhjyR83gm4r2MFCu4MJmprNXsw''', + '''AIzaSyB8qGxt4ec15vitgn44duC5ucxaOi4FmqE''', + '''AIzaSyA8vmApnrHNFE0bApF4hoZ11srVL_n0nvY''', +] + +[[rules]] +id = "generic-api-key" +description = "Detected a Generic API Key, potentially exposing access to various services and sensitive operations." 
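+# This is the broadest rule in the file and relies on several layers to stay
+# usable: the keyword list acts as a cheap prefilter, the regex then requires
+# an assignment-like context (an identifier such as "api_key" or "secret", an
+# operator like = or :, then a 10-150 character value) and captures only the
+# value, and the 3.5 entropy threshold together with the allowlists and the
+# long stopword list below discard low-entropy or obviously non-secret matches.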
+regex = '''(?i)[\w.-]{0,50}?(?:access|auth|(?-i:[Aa]pi|API)|credential|creds|key|passw(?:or)?d|secret|token)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([\w.=-]{10,150}|[a-z0-9][a-z0-9+/]{11,}={0,3})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3.5 +keywords = [ + "access", + "api", + "auth", + "key", + "credential", + "creds", + "passwd", + "password", + "secret", + "token", +] +[[rules.allowlists]] +regexes = [ + '''^[a-zA-Z_.-]+$''', +] +[[rules.allowlists]] +regexTarget = "match" +regexes = [ + '''(?i)(?:access(?:ibility|or)|access[_.-]?id|random[_.-]?access|api[_.-]?(?:id|name|version)|rapid|capital|[a-z0-9-]*?api[a-z0-9-]*?:jar:|author|X-MS-Exchange-Organization-Auth|Authentication-Results|(?:credentials?[_.-]?id|withCredentials)|(?:bucket|foreign|hot|idx|natural|primary|pub(?:lic)?|schema|sequence)[_.-]?key|(?:turkey)|key[_.-]?(?:alias|board|code|frame|id|length|mesh|name|pair|press(?:ed)?|ring|selector|signature|size|stone|storetype|word|up|down|left|right)|key[_.-]?vault[_.-]?(?:id|name)|keyVaultToStoreSecrets|key(?:store|tab)[_.-]?(?:file|path)|issuerkeyhash|(?-i:[DdMm]onkey|[DM]ONKEY)|keying|(?:secret)[_.-]?(?:length|name|size)|UserSecretsId|(?:csrf)[_.-]?token|(?:io\.jsonwebtoken[ \t]?:[ \t]?[\w-]+)|(?:api|credentials|token)[_.-]?(?:endpoint|ur[il])|public[_.-]?token|(?:key|token)[_.-]?file|(?-i:(?:[A-Z_]+=\n[A-Z_]+=|[a-z_]+=\n[a-z_]+=)(?:\n|\z))|(?-i:(?:[A-Z.]+=\n[A-Z.]+=|[a-z.]+=\n[a-z.]+=)(?:\n|\z)))''', +] +stopwords = [ + "000000", + "aaaaaa", + "about", + "abstract", + "academy", + "acces", + "account", + "act-", + "act.", + "act_", + "action", + "active", + "actively", + "activity", + "adapter", + "add-", + "add.", + "add_", + "add-on", + "addon", + "addres", + "admin", + "adobe", + "advanced", + "adventure", + "agent", + "agile", + "air-", + "air.", + "air_", + "ajax", + "akka", + "alert", + "alfred", + "algorithm", + "all-", + "all.", + "all_", + "alloy", + "alpha", + "amazon", + "amqp", + "analysi", + "analytic", + "analyzer", + "android", + "angular", + "angularj", + "animate", + "animation", + "another", + "ansible", + "answer", + "ant-", + "ant.", + "ant_", + "any-", + "any.", + "any_", + "apache", + "app-", + "app-", + "app.", + "app.", + "app_", + "app_", + "apple", + "arch", + "archive", + "archived", + "arduino", + "array", + "art-", + "art.", + "art_", + "article", + "asp-", + "asp.", + "asp_", + "asset", + "async", + "atom", + "attention", + "audio", + "audit", + "aura", + "auth", + "author", + "author", + "authorize", + "auto", + "automated", + "automatic", + "awesome", + "aws_", + "azure", + "back", + "backbone", + "backend", + "backup", + "bar-", + "bar.", + "bar_", + "base", + "based", + "bash", + "basic", + "batch", + "been", + "beer", + "behavior", + "being", + "benchmark", + "best", + "beta", + "better", + "big-", + "big.", + "big_", + "binary", + "binding", + "bit-", + "bit.", + "bit_", + "bitcoin", + "block", + "blog", + "board", + "book", + "bookmark", + "boost", + "boot", + "bootstrap", + "bosh", + "bot-", + "bot.", + "bot_", + "bower", + "box-", + "box.", + "box_", + "boxen", + "bracket", + "branch", + "bridge", + "browser", + "brunch", + "buffer", + "bug-", + "bug.", + "bug_", + "build", + "builder", + "building", + "buildout", + "buildpack", + "built", + "bundle", + "busines", + "but-", + "but.", + "but_", + "button", + "cache", + "caching", + "cakephp", + "calendar", + "call", + "camera", + "campfire", + "can-", + "can.", + "can_", + "canva", + "captcha", + "capture", + "card", + "carousel", + "case", + "cassandra", + 
"cat-", + "cat.", + "cat_", + "category", + "center", + "cento", + "challenge", + "change", + "changelog", + "channel", + "chart", + "chat", + "cheat", + "check", + "checker", + "chef", + "ches", + "chinese", + "chosen", + "chrome", + "ckeditor", + "clas", + "classe", + "classic", + "clean", + "cli-", + "cli.", + "cli_", + "client", + "client", + "clojure", + "clone", + "closure", + "cloud", + "club", + "cluster", + "cms-", + "cms_", + "coco", + "code", + "coding", + "coffee", + "color", + "combination", + "combo", + "command", + "commander", + "comment", + "commit", + "common", + "community", + "compas", + "compiler", + "complete", + "component", + "composer", + "computer", + "computing", + "con-", + "con.", + "con_", + "concept", + "conf", + "config", + "config", + "connect", + "connector", + "console", + "contact", + "container", + "contao", + "content", + "contest", + "context", + "control", + "convert", + "converter", + "conway'", + "cookbook", + "cookie", + "cool", + "copy", + "cordova", + "core", + "couchbase", + "couchdb", + "countdown", + "counter", + "course", + "craft", + "crawler", + "create", + "creating", + "creator", + "credential", + "crm-", + "crm.", + "crm_", + "cros", + "crud", + "csv-", + "csv.", + "csv_", + "cube", + "cucumber", + "cuda", + "current", + "currently", + "custom", + "daemon", + "dark", + "dart", + "dash", + "dashboard", + "data", + "database", + "date", + "day-", + "day.", + "day_", + "dead", + "debian", + "debug", + "debug", + "debugger", + "deck", + "define", + "del-", + "del.", + "del_", + "delete", + "demo", + "deploy", + "design", + "designer", + "desktop", + "detection", + "detector", + "dev-", + "dev.", + "dev_", + "develop", + "developer", + "device", + "devise", + "diff", + "digital", + "directive", + "directory", + "discovery", + "display", + "django", + "dns-", + "dns_", + "doc-", + "doc-", + "doc.", + "doc.", + "doc_", + "doc_", + "docker", + "docpad", + "doctrine", + "document", + "doe-", + "doe.", + "doe_", + "dojo", + "dom-", + "dom.", + "dom_", + "domain", + "done", + "don't", + "dot-", + "dot.", + "dot_", + "dotfile", + "download", + "draft", + "drag", + "drill", + "drive", + "driven", + "driver", + "drop", + "dropbox", + "drupal", + "dsl-", + "dsl.", + "dsl_", + "dynamic", + "easy", + "_ec2_", + "ecdsa", + "eclipse", + "edit", + "editing", + "edition", + "editor", + "element", + "emac", + "email", + "embed", + "embedded", + "ember", + "emitter", + "emulator", + "encoding", + "endpoint", + "engine", + "english", + "enhanced", + "entity", + "entry", + "env_", + "episode", + "erlang", + "error", + "espresso", + "event", + "evented", + "example", + "example", + "exchange", + "exercise", + "experiment", + "expire", + "exploit", + "explorer", + "export", + "exporter", + "expres", + "ext-", + "ext.", + "ext_", + "extended", + "extension", + "external", + "extra", + "extractor", + "fabric", + "facebook", + "factory", + "fake", + "fast", + "feature", + "feed", + "fewfwef", + "ffmpeg", + "field", + "file", + "filter", + "find", + "finder", + "firefox", + "firmware", + "first", + "fish", + "fix-", + "fix_", + "flash", + "flask", + "flat", + "flex", + "flexible", + "flickr", + "flow", + "fluent", + "fluentd", + "fluid", + "folder", + "font", + "force", + "foreman", + "fork", + "form", + "format", + "formatter", + "forum", + "foundry", + "framework", + "free", + "friend", + "friendly", + "front-end", + "frontend", + "ftp-", + "ftp.", + "ftp_", + "fuel", + "full", + "fun-", + "fun.", + "fun_", + "func", + "future", + "gaia", + "gallery", + "game", + 
"gateway", + "gem-", + "gem.", + "gem_", + "gen-", + "gen.", + "gen_", + "general", + "generator", + "generic", + "genetic", + "get-", + "get.", + "get_", + "getenv", + "getting", + "ghost", + "gist", + "git-", + "git.", + "git_", + "github", + "gitignore", + "gitlab", + "glas", + "gmail", + "gnome", + "gnu-", + "gnu.", + "gnu_", + "goal", + "golang", + "gollum", + "good", + "google", + "gpu-", + "gpu.", + "gpu_", + "gradle", + "grail", + "graph", + "graphic", + "great", + "grid", + "groovy", + "group", + "grunt", + "guard", + "gui-", + "gui.", + "gui_", + "guide", + "guideline", + "gulp", + "gwt-", + "gwt.", + "gwt_", + "hack", + "hackathon", + "hacker", + "hacking", + "hadoop", + "haml", + "handler", + "hardware", + "has-", + "has_", + "hash", + "haskell", + "have", + "haxe", + "hello", + "help", + "helper", + "here", + "hero", + "heroku", + "high", + "hipchat", + "history", + "home", + "homebrew", + "homepage", + "hook", + "host", + "hosting", + "hot-", + "hot.", + "hot_", + "house", + "how-", + "how.", + "how_", + "html", + "http", + "hub-", + "hub.", + "hub_", + "hubot", + "human", + "icon", + "ide-", + "ide.", + "ide_", + "idea", + "identity", + "idiomatic", + "image", + "impact", + "import", + "important", + "importer", + "impres", + "index", + "infinite", + "info", + "injection", + "inline", + "input", + "inside", + "inspector", + "instagram", + "install", + "installer", + "instant", + "intellij", + "interface", + "internet", + "interview", + "into", + "intro", + "ionic", + "iphone", + "ipython", + "irc-", + "irc_", + "iso-", + "iso.", + "iso_", + "issue", + "jade", + "jasmine", + "java", + "jbos", + "jekyll", + "jenkin", + "jetbrains", + "job-", + "job.", + "job_", + "joomla", + "jpa-", + "jpa.", + "jpa_", + "jquery", + "json", + "just", + "kafka", + "karma", + "kata", + "kernel", + "keyboard", + "kindle", + "kit-", + "kit.", + "kit_", + "kitchen", + "knife", + "koan", + "kohana", + "lab-", + "lab-", + "lab.", + "lab.", + "lab_", + "lab_", + "lambda", + "lamp", + "language", + "laravel", + "last", + "latest", + "latex", + "launcher", + "layer", + "layout", + "lazy", + "ldap", + "leaflet", + "league", + "learn", + "learning", + "led-", + "led.", + "led_", + "leetcode", + "les-", + "les.", + "les_", + "level", + "leveldb", + "lib-", + "lib.", + "lib_", + "librarie", + "library", + "license", + "life", + "liferay", + "light", + "lightbox", + "like", + "line", + "link", + "linked", + "linkedin", + "linux", + "lisp", + "list", + "lite", + "little", + "load", + "loader", + "local", + "location", + "lock", + "log-", + "log.", + "log_", + "logger", + "logging", + "logic", + "login", + "logstash", + "longer", + "look", + "love", + "lua-", + "lua.", + "lua_", + "mac-", + "mac.", + "mac_", + "machine", + "made", + "magento", + "magic", + "mail", + "make", + "maker", + "making", + "man-", + "man.", + "man_", + "manage", + "manager", + "manifest", + "manual", + "map-", + "map-", + "map.", + "map.", + "map_", + "map_", + "mapper", + "mapping", + "markdown", + "markup", + "master", + "math", + "matrix", + "maven", + "md5", + "mean", + "media", + "mediawiki", + "meetup", + "memcached", + "memory", + "menu", + "merchant", + "message", + "messaging", + "meta", + "metadata", + "meteor", + "method", + "metric", + "micro", + "middleman", + "migration", + "minecraft", + "miner", + "mini", + "minimal", + "mirror", + "mit-", + "mit.", + "mit_", + "mobile", + "mocha", + "mock", + "mod-", + "mod.", + "mod_", + "mode", + "model", + "modern", + "modular", + "module", + "modx", + "money", + "mongo", + 
"mongodb", + "mongoid", + "mongoose", + "monitor", + "monkey", + "more", + "motion", + "moved", + "movie", + "mozilla", + "mqtt", + "mule", + "multi", + "multiple", + "music", + "mustache", + "mvc-", + "mvc.", + "mvc_", + "mysql", + "nagio", + "name", + "native", + "need", + "neo-", + "neo.", + "neo_", + "nest", + "nested", + "net-", + "net.", + "net_", + "nette", + "network", + "new-", + "new-", + "new.", + "new.", + "new_", + "new_", + "next", + "nginx", + "ninja", + "nlp-", + "nlp.", + "nlp_", + "node", + "nodej", + "nosql", + "not-", + "not.", + "not_", + "note", + "notebook", + "notepad", + "notice", + "notifier", + "now-", + "now.", + "now_", + "number", + "oauth", + "object", + "objective", + "obsolete", + "ocaml", + "octopres", + "official", + "old-", + "old.", + "old_", + "onboard", + "online", + "only", + "open", + "opencv", + "opengl", + "openshift", + "openwrt", + "option", + "oracle", + "org-", + "org.", + "org_", + "origin", + "original", + "orm-", + "orm.", + "orm_", + "osx-", + "osx_", + "our-", + "our.", + "our_", + "out-", + "out.", + "out_", + "output", + "over", + "overview", + "own-", + "own.", + "own_", + "pack", + "package", + "packet", + "page", + "page", + "panel", + "paper", + "paperclip", + "para", + "parallax", + "parallel", + "parse", + "parser", + "parsing", + "particle", + "party", + "password", + "patch", + "path", + "pattern", + "payment", + "paypal", + "pdf-", + "pdf.", + "pdf_", + "pebble", + "people", + "perl", + "personal", + "phalcon", + "phoenix", + "phone", + "phonegap", + "photo", + "php-", + "php.", + "php_", + "physic", + "picker", + "pipeline", + "platform", + "play", + "player", + "please", + "plu-", + "plu.", + "plu_", + "plug-in", + "plugin", + "plupload", + "png-", + "png.", + "png_", + "poker", + "polyfill", + "polymer", + "pool", + "pop-", + "pop.", + "pop_", + "popcorn", + "popup", + "port", + "portable", + "portal", + "portfolio", + "post", + "power", + "powered", + "powerful", + "prelude", + "pretty", + "preview", + "principle", + "print", + "pro-", + "pro.", + "pro_", + "problem", + "proc", + "product", + "profile", + "profiler", + "program", + "progres", + "project", + "protocol", + "prototype", + "provider", + "proxy", + "public", + "pull", + "puppet", + "pure", + "purpose", + "push", + "pusher", + "pyramid", + "python", + "quality", + "query", + "queue", + "quick", + "rabbitmq", + "rack", + "radio", + "rail", + "railscast", + "random", + "range", + "raspberry", + "rdf-", + "rdf.", + "rdf_", + "react", + "reactive", + "read", + "reader", + "readme", + "ready", + "real", + "reality", + "real-time", + "realtime", + "recipe", + "recorder", + "red-", + "red.", + "red_", + "reddit", + "redi", + "redmine", + "reference", + "refinery", + "refresh", + "registry", + "related", + "release", + "remote", + "rendering", + "repo", + "report", + "request", + "require", + "required", + "requirej", + "research", + "resource", + "response", + "resque", + "rest", + "restful", + "resume", + "reveal", + "reverse", + "review", + "riak", + "rich", + "right", + "ring", + "robot", + "role", + "room", + "router", + "routing", + "rpc-", + "rpc.", + "rpc_", + "rpg-", + "rpg.", + "rpg_", + "rspec", + "ruby-", + "ruby.", + "ruby_", + "rule", + "run-", + "run.", + "run_", + "runner", + "running", + "runtime", + "rust", + "rvm-", + "rvm.", + "rvm_", + "salt", + "sample", + "sample", + "sandbox", + "sas-", + "sas.", + "sas_", + "sbt-", + "sbt.", + "sbt_", + "scala", + "scalable", + "scanner", + "schema", + "scheme", + "school", + "science", + "scraper", + "scratch", 
+ "screen", + "script", + "scroll", + "scs-", + "scs.", + "scs_", + "sdk-", + "sdk.", + "sdk_", + "sdl-", + "sdl.", + "sdl_", + "search", + "secure", + "security", + "see-", + "see.", + "see_", + "seed", + "select", + "selector", + "selenium", + "semantic", + "sencha", + "send", + "sentiment", + "serie", + "server", + "service", + "session", + "set-", + "set.", + "set_", + "setting", + "setting", + "setup", + "sha1", + "sha2", + "sha256", + "share", + "shared", + "sharing", + "sheet", + "shell", + "shield", + "shipping", + "shop", + "shopify", + "shortener", + "should", + "show", + "showcase", + "side", + "silex", + "simple", + "simulator", + "single", + "site", + "skeleton", + "sketch", + "skin", + "slack", + "slide", + "slider", + "slim", + "small", + "smart", + "smtp", + "snake", + "snapshot", + "snippet", + "soap", + "social", + "socket", + "software", + "solarized", + "solr", + "solution", + "solver", + "some", + "soon", + "source", + "space", + "spark", + "spatial", + "spec", + "sphinx", + "spine", + "spotify", + "spree", + "spring", + "sprite", + "sql-", + "sql.", + "sql_", + "sqlite", + "ssh-", + "ssh.", + "ssh_", + "stack", + "staging", + "standard", + "stanford", + "start", + "started", + "starter", + "startup", + "stat", + "statamic", + "state", + "static", + "statistic", + "statsd", + "statu", + "steam", + "step", + "still", + "stm-", + "stm.", + "stm_", + "storage", + "store", + "storm", + "story", + "strategy", + "stream", + "streaming", + "string", + "stripe", + "structure", + "studio", + "study", + "stuff", + "style", + "sublime", + "sugar", + "suite", + "summary", + "super", + "support", + "supported", + "svg-", + "svg.", + "svg_", + "svn-", + "svn.", + "svn_", + "swagger", + "swift", + "switch", + "switcher", + "symfony", + "symphony", + "sync", + "synopsi", + "syntax", + "system", + "system", + "tab-", + "tab-", + "tab.", + "tab.", + "tab_", + "tab_", + "table", + "tag-", + "tag-", + "tag.", + "tag.", + "tag_", + "tag_", + "talk", + "target", + "task", + "tcp-", + "tcp.", + "tcp_", + "tdd-", + "tdd.", + "tdd_", + "team", + "tech", + "template", + "term", + "terminal", + "testing", + "tetri", + "text", + "textmate", + "theme", + "theory", + "three", + "thrift", + "time", + "timeline", + "timer", + "tiny", + "tinymce", + "tip-", + "tip.", + "tip_", + "title", + "todo", + "todomvc", + "token", + "tool", + "toolbox", + "toolkit", + "top-", + "top.", + "top_", + "tornado", + "touch", + "tower", + "tracker", + "tracking", + "traffic", + "training", + "transfer", + "translate", + "transport", + "tree", + "trello", + "try-", + "try.", + "try_", + "tumblr", + "tut-", + "tut.", + "tut_", + "tutorial", + "tweet", + "twig", + "twitter", + "type", + "typo", + "ubuntu", + "uiview", + "ultimate", + "under", + "unit", + "unity", + "universal", + "unix", + "update", + "updated", + "upgrade", + "upload", + "uploader", + "uri-", + "uri.", + "uri_", + "url-", + "url.", + "url_", + "usage", + "usb-", + "usb.", + "usb_", + "use-", + "use.", + "use_", + "used", + "useful", + "user", + "using", + "util", + "utilitie", + "utility", + "vagrant", + "validator", + "value", + "variou", + "varnish", + "version", + "via-", + "via.", + "via_", + "video", + "view", + "viewer", + "vim-", + "vim.", + "vim_", + "vimrc", + "virtual", + "vision", + "visual", + "vpn", + "want", + "warning", + "watch", + "watcher", + "wave", + "way-", + "way.", + "way_", + "weather", + "web-", + "web_", + "webapp", + "webgl", + "webhook", + "webkit", + "webrtc", + "website", + "websocket", + "welcome", + "welcome", + "what", + 
"what'", + "when", + "where", + "which", + "why-", + "why.", + "why_", + "widget", + "wifi", + "wiki", + "win-", + "win.", + "win_", + "window", + "wip-", + "wip.", + "wip_", + "within", + "without", + "wizard", + "word", + "wordpres", + "work", + "worker", + "workflow", + "working", + "workshop", + "world", + "wrapper", + "write", + "writer", + "writing", + "written", + "www-", + "www.", + "www_", + "xamarin", + "xcode", + "xml-", + "xml.", + "xml_", + "xmpp", + "xxxxxx", + "yahoo", + "yaml", + "yandex", + "yeoman", + "yet-", + "yet.", + "yet_", + "yii-", + "yii.", + "yii_", + "youtube", + "yui-", + "yui.", + "yui_", + "zend", + "zero", + "zip-", + "zip.", + "zip_", + "zsh-", + "zsh.", + "zsh_", + "6fe4476ee5a1832882e326b506d14126", +] +[[rules.allowlists]] +regexTarget = "line" +regexes = [ + '''--mount=type=secret,''', + '''import[ \t]+{[ \t\w,]+}[ \t]+from[ \t]+['"][^'"]+['"]''', +] +[[rules.allowlists]] +condition = "AND" +paths = [ + '''\.bb$''','''\.bbappend$''','''\.bbclass$''','''\.inc$''', +] +regexTarget = "line" +regexes = [ + '''LICENSE[^=]*=\s*"[^"]+''', + '''LIC_FILES_CHKSUM[^=]*=\s*"[^"]+''', + '''SRC[^=]*=\s*"[a-zA-Z0-9]+''', +] + +[[rules]] +id = "github-app-token" +description = "Identified a GitHub App Token, which may compromise GitHub application integrations and source code security." +regex = '''(?:ghu|ghs)_[0-9a-zA-Z]{36}''' +entropy = 3 +keywords = [ + "ghu_", + "ghs_", +] +[[rules.allowlists]] +paths = [ + '''(?:^|/)@octokit/auth-token/README\.md$''', +] + +[[rules]] +id = "github-fine-grained-pat" +description = "Found a GitHub Fine-Grained Personal Access Token, risking unauthorized repository access and code manipulation." +regex = '''github_pat_\w{82}''' +entropy = 3 +keywords = ["github_pat_"] + +[[rules]] +id = "github-oauth" +description = "Discovered a GitHub OAuth Access Token, posing a risk of compromised GitHub account integrations and data leaks." +regex = '''gho_[0-9a-zA-Z]{36}''' +entropy = 3 +keywords = ["gho_"] + +[[rules]] +id = "github-pat" +description = "Uncovered a GitHub Personal Access Token, potentially leading to unauthorized repository access and sensitive content exposure." +regex = '''ghp_[0-9a-zA-Z]{36}''' +entropy = 3 +keywords = ["ghp_"] +[[rules.allowlists]] +paths = [ + '''(?:^|/)@octokit/auth-token/README\.md$''', +] + +[[rules]] +id = "github-refresh-token" +description = "Detected a GitHub Refresh Token, which could allow prolonged unauthorized access to GitHub services." +regex = '''ghr_[0-9a-zA-Z]{36}''' +entropy = 3 +keywords = ["ghr_"] + +[[rules]] +id = "gitlab-cicd-job-token" +description = "Identified a GitLab CI/CD Job Token, potential access to projects and some APIs on behalf of a user while the CI job is running." +regex = '''glcbt-[0-9a-zA-Z]{1,5}_[0-9a-zA-Z_-]{20}''' +entropy = 3 +keywords = ["glcbt-"] + +[[rules]] +id = "gitlab-deploy-token" +description = "Identified a GitLab Deploy Token, risking access to repositories, packages and containers with write access." +regex = '''gldt-[0-9a-zA-Z_\-]{20}''' +entropy = 3 +keywords = ["gldt-"] + +[[rules]] +id = "gitlab-feature-flag-client-token" +description = "Identified a GitLab feature flag client token, risks exposing user lists and features flags used by an application." +regex = '''glffct-[0-9a-zA-Z_\-]{20}''' +entropy = 3 +keywords = ["glffct-"] + +[[rules]] +id = "gitlab-feed-token" +description = "Identified a GitLab feed token, risking exposure of user data." 
+regex = '''glft-[0-9a-zA-Z_\-]{20}''' +entropy = 3 +keywords = ["glft-"] + +[[rules]] +id = "gitlab-incoming-mail-token" +description = "Identified a GitLab incoming mail token, risking manipulation of data sent by mail." +regex = '''glimt-[0-9a-zA-Z_\-]{25}''' +entropy = 3 +keywords = ["glimt-"] + +[[rules]] +id = "gitlab-kubernetes-agent-token" +description = "Identified a GitLab Kubernetes Agent token, risking access to repos and registry of projects connected via agent." +regex = '''glagent-[0-9a-zA-Z_\-]{50}''' +entropy = 3 +keywords = ["glagent-"] + +[[rules]] +id = "gitlab-oauth-app-secret" +description = "Identified a GitLab OIDC Application Secret, risking access to apps using GitLab as authentication provider." +regex = '''gloas-[0-9a-zA-Z_\-]{64}''' +entropy = 3 +keywords = ["gloas-"] + +[[rules]] +id = "gitlab-pat" +description = "Identified a GitLab Personal Access Token, risking unauthorized access to GitLab repositories and codebase exposure." +regex = '''glpat-[\w-]{20}''' +entropy = 3 +keywords = ["glpat-"] + +[[rules]] +id = "gitlab-pat-routable" +description = "Identified a GitLab Personal Access Token (routable), risking unauthorized access to GitLab repositories and codebase exposure." +regex = '''\bglpat-[0-9a-zA-Z_-]{27,300}\.[0-9a-z]{2}[0-9a-z]{7}\b''' +entropy = 4 +keywords = ["glpat-"] + +[[rules]] +id = "gitlab-ptt" +description = "Found a GitLab Pipeline Trigger Token, potentially compromising continuous integration workflows and project security." +regex = '''glptt-[0-9a-f]{40}''' +entropy = 3 +keywords = ["glptt-"] + +[[rules]] +id = "gitlab-rrt" +description = "Discovered a GitLab Runner Registration Token, posing a risk to CI/CD pipeline integrity and unauthorized access." +regex = '''GR1348941[\w-]{20}''' +entropy = 3 +keywords = ["gr1348941"] + +[[rules]] +id = "gitlab-runner-authentication-token" +description = "Discovered a GitLab Runner Authentication Token, posing a risk to CI/CD pipeline integrity and unauthorized access." +regex = '''glrt-[0-9a-zA-Z_\-]{20}''' +entropy = 3 +keywords = ["glrt-"] + +[[rules]] +id = "gitlab-runner-authentication-token-routable" +description = "Discovered a GitLab Runner Authentication Token (Routable), posing a risk to CI/CD pipeline integrity and unauthorized access." +regex = '''\bglrt-t\d_[0-9a-zA-Z_\-]{27,300}\.[0-9a-z]{2}[0-9a-z]{7}\b''' +entropy = 4 +keywords = ["glrt-"] + +[[rules]] +id = "gitlab-scim-token" +description = "Discovered a GitLab SCIM Token, posing a risk to unauthorized access for a organization or instance." +regex = '''glsoat-[0-9a-zA-Z_\-]{20}''' +entropy = 3 +keywords = ["glsoat-"] + +[[rules]] +id = "gitlab-session-cookie" +description = "Discovered a GitLab Session Cookie, posing a risk to unauthorized access to a user account." +regex = '''_gitlab_session=[0-9a-z]{32}''' +entropy = 3 +keywords = ["_gitlab_session="] + +[[rules]] +id = "gitter-access-token" +description = "Uncovered a Gitter Access Token, which may lead to unauthorized access to chat and communication services." +regex = '''(?i)[\w.-]{0,50}?(?:gitter)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9_-]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["gitter"] + +[[rules]] +id = "gocardless-api-token" +description = "Detected a GoCardless API token, potentially risking unauthorized direct debit payment operations and financial data exposure." 
+regex = '''(?i)[\w.-]{0,50}?(?:gocardless)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(live_(?i)[a-z0-9\-_=]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "live_", + "gocardless", +] + +[[rules]] +id = "grafana-api-key" +description = "Identified a Grafana API key, which could compromise monitoring dashboards and sensitive data analytics." +regex = '''(?i)\b(eyJrIjoi[A-Za-z0-9]{70,400}={0,3})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["eyjrijoi"] + +[[rules]] +id = "grafana-cloud-api-token" +description = "Found a Grafana cloud API token, risking unauthorized access to cloud-based monitoring services and data exposure." +regex = '''(?i)\b(glc_[A-Za-z0-9+/]{32,400}={0,3})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["glc_"] + +[[rules]] +id = "grafana-service-account-token" +description = "Discovered a Grafana service account token, posing a risk of compromised monitoring services and data integrity." +regex = '''(?i)\b(glsa_[A-Za-z0-9]{32}_[A-Fa-f0-9]{8})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["glsa_"] + +[[rules]] +id = "harness-api-key" +description = "Identified a Harness Access Token (PAT or SAT), risking unauthorized access to a Harness account." +regex = '''(?:pat|sat)\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9]{24}\.[a-zA-Z0-9]{20}''' +keywords = [ + "pat.", + "sat.", +] + +[[rules]] +id = "hashicorp-tf-api-token" +description = "Uncovered a HashiCorp Terraform user/org API token, which may lead to unauthorized infrastructure management and security breaches." +regex = '''(?i)[a-z0-9]{14}\.(?-i:atlasv1)\.[a-z0-9\-_=]{60,70}''' +entropy = 3.5 +keywords = ["atlasv1"] + +[[rules]] +id = "hashicorp-tf-password" +description = "Identified a HashiCorp Terraform password field, risking unauthorized infrastructure configuration and security breaches." +regex = '''(?i)[\w.-]{0,50}?(?:administrator_login_password|password)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}("[a-z0-9=_\-]{8,20}")(?:[\x60'"\s;]|\\[nr]|$)''' +path = '''(?i)\.(?:tf|hcl)$''' +entropy = 2 +keywords = [ + "administrator_login_password", + "password", +] + +[[rules]] +id = "heroku-api-key" +description = "Detected a Heroku API Key, potentially compromising cloud application deployments and operational security." +regex = '''(?i)[\w.-]{0,50}?(?:heroku)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["heroku"] + +[[rules]] +id = "hubspot-api-key" +description = "Found a HubSpot API Token, posing a risk to CRM data integrity and unauthorized marketing operations." +regex = '''(?i)[\w.-]{0,50}?(?:hubspot)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["hubspot"] + +[[rules]] +id = "huggingface-access-token" +description = "Discovered a Hugging Face Access token, which could lead to unauthorized access to AI models and sensitive data." +regex = '''\b(hf_(?i:[a-z]{34}))(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["hf_"] + +[[rules]] +id = "huggingface-organization-api-token" +description = "Uncovered a Hugging Face Organization API token, potentially compromising AI organization accounts and associated data." 
+regex = '''\b(api_org_(?i:[a-z]{34}))(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["api_org_"] + +[[rules]] +id = "infracost-api-token" +description = "Detected an Infracost API Token, risking unauthorized access to cloud cost estimation tools and financial data." +regex = '''\b(ico-[a-zA-Z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["ico-"] + +[[rules]] +id = "intercom-api-key" +description = "Identified an Intercom API Token, which could compromise customer communication channels and data privacy." +regex = '''(?i)[\w.-]{0,50}?(?:intercom)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9=_\-]{60})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["intercom"] + +[[rules]] +id = "intra42-client-secret" +description = "Found a Intra42 client secret, which could lead to unauthorized access to the 42School API and sensitive data." +regex = '''\b(s-s4t2(?:ud|af)-(?i)[abcdef0123456789]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = [ + "intra", + "s-s4t2ud-", + "s-s4t2af-", +] + +[[rules]] +id = "jfrog-api-key" +description = "Found a JFrog API Key, posing a risk of unauthorized access to software artifact repositories and build pipelines." +regex = '''(?i)[\w.-]{0,50}?(?:jfrog|artifactory|bintray|xray)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{73})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "jfrog", + "artifactory", + "bintray", + "xray", +] + +[[rules]] +id = "jfrog-identity-token" +description = "Discovered a JFrog Identity Token, potentially compromising access to JFrog services and sensitive software artifacts." +regex = '''(?i)[\w.-]{0,50}?(?:jfrog|artifactory|bintray|xray)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "jfrog", + "artifactory", + "bintray", + "xray", +] + +[[rules]] +id = "jwt" +description = "Uncovered a JSON Web Token, which may lead to unauthorized access to web applications and sensitive user data." +regex = '''\b(ey[a-zA-Z0-9]{17,}\.ey[a-zA-Z0-9\/\\_-]{17,}\.(?:[a-zA-Z0-9\/\\_-]{10,}={0,2})?)(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["ey"] + +[[rules]] +id = "jwt-base64" +description = "Detected a Base64-encoded JSON Web Token, posing a risk of exposing encoded authentication and data exchange information." +regex = '''\bZXlK(?:(?PaGJHY2lPaU)|(?PaGNIVWlPaU)|(?PaGNIWWlPaU)|(?PaGRXUWlPaU)|(?PaU5qUWlP)|(?PamNtbDBJanBi)|(?PamRIa2lPaU)|(?PbGNHc2lPbn)|(?PbGJtTWlPaU)|(?PcWEzVWlPaU)|(?PcWQyc2lPb)|(?PcGMzTWlPaU)|(?PcGRpSTZJ)|(?PcmFXUWlP)|(?PclpYbGZiM0J6SWpwY)|(?PcmRIa2lPaUp)|(?PdWIyNWpaU0k2)|(?Pd01tTWlP)|(?Pd01uTWlPaU)|(?Pd2NIUWlPaU)|(?PemRXSWlPaU)|(?PemRuUWlP)|(?PMFlXY2lPaU)|(?PMGVYQWlPaUp)|(?PMWNtd2l)|(?PMWMyVWlPaUp)|(?PMlpYSWlPaU)|(?PMlpYSnphVzl1SWpv)|(?PNElqb2)|(?PNE5XTWlP)|(?PNE5YUWlPaU)|(?PNE5YUWpVekkxTmlJNkl)|(?PNE5YVWlPaU)|(?PNmFYQWlPaU))[a-zA-Z0-9\/\\_+\-\r\n]{40,}={0,2}''' +entropy = 2 +keywords = ["zxlk"] + +[[rules]] +id = "kraken-access-token" +description = "Identified a Kraken Access Token, potentially compromising cryptocurrency trading accounts and financial security." 
+regex = '''(?i)[\w.-]{0,50}?(?:kraken)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9\/=_\+\-]{80,90})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["kraken"] + +[[rules]] +id = "kubernetes-secret-yaml" +description = "Possible Kubernetes Secret detected, posing a risk of leaking credentials/tokens from your deployments" +regex = '''(?i)(?:\bkind:[ \t]*["']?\bsecret\b["']?(?:.|\s){0,200}?\bdata:(?:.|\s){0,100}?\s+([\w.-]+:(?:[ \t]*(?:\||>[-+]?)\s+)?[ \t]*(?:["']?[a-z0-9+/]{10,}={0,3}["']?|\{\{[ \t\w"|$:=,.-]+}}|""|''))|\bdata:(?:.|\s){0,100}?\s+([\w.-]+:(?:[ \t]*(?:\||>[-+]?)\s+)?[ \t]*(?:["']?[a-z0-9+/]{10,}={0,3}["']?|\{\{[ \t\w"|$:=,.-]+}}|""|''))(?:.|\s){0,200}?\bkind:[ \t]*["']?\bsecret\b["']?)''' +path = '''(?i)\.ya?ml$''' +keywords = ["secret"] +[[rules.allowlists]] +regexes = [ + '''[\w.-]+:(?:[ \t]*(?:\||>[-+]?)\s+)?[ \t]*(?:\{\{[ \t\w"|$:=,.-]+}}|""|'')''', +] +[[rules.allowlists]] +regexTarget = "match" +regexes = [ + '''(kind:(?:.|\s)+\n---\n(?:.|\s)+\bdata:|data:(?:.|\s)+\n---\n(?:.|\s)+\bkind:)''', +] + +[[rules]] +id = "kucoin-access-token" +description = "Found a Kucoin Access Token, risking unauthorized access to cryptocurrency exchange services and transactions." +regex = '''(?i)[\w.-]{0,50}?(?:kucoin)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{24})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["kucoin"] + +[[rules]] +id = "kucoin-secret-key" +description = "Discovered a Kucoin Secret Key, which could lead to compromised cryptocurrency operations and financial data breaches." +regex = '''(?i)[\w.-]{0,50}?(?:kucoin)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["kucoin"] + +[[rules]] +id = "launchdarkly-access-token" +description = "Uncovered a Launchdarkly Access Token, potentially compromising feature flag management and application functionality." +regex = '''(?i)[\w.-]{0,50}?(?:launchdarkly)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9=_\-]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["launchdarkly"] + +[[rules]] +id = "linear-api-key" +description = "Detected a Linear API Token, posing a risk to project management tools and sensitive task data." +regex = '''lin_api_(?i)[a-z0-9]{40}''' +entropy = 2 +keywords = ["lin_api_"] + +[[rules]] +id = "linear-client-secret" +description = "Identified a Linear Client Secret, which may compromise secure integrations and sensitive project management data." +regex = '''(?i)[\w.-]{0,50}?(?:linear)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["linear"] + +[[rules]] +id = "linkedin-client-id" +description = "Found a LinkedIn Client ID, risking unauthorized access to LinkedIn integrations and professional data exposure." +regex = '''(?i)[\w.-]{0,50}?(?:linked[_-]?in)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{14})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = [ + "linkedin", + "linked_in", + "linked-in", +] + +[[rules]] +id = "linkedin-client-secret" +description = "Discovered a LinkedIn Client secret, potentially compromising LinkedIn application integrations and user data." 
+regex = '''(?i)[\w.-]{0,50}?(?:linked[_-]?in)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{16})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = [ + "linkedin", + "linked_in", + "linked-in", +] + +[[rules]] +id = "lob-api-key" +description = "Uncovered a Lob API Key, which could lead to unauthorized access to mailing and address verification services." +regex = '''(?i)[\w.-]{0,50}?(?:lob)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}((live|test)_[a-f0-9]{35})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "test_", + "live_", +] + +[[rules]] +id = "lob-pub-api-key" +description = "Detected a Lob Publishable API Key, posing a risk of exposing mail and print service integrations." +regex = '''(?i)[\w.-]{0,50}?(?:lob)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}((test|live)_pub_[a-f0-9]{31})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "test_pub", + "live_pub", + "_pub", +] + +[[rules]] +id = "mailchimp-api-key" +description = "Identified a Mailchimp API key, potentially compromising email marketing campaigns and subscriber data." +regex = '''(?i)[\w.-]{0,50}?(?:MailchimpSDK.initialize|mailchimp)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{32}-us\d\d)(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["mailchimp"] + +[[rules]] +id = "mailgun-private-api-token" +description = "Found a Mailgun private API token, risking unauthorized email service operations and data breaches." +regex = '''(?i)[\w.-]{0,50}?(?:mailgun)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(key-[a-f0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["mailgun"] + +[[rules]] +id = "mailgun-pub-key" +description = "Discovered a Mailgun public validation key, which could expose email verification processes and associated data." +regex = '''(?i)[\w.-]{0,50}?(?:mailgun)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(pubkey-[a-f0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["mailgun"] + +[[rules]] +id = "mailgun-signing-key" +description = "Uncovered a Mailgun webhook signing key, potentially compromising email automation and data integrity." +regex = '''(?i)[\w.-]{0,50}?(?:mailgun)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-h0-9]{32}-[a-h0-9]{8}-[a-h0-9]{8})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["mailgun"] + +[[rules]] +id = "mapbox-api-token" +description = "Detected a MapBox API token, posing a risk to geospatial services and sensitive location data exposure." +regex = '''(?i)[\w.-]{0,50}?(?:mapbox)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(pk\.[a-z0-9]{60}\.[a-z0-9]{22})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["mapbox"] + +[[rules]] +id = "mattermost-access-token" +description = "Identified a Mattermost Access Token, which may compromise team communication channels and data privacy." +regex = '''(?i)[\w.-]{0,50}?(?:mattermost)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{26})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["mattermost"] + +[[rules]] +id = "maxmind-license-key" +description = "Discovered a potential MaxMind license key." +regex = '''\b([A-Za-z0-9]{6}_[A-Za-z0-9]{29}_mmk)(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 4 +keywords = ["_mmk"] + +[[rules]] +id = "messagebird-api-token" +description = "Found a MessageBird API token, risking unauthorized access to communication platforms and message data." 
+regex = '''(?i)[\w.-]{0,50}?(?:message[_-]?bird)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{25})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "messagebird", + "message-bird", + "message_bird", +] + +[[rules]] +id = "messagebird-client-id" +description = "Discovered a MessageBird client ID, potentially compromising API integrations and sensitive communication data." +regex = '''(?i)[\w.-]{0,50}?(?:message[_-]?bird)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "messagebird", + "message-bird", + "message_bird", +] + +[[rules]] +id = "microsoft-teams-webhook" +description = "Uncovered a Microsoft Teams Webhook, which could lead to unauthorized access to team collaboration tools and data leaks." +regex = '''https://[a-z0-9]+\.webhook\.office\.com/webhookb2/[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12}@[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12}/IncomingWebhook/[a-z0-9]{32}/[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12}''' +keywords = [ + "webhook.office.com", + "webhookb2", + "incomingwebhook", +] + +[[rules]] +id = "netlify-access-token" +description = "Detected a Netlify Access Token, potentially compromising web hosting services and site management." +regex = '''(?i)[\w.-]{0,50}?(?:netlify)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9=_\-]{40,46})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["netlify"] + +[[rules]] +id = "new-relic-browser-api-token" +description = "Identified a New Relic ingest browser API token, risking unauthorized access to application performance data and analytics." +regex = '''(?i)[\w.-]{0,50}?(?:new-relic|newrelic|new_relic)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(NRJS-[a-f0-9]{19})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["nrjs-"] + +[[rules]] +id = "new-relic-insert-key" +description = "Discovered a New Relic insight insert key, compromising data injection into the platform." +regex = '''(?i)[\w.-]{0,50}?(?:new-relic|newrelic|new_relic)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(NRII-[a-z0-9-]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["nrii-"] + +[[rules]] +id = "new-relic-user-api-id" +description = "Found a New Relic user API ID, posing a risk to application monitoring services and data integrity." +regex = '''(?i)[\w.-]{0,50}?(?:new-relic|newrelic|new_relic)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "new-relic", + "newrelic", + "new_relic", +] + +[[rules]] +id = "new-relic-user-api-key" +description = "Discovered a New Relic user API Key, which could lead to compromised application insights and performance monitoring." +regex = '''(?i)[\w.-]{0,50}?(?:new-relic|newrelic|new_relic)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(NRAK-[a-z0-9]{27})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["nrak"] + +[[rules]] +id = "npm-access-token" +description = "Uncovered an npm access token, potentially compromising package management and code repository access." +regex = '''(?i)\b(npm_[a-z0-9]{36})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["npm_"] + +[[rules]] +id = "nuget-config-password" +description = "Identified a password within a Nuget config file, potentially compromising package management access." 
+regex = '''(?i)''' +path = '''(?i)nuget\.config$''' +entropy = 1 +keywords = ["|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9=_\-]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "nytimes", + "new-york-times", + "newyorktimes", +] + +[[rules]] +id = "octopus-deploy-api-key" +description = "Discovered a potential Octopus Deploy API key, risking application deployments and operational security." +regex = '''\b(API-[A-Z0-9]{26})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["api-"] + +[[rules]] +id = "okta-access-token" +description = "Identified an Okta Access Token, which may compromise identity management services and user authentication data." +regex = '''[\w.-]{0,50}?(?i:[\w.-]{0,50}?(?:(?-i:[Oo]kta|OKTA))(?:[ \t\w.-]{0,20})[\s'"]{0,3})(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(00[\w=\-]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 4 +keywords = ["okta"] + +[[rules]] +id = "openai-api-key" +description = "Found an OpenAI API Key, posing a risk of unauthorized access to AI services and data manipulation." +regex = '''\b(sk-(?:proj|svcacct|admin)-(?:[A-Za-z0-9_-]{74}|[A-Za-z0-9_-]{58})T3BlbkFJ(?:[A-Za-z0-9_-]{74}|[A-Za-z0-9_-]{58})\b|sk-[a-zA-Z0-9]{20}T3BlbkFJ[a-zA-Z0-9]{20})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["t3blbkfj"] + +[[rules]] +id = "openshift-user-token" +description = "Found an OpenShift user token, potentially compromising an OpenShift/Kubernetes cluster." +regex = '''\b(sha256~[\w-]{43})(?:[^\w-]|\z)''' +entropy = 3.5 +keywords = ["sha256~"] + +[[rules]] +id = "pkcs12-file" +description = "Found a PKCS #12 file, which commonly contain bundled private keys." +path = '''(?i)(?:^|\/)[^\/]+\.p(?:12|fx)$''' + +[[rules]] +id = "plaid-api-token" +description = "Discovered a Plaid API Token, potentially compromising financial data aggregation and banking services." +regex = '''(?i)[\w.-]{0,50}?(?:plaid)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(access-(?:sandbox|development|production)-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["plaid"] + +[[rules]] +id = "plaid-client-id" +description = "Uncovered a Plaid Client ID, which could lead to unauthorized financial service integrations and data breaches." +regex = '''(?i)[\w.-]{0,50}?(?:plaid)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{24})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3.5 +keywords = ["plaid"] + +[[rules]] +id = "plaid-secret-key" +description = "Detected a Plaid Secret key, risking unauthorized access to financial accounts and sensitive transaction data." +regex = '''(?i)[\w.-]{0,50}?(?:plaid)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{30})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3.5 +keywords = ["plaid"] + +[[rules]] +id = "planetscale-api-token" +description = "Identified a PlanetScale API token, potentially compromising database management and operations." +regex = '''\b(pscale_tkn_(?i)[\w=\.-]{32,64})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["pscale_tkn_"] + +[[rules]] +id = "planetscale-oauth-token" +description = "Found a PlanetScale OAuth token, posing a risk to database access control and sensitive data integrity." +regex = '''\b(pscale_oauth_[\w=\.-]{32,64})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["pscale_oauth_"] + +[[rules]] +id = "planetscale-password" +description = "Discovered a PlanetScale password, which could lead to unauthorized database operations and data breaches." 
+regex = '''(?i)\b(pscale_pw_(?i)[\w=\.-]{32,64})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["pscale_pw_"] + +[[rules]] +id = "postman-api-token" +description = "Uncovered a Postman API token, potentially compromising API testing and development workflows." +regex = '''\b(PMAK-(?i)[a-f0-9]{24}\-[a-f0-9]{34})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["pmak-"] + +[[rules]] +id = "prefect-api-token" +description = "Detected a Prefect API token, risking unauthorized access to workflow management and automation services." +regex = '''\b(pnu_[a-zA-Z0-9]{36})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["pnu_"] + +[[rules]] +id = "private-key" +description = "Identified a Private Key, which may compromise cryptographic security and sensitive data encryption." +regex = '''(?i)-----BEGIN[ A-Z0-9_-]{0,100}PRIVATE KEY(?: BLOCK)?-----[\s\S-]{64,}?KEY(?: BLOCK)?-----''' +keywords = ["-----begin"] + +[[rules]] +id = "privateai-api-token" +description = "Identified a PrivateAI Token, posing a risk of unauthorized access to AI services and data manipulation." +regex = '''[\w.-]{0,50}?(?i:[\w.-]{0,50}?(?:private[_-]?ai)(?:[ \t\w.-]{0,20})[\s'"]{0,3})(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = [ + "privateai", + "private_ai", + "private-ai", +] + +[[rules]] +id = "pulumi-api-token" +description = "Found a Pulumi API token, posing a risk to infrastructure as code services and cloud resource management." +regex = '''\b(pul-[a-f0-9]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["pul-"] + +[[rules]] +id = "pypi-upload-token" +description = "Discovered a PyPI upload token, potentially compromising Python package distribution and repository integrity." +regex = '''pypi-AgEIcHlwaS5vcmc[\w-]{50,1000}''' +entropy = 3 +keywords = ["pypi-ageichlwas5vcmc"] + +[[rules]] +id = "rapidapi-access-token" +description = "Uncovered a RapidAPI Access Token, which could lead to unauthorized access to various APIs and data services." +regex = '''(?i)[\w.-]{0,50}?(?:rapidapi)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9_-]{50})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["rapidapi"] + +[[rules]] +id = "readme-api-token" +description = "Detected a Readme API token, risking unauthorized documentation management and content exposure." +regex = '''\b(rdme_[a-z0-9]{70})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["rdme_"] + +[[rules]] +id = "rubygems-api-token" +description = "Identified a Rubygem API token, potentially compromising Ruby library distribution and package management." +regex = '''\b(rubygems_[a-f0-9]{48})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["rubygems_"] + +[[rules]] +id = "scalingo-api-token" +description = "Found a Scalingo API token, posing a risk to cloud platform services and application deployment security." +regex = '''\b(tk-us-[\w-]{48})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["tk-us-"] + +[[rules]] +id = "sendbird-access-id" +description = "Discovered a Sendbird Access ID, which could compromise chat and messaging platform integrations." 
+regex = '''(?i)[\w.-]{0,50}?(?:sendbird)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["sendbird"] + +[[rules]] +id = "sendbird-access-token" +description = "Uncovered a Sendbird Access Token, potentially risking unauthorized access to communication services and user data." +regex = '''(?i)[\w.-]{0,50}?(?:sendbird)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["sendbird"] + +[[rules]] +id = "sendgrid-api-token" +description = "Detected a SendGrid API token, posing a risk of unauthorized email service operations and data exposure." +regex = '''\b(SG\.(?i)[a-z0-9=_\-\.]{66})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["sg."] + +[[rules]] +id = "sendinblue-api-token" +description = "Identified a Sendinblue API token, which may compromise email marketing services and subscriber data privacy." +regex = '''\b(xkeysib-[a-f0-9]{64}\-(?i)[a-z0-9]{16})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["xkeysib-"] + +[[rules]] +id = "sentry-access-token" +description = "Found a Sentry.io Access Token (old format), risking unauthorized access to error tracking services and sensitive application data." +regex = '''(?i)[\w.-]{0,50}?(?:sentry)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["sentry"] + +[[rules]] +id = "sentry-org-token" +description = "Found a Sentry.io Organization Token, risking unauthorized access to error tracking services and sensitive application data." +regex = '''\bsntrys_eyJpYXQiO[a-zA-Z0-9+/]{10,200}(?:LCJyZWdpb25fdXJs|InJlZ2lvbl91cmwi|cmVnaW9uX3VybCI6)[a-zA-Z0-9+/]{10,200}={0,2}_[a-zA-Z0-9+/]{43}(?:[^a-zA-Z0-9+/]|\z)''' +entropy = 4.5 +keywords = ["sntrys_eyjpyxqio"] + +[[rules]] +id = "sentry-user-token" +description = "Found a Sentry.io User Token, risking unauthorized access to error tracking services and sensitive application data." +regex = '''\b(sntryu_[a-f0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3.5 +keywords = ["sntryu_"] + +[[rules]] +id = "settlemint-application-access-token" +description = "Found a Settlemint Application Access Token." +regex = '''\b(sm_aat_[a-zA-Z0-9]{16})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["sm_aat"] + +[[rules]] +id = "settlemint-personal-access-token" +description = "Found a Settlemint Personal Access Token." +regex = '''\b(sm_pat_[a-zA-Z0-9]{16})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["sm_pat"] + +[[rules]] +id = "settlemint-service-access-token" +description = "Found a Settlemint Service Access Token." +regex = '''\b(sm_sat_[a-zA-Z0-9]{16})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["sm_sat"] + +[[rules]] +id = "shippo-api-token" +description = "Discovered a Shippo API token, potentially compromising shipping services and customer order data." +regex = '''\b(shippo_(?:live|test)_[a-fA-F0-9]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["shippo_"] + +[[rules]] +id = "shopify-access-token" +description = "Uncovered a Shopify access token, which could lead to unauthorized e-commerce platform access and data breaches." 
+regex = '''shpat_[a-fA-F0-9]{32}''' +entropy = 2 +keywords = ["shpat_"] + +[[rules]] +id = "shopify-custom-access-token" +description = "Detected a Shopify custom access token, potentially compromising custom app integrations and e-commerce data security." +regex = '''shpca_[a-fA-F0-9]{32}''' +entropy = 2 +keywords = ["shpca_"] + +[[rules]] +id = "shopify-private-app-access-token" +description = "Identified a Shopify private app access token, risking unauthorized access to private app data and store operations." +regex = '''shppa_[a-fA-F0-9]{32}''' +entropy = 2 +keywords = ["shppa_"] + +[[rules]] +id = "shopify-shared-secret" +description = "Found a Shopify shared secret, posing a risk to application authentication and e-commerce platform security." +regex = '''shpss_[a-fA-F0-9]{32}''' +entropy = 2 +keywords = ["shpss_"] + +[[rules]] +id = "sidekiq-secret" +description = "Discovered a Sidekiq Secret, which could lead to compromised background job processing and application data breaches." +regex = '''(?i)[\w.-]{0,50}?(?:BUNDLE_ENTERPRISE__CONTRIBSYS__COM|BUNDLE_GEMS__CONTRIBSYS__COM)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{8}:[a-f0-9]{8})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "bundle_enterprise__contribsys__com", + "bundle_gems__contribsys__com", +] + +[[rules]] +id = "sidekiq-sensitive-url" +description = "Uncovered a Sidekiq Sensitive URL, potentially exposing internal job queues and sensitive operation details." +regex = '''(?i)\bhttps?://([a-f0-9]{8}:[a-f0-9]{8})@(?:gems.contribsys.com|enterprise.contribsys.com)(?:[\/|\#|\?|:]|$)''' +keywords = [ + "gems.contribsys.com", + "enterprise.contribsys.com", +] + +[[rules]] +id = "slack-app-token" +description = "Detected a Slack App-level token, risking unauthorized access to Slack applications and workspace data." +regex = '''(?i)xapp-\d-[A-Z0-9]+-\d+-[a-z0-9]+''' +entropy = 2 +keywords = ["xapp"] + +[[rules]] +id = "slack-bot-token" +description = "Identified a Slack Bot token, which may compromise bot integrations and communication channel security." +regex = '''xoxb-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*''' +entropy = 3 +keywords = ["xoxb"] + +[[rules]] +id = "slack-config-access-token" +description = "Found a Slack Configuration access token, posing a risk to workspace configuration and sensitive data access." +regex = '''(?i)xoxe.xox[bp]-\d-[A-Z0-9]{163,166}''' +entropy = 2 +keywords = [ + "xoxe.xoxb-", + "xoxe.xoxp-", +] + +[[rules]] +id = "slack-config-refresh-token" +description = "Discovered a Slack Configuration refresh token, potentially allowing prolonged unauthorized access to configuration settings." +regex = '''(?i)xoxe-\d-[A-Z0-9]{146}''' +entropy = 2 +keywords = ["xoxe-"] + +[[rules]] +id = "slack-legacy-bot-token" +description = "Uncovered a Slack Legacy bot token, which could lead to compromised legacy bot operations and data exposure." +regex = '''xoxb-[0-9]{8,14}-[a-zA-Z0-9]{18,26}''' +entropy = 2 +keywords = ["xoxb"] + +[[rules]] +id = "slack-legacy-token" +description = "Detected a Slack Legacy token, risking unauthorized access to older Slack integrations and user data." +regex = '''xox[os]-\d+-\d+-\d+-[a-fA-F\d]+''' +entropy = 2 +keywords = [ + "xoxo", + "xoxs", +] + +[[rules]] +id = "slack-legacy-workspace-token" +description = "Identified a Slack Legacy Workspace token, potentially compromising access to workspace data and legacy features." 
+regex = '''xox[ar]-(?:\d-)?[0-9a-zA-Z]{8,48}''' +entropy = 2 +keywords = [ + "xoxa", + "xoxr", +] + +[[rules]] +id = "slack-user-token" +description = "Found a Slack User token, posing a risk of unauthorized user impersonation and data access within Slack workspaces." +regex = '''xox[pe](?:-[0-9]{10,13}){3}-[a-zA-Z0-9-]{28,34}''' +entropy = 2 +keywords = [ + "xoxp-", + "xoxe-", +] + +[[rules]] +id = "slack-webhook-url" +description = "Discovered a Slack Webhook, which could lead to unauthorized message posting and data leakage in Slack channels." +regex = '''(?:https?://)?hooks.slack.com/(?:services|workflows|triggers)/[A-Za-z0-9+/]{43,56}''' +keywords = ["hooks.slack.com"] + +[[rules]] +id = "snyk-api-token" +description = "Uncovered a Snyk API token, potentially compromising software vulnerability scanning and code security." +regex = '''(?i)[\w.-]{0,50}?(?:snyk[_.-]?(?:(?:api|oauth)[_.-]?)?(?:key|token))(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["snyk"] + +[[rules]] +id = "sonar-api-token" +description = "Uncovered a Sonar API token, potentially compromising software vulnerability scanning and code security." +regex = '''(?i)[\w.-]{0,50}?(?:sonar[_.-]?(login|token))(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9=_\-]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["sonar"] + +[[rules]] +id = "sourcegraph-access-token" +description = "Sourcegraph is a code search and navigation engine." +regex = '''(?i)\b(\b(sgp_(?:[a-fA-F0-9]{16}|local)_[a-fA-F0-9]{40}|sgp_[a-fA-F0-9]{40}|[a-fA-F0-9]{40})\b)(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = [ + "sgp_", + "sourcegraph", +] + +[[rules]] +id = "square-access-token" +description = "Detected a Square Access Token, risking unauthorized payment processing and financial transaction exposure." +regex = '''\b((?:EAAA|sq0atp-)[\w-]{22,60})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = [ + "sq0atp-", + "eaaa", +] + +[[rules]] +id = "squarespace-access-token" +description = "Identified a Squarespace Access Token, which may compromise website management and content control on Squarespace." +regex = '''(?i)[\w.-]{0,50}?(?:squarespace)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["squarespace"] + +[[rules]] +id = "stripe-access-token" +description = "Found a Stripe Access Token, posing a risk to payment processing services and sensitive financial data." +regex = '''\b((?:sk|rk)_(?:test|live|prod)_[a-zA-Z0-9]{10,99})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = [ + "sk_test", + "sk_live", + "sk_prod", + "rk_test", + "rk_live", + "rk_prod", +] + +[[rules]] +id = "sumologic-access-id" +description = "Discovered a SumoLogic Access ID, potentially compromising log management services and data analytics integrity." +regex = '''[\w.-]{0,50}?(?i:[\w.-]{0,50}?(?:(?-i:[Ss]umo|SUMO))(?:[ \t\w.-]{0,20})[\s'"]{0,3})(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(su[a-zA-Z0-9]{12})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["sumo"] + +[[rules]] +id = "sumologic-access-token" +description = "Uncovered a SumoLogic Access Token, which could lead to unauthorized access to log data and analytics insights." 
+regex = '''(?i)[\w.-]{0,50}?(?:(?-i:[Ss]umo|SUMO))(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["sumo"] + +[[rules]] +id = "telegram-bot-api-token" +description = "Detected a Telegram Bot API Token, risking unauthorized bot operations and message interception on Telegram." +regex = '''(?i)[\w.-]{0,50}?(?:telegr)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9]{5,16}:(?-i:A)[a-z0-9_\-]{34})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["telegr"] + +[[rules]] +id = "travisci-access-token" +description = "Identified a Travis CI Access Token, potentially compromising continuous integration services and codebase security." +regex = '''(?i)[\w.-]{0,50}?(?:travis)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{22})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["travis"] + +[[rules]] +id = "twilio-api-key" +description = "Found a Twilio API Key, posing a risk to communication services and sensitive customer interaction data." +regex = '''SK[0-9a-fA-F]{32}''' +entropy = 3 +keywords = ["sk"] + +[[rules]] +id = "twitch-api-token" +description = "Discovered a Twitch API token, which could compromise streaming services and account integrations." +regex = '''(?i)[\w.-]{0,50}?(?:twitch)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{30})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["twitch"] + +[[rules]] +id = "twitter-access-secret" +description = "Uncovered a Twitter Access Secret, potentially risking unauthorized Twitter integrations and data breaches." +regex = '''(?i)[\w.-]{0,50}?(?:twitter)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{45})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["twitter"] + +[[rules]] +id = "twitter-access-token" +description = "Detected a Twitter Access Token, posing a risk of unauthorized account operations and social media data exposure." +regex = '''(?i)[\w.-]{0,50}?(?:twitter)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9]{15,25}-[a-zA-Z0-9]{20,40})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["twitter"] + +[[rules]] +id = "twitter-api-key" +description = "Identified a Twitter API Key, which may compromise Twitter application integrations and user data security." +regex = '''(?i)[\w.-]{0,50}?(?:twitter)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{25})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["twitter"] + +[[rules]] +id = "twitter-api-secret" +description = "Found a Twitter API Secret, risking the security of Twitter app integrations and sensitive data access." +regex = '''(?i)[\w.-]{0,50}?(?:twitter)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{50})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["twitter"] + +[[rules]] +id = "twitter-bearer-token" +description = "Discovered a Twitter Bearer Token, potentially compromising API access and data retrieval from Twitter." +regex = '''(?i)[\w.-]{0,50}?(?:twitter)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(A{22}[a-zA-Z0-9%]{80,100})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["twitter"] + +[[rules]] +id = "typeform-api-token" +description = "Uncovered a Typeform API token, which could lead to unauthorized survey management and data collection." 
+regex = '''(?i)[\w.-]{0,50}?(?:typeform)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(tfp_[a-z0-9\-_\.=]{59})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["tfp_"] + +[[rules]] +id = "vault-batch-token" +description = "Detected a Vault Batch Token, risking unauthorized access to secret management services and sensitive data." +regex = '''\b(hvb\.[\w-]{138,300})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 4 +keywords = ["hvb."] + +[[rules]] +id = "vault-service-token" +description = "Identified a Vault Service Token, potentially compromising infrastructure security and access to sensitive credentials." +regex = '''\b((?:hvs\.[\w-]{90,120}|s\.(?i:[a-z0-9]{24})))(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3.5 +keywords = [ + "hvs.", + "s.", +] +[[rules.allowlists]] +regexes = [ + '''s\.[A-Za-z]{24}''', +] + +[[rules]] +id = "yandex-access-token" +description = "Found a Yandex Access Token, posing a risk to Yandex service integrations and user data privacy." +regex = '''(?i)[\w.-]{0,50}?(?:yandex)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(t1\.[A-Z0-9a-z_-]+[=]{0,2}\.[A-Z0-9a-z_-]{86}[=]{0,2})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["yandex"] + +[[rules]] +id = "yandex-api-key" +description = "Discovered a Yandex API Key, which could lead to unauthorized access to Yandex services and data manipulation." +regex = '''(?i)[\w.-]{0,50}?(?:yandex)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(AQVN[A-Za-z0-9_\-]{35,38})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["yandex"] + +[[rules]] +id = "yandex-aws-access-token" +description = "Uncovered a Yandex AWS Access Token, potentially compromising cloud resource access and data security on Yandex Cloud." +regex = '''(?i)[\w.-]{0,50}?(?:yandex)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(YC[a-zA-Z0-9_\-]{38})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["yandex"] + +[[rules]] +id = "zendesk-secret-key" +description = "Detected a Zendesk Secret Key, risking unauthorized access to customer support services and sensitive ticketing data." 
+regex = '''(?i)[\w.-]{0,50}?(?:zendesk)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["zendesk"] From 61e368f12f2a9aa6cfcf09e1cc113e7ff4be31ba Mon Sep 17 00:00:00 2001 From: raushan-skyflow Date: Wed, 10 Sep 2025 19:39:40 +0530 Subject: [PATCH 04/42] SK-971 file upload support (#191) * SK-971 file upload support --- skyflow/generated/rest/__init__.py | 6 +- skyflow/generated/rest/core/client_wrapper.py | 2 +- skyflow/generated/rest/files/client.py | 16 +- skyflow/generated/rest/files/raw_client.py | 16 +- ...deidentify_image_request_masking_method.py | 2 +- skyflow/generated/rest/guardrails/client.py | 12 +- skyflow/generated/rest/records/client.py | 141 +++++++++ skyflow/generated/rest/records/raw_client.py | 239 +++++++++++++++ skyflow/generated/rest/types/__init__.py | 6 +- .../rest/types/deidentify_status_response.py | 4 +- .../deidentify_status_response_output_type.py | 2 +- .../deidentify_status_response_status.py | 2 +- skyflow/generated/rest/types/entity_type.py | 4 +- skyflow/generated/rest/types/error_string.py | 3 - .../rest/types/reidentify_file_response.py | 3 +- .../reidentify_file_response_output_type.py | 5 + .../types/reidentify_file_response_status.py | 2 +- .../rest/types/reidentify_string_response.py | 2 +- .../rest/types/upload_file_v_2_response.py | 34 +++ skyflow/utils/_skyflow_messages.py | 9 + skyflow/utils/validations/__init__.py | 1 + skyflow/utils/validations/_validations.py | 64 ++++ skyflow/vault/controller/_vault.py | 57 +++- skyflow/vault/data/__init__.py | 4 +- skyflow/vault/data/_file_upload_request.py | 18 ++ skyflow/vault/data/_file_upload_response.py | 6 + tests/vault/controller/test__vault.py | 282 +++++++++++++++++- 27 files changed, 893 insertions(+), 49 deletions(-) delete mode 100644 skyflow/generated/rest/types/error_string.py create mode 100644 skyflow/generated/rest/types/reidentify_file_response_output_type.py create mode 100644 skyflow/generated/rest/types/upload_file_v_2_response.py create mode 100644 skyflow/vault/data/_file_upload_request.py create mode 100644 skyflow/vault/data/_file_upload_response.py diff --git a/skyflow/generated/rest/__init__.py b/skyflow/generated/rest/__init__.py index bad57c24..b8309d05 100644 --- a/skyflow/generated/rest/__init__.py +++ b/skyflow/generated/rest/__init__.py @@ -28,12 +28,12 @@ EntityTypes, ErrorResponse, ErrorResponseError, - ErrorString, GooglerpcStatus, ProtobufAny, RedactionEnumRedaction, ReidentifyFileResponse, ReidentifyFileResponseOutput, + ReidentifyFileResponseOutputType, ReidentifyFileResponseStatus, ReidentifyStringResponse, RequestActionType, @@ -46,6 +46,7 @@ Transformations, TransformationsShiftDates, TransformationsShiftDatesEntityTypesItem, + UploadFileV2Response, Uuid, V1AuditAfterOptions, V1AuditEventResponse, @@ -175,7 +176,6 @@ "EntityTypes", "ErrorResponse", "ErrorResponseError", - "ErrorString", "GooglerpcStatus", "InternalServerError", "NotFoundError", @@ -189,6 +189,7 @@ "ReidentifyFileRequestFormat", "ReidentifyFileResponse", "ReidentifyFileResponseOutput", + "ReidentifyFileResponseOutputType", "ReidentifyFileResponseStatus", "ReidentifyStringRequestFormat", "ReidentifyStringResponse", @@ -205,6 +206,7 @@ "TransformationsShiftDates", "TransformationsShiftDatesEntityTypesItem", "UnauthorizedError", + "UploadFileV2Response", "Uuid", "V1AuditAfterOptions", "V1AuditEventResponse", diff --git a/skyflow/generated/rest/core/client_wrapper.py b/skyflow/generated/rest/core/client_wrapper.py index 
a3210a7e..5179f373 100644 --- a/skyflow/generated/rest/core/client_wrapper.py +++ b/skyflow/generated/rest/core/client_wrapper.py @@ -24,7 +24,7 @@ def get_headers(self) -> typing.Dict[str, str]: headers: typing.Dict[str, str] = { "X-Fern-Language": "Python", "X-Fern-SDK-Name": "skyflow_vault", - "X-Fern-SDK-Version": "0.0.252", + "X-Fern-SDK-Version": "0.0.323", **(self.get_custom_headers() or {}), } headers["Authorization"] = f"Bearer {self._get_token()}" diff --git a/skyflow/generated/rest/files/client.py b/skyflow/generated/rest/files/client.py index 654789de..4d5d548b 100644 --- a/skyflow/generated/rest/files/client.py +++ b/skyflow/generated/rest/files/client.py @@ -200,8 +200,8 @@ def deidentify_pdf( vault_id: VaultId, file: DeidentifyPdfRequestFile, configuration_id: typing.Optional[ConfigurationId] = OMIT, - density: typing.Optional[int] = OMIT, - max_resolution: typing.Optional[int] = OMIT, + density: typing.Optional[float] = OMIT, + max_resolution: typing.Optional[float] = OMIT, entity_types: typing.Optional[EntityTypes] = OMIT, token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, allow_regex: typing.Optional[AllowRegex] = OMIT, @@ -221,10 +221,10 @@ def deidentify_pdf( configuration_id : typing.Optional[ConfigurationId] - density : typing.Optional[int] + density : typing.Optional[float] Pixel density at which to process the PDF file. - max_resolution : typing.Optional[int] + max_resolution : typing.Optional[float] Max resolution at which to process the PDF file. entity_types : typing.Optional[EntityTypes] @@ -1020,8 +1020,8 @@ async def deidentify_pdf( vault_id: VaultId, file: DeidentifyPdfRequestFile, configuration_id: typing.Optional[ConfigurationId] = OMIT, - density: typing.Optional[int] = OMIT, - max_resolution: typing.Optional[int] = OMIT, + density: typing.Optional[float] = OMIT, + max_resolution: typing.Optional[float] = OMIT, entity_types: typing.Optional[EntityTypes] = OMIT, token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, allow_regex: typing.Optional[AllowRegex] = OMIT, @@ -1041,10 +1041,10 @@ async def deidentify_pdf( configuration_id : typing.Optional[ConfigurationId] - density : typing.Optional[int] + density : typing.Optional[float] Pixel density at which to process the PDF file. - max_resolution : typing.Optional[int] + max_resolution : typing.Optional[float] Max resolution at which to process the PDF file. entity_types : typing.Optional[EntityTypes] diff --git a/skyflow/generated/rest/files/raw_client.py b/skyflow/generated/rest/files/raw_client.py index 5a67292f..c0e535ea 100644 --- a/skyflow/generated/rest/files/raw_client.py +++ b/skyflow/generated/rest/files/raw_client.py @@ -287,8 +287,8 @@ def deidentify_pdf( vault_id: VaultId, file: DeidentifyPdfRequestFile, configuration_id: typing.Optional[ConfigurationId] = OMIT, - density: typing.Optional[int] = OMIT, - max_resolution: typing.Optional[int] = OMIT, + density: typing.Optional[float] = OMIT, + max_resolution: typing.Optional[float] = OMIT, entity_types: typing.Optional[EntityTypes] = OMIT, token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, allow_regex: typing.Optional[AllowRegex] = OMIT, @@ -308,10 +308,10 @@ def deidentify_pdf( configuration_id : typing.Optional[ConfigurationId] - density : typing.Optional[int] + density : typing.Optional[float] Pixel density at which to process the PDF file. - max_resolution : typing.Optional[int] + max_resolution : typing.Optional[float] Max resolution at which to process the PDF file. 
entity_types : typing.Optional[EntityTypes] @@ -1575,8 +1575,8 @@ async def deidentify_pdf( vault_id: VaultId, file: DeidentifyPdfRequestFile, configuration_id: typing.Optional[ConfigurationId] = OMIT, - density: typing.Optional[int] = OMIT, - max_resolution: typing.Optional[int] = OMIT, + density: typing.Optional[float] = OMIT, + max_resolution: typing.Optional[float] = OMIT, entity_types: typing.Optional[EntityTypes] = OMIT, token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, allow_regex: typing.Optional[AllowRegex] = OMIT, @@ -1596,10 +1596,10 @@ async def deidentify_pdf( configuration_id : typing.Optional[ConfigurationId] - density : typing.Optional[int] + density : typing.Optional[float] Pixel density at which to process the PDF file. - max_resolution : typing.Optional[int] + max_resolution : typing.Optional[float] Max resolution at which to process the PDF file. entity_types : typing.Optional[EntityTypes] diff --git a/skyflow/generated/rest/files/types/deidentify_image_request_masking_method.py b/skyflow/generated/rest/files/types/deidentify_image_request_masking_method.py index d1ff8c83..bc0c338c 100644 --- a/skyflow/generated/rest/files/types/deidentify_image_request_masking_method.py +++ b/skyflow/generated/rest/files/types/deidentify_image_request_masking_method.py @@ -2,4 +2,4 @@ import typing -DeidentifyImageRequestMaskingMethod = typing.Union[typing.Literal["blackout", "blur"], typing.Any] +DeidentifyImageRequestMaskingMethod = typing.Union[typing.Literal["blackbox", "blur"], typing.Any] diff --git a/skyflow/generated/rest/guardrails/client.py b/skyflow/generated/rest/guardrails/client.py index 169f7de1..e7fe1e05 100644 --- a/skyflow/generated/rest/guardrails/client.py +++ b/skyflow/generated/rest/guardrails/client.py @@ -68,10 +68,8 @@ def check_guardrails( token="YOUR_TOKEN", ) client.guardrails.check_guardrails( - vault_id="VAULT_ID", - text="I love to play cricket.", - check_toxicity=True, - deny_topics=["sports"], + vault_id="vault_id", + text="text", ) """ _response = self._raw_client.check_guardrails( @@ -145,10 +143,8 @@ async def check_guardrails( async def main() -> None: await client.guardrails.check_guardrails( - vault_id="VAULT_ID", - text="I love to play cricket.", - check_toxicity=True, - deny_topics=["sports"], + vault_id="vault_id", + text="text", ) diff --git a/skyflow/generated/rest/records/client.py b/skyflow/generated/rest/records/client.py index 1f727bfc..cfe15a1c 100644 --- a/skyflow/generated/rest/records/client.py +++ b/skyflow/generated/rest/records/client.py @@ -5,6 +5,7 @@ from .. import core from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper from ..core.request_options import RequestOptions +from ..types.upload_file_v_2_response import UploadFileV2Response from ..types.v_1_batch_operation_response import V1BatchOperationResponse from ..types.v_1_batch_record import V1BatchRecord from ..types.v_1_bulk_delete_record_response import V1BulkDeleteRecordResponse @@ -700,6 +701,72 @@ def file_service_get_file_scan_status( ) return _response.data + def upload_file_v_2( + self, + vault_id: str, + *, + table_name: str, + column_name: str, + file: core.File, + skyflow_id: typing.Optional[str] = OMIT, + return_file_metadata: typing.Optional[bool] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> UploadFileV2Response: + """ + Uploads the specified file to a record. If an existing record isn't specified, creates a new record and uploads the file to that record. 
+ + Parameters + ---------- + vault_id : str + ID of the vault. + + table_name : str + Name of the table to upload the file to. + + column_name : str + Name of the column to upload the file to. The column must have a `file` data type. + + file : core.File + See core.File for more documentation + + skyflow_id : typing.Optional[str] + Skyflow ID of the record to upload the file to. If `skyflowID` isn't specified, a new record will be created. + + return_file_metadata : typing.Optional[bool] + If `true`, returns metadata about the uploaded file. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + UploadFileV2Response + File uploaded successfully. + + Examples + -------- + from skyflow import Skyflow + + client = Skyflow( + token="YOUR_TOKEN", + ) + client.records.upload_file_v_2( + vault_id="d4410ea01d83473ca09a24c6b03096d4", + table_name="tableName", + column_name="columnName", + ) + """ + _response = self._raw_client.upload_file_v_2( + vault_id, + table_name=table_name, + column_name=column_name, + file=file, + skyflow_id=skyflow_id, + return_file_metadata=return_file_metadata, + request_options=request_options, + ) + return _response.data + class AsyncRecordsClient: def __init__(self, *, client_wrapper: AsyncClientWrapper): @@ -1455,3 +1522,77 @@ async def main() -> None: vault_id, table_name, id, column_name, request_options=request_options ) return _response.data + + async def upload_file_v_2( + self, + vault_id: str, + *, + table_name: str, + column_name: str, + file: core.File, + skyflow_id: typing.Optional[str] = OMIT, + return_file_metadata: typing.Optional[bool] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> UploadFileV2Response: + """ + Uploads the specified file to a record. If an existing record isn't specified, creates a new record and uploads the file to that record. + + Parameters + ---------- + vault_id : str + ID of the vault. + + table_name : str + Name of the table to upload the file to. + + column_name : str + Name of the column to upload the file to. The column must have a `file` data type. + + file : core.File + See core.File for more documentation + + skyflow_id : typing.Optional[str] + Skyflow ID of the record to upload the file to. If `skyflowID` isn't specified, a new record will be created. + + return_file_metadata : typing.Optional[bool] + If `true`, returns metadata about the uploaded file. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + UploadFileV2Response + File uploaded successfully. 
+ + Examples + -------- + import asyncio + + from skyflow import AsyncSkyflow + + client = AsyncSkyflow( + token="YOUR_TOKEN", + ) + + + async def main() -> None: + await client.records.upload_file_v_2( + vault_id="d4410ea01d83473ca09a24c6b03096d4", + table_name="tableName", + column_name="columnName", + ) + + + asyncio.run(main()) + """ + _response = await self._raw_client.upload_file_v_2( + vault_id, + table_name=table_name, + column_name=column_name, + file=file, + skyflow_id=skyflow_id, + return_file_metadata=return_file_metadata, + request_options=request_options, + ) + return _response.data diff --git a/skyflow/generated/rest/records/raw_client.py b/skyflow/generated/rest/records/raw_client.py index e2bfdc92..b42e0bc9 100644 --- a/skyflow/generated/rest/records/raw_client.py +++ b/skyflow/generated/rest/records/raw_client.py @@ -11,7 +11,12 @@ from ..core.pydantic_utilities import parse_obj_as from ..core.request_options import RequestOptions from ..core.serialization import convert_and_respect_annotation_metadata +from ..errors.bad_request_error import BadRequestError +from ..errors.internal_server_error import InternalServerError from ..errors.not_found_error import NotFoundError +from ..errors.unauthorized_error import UnauthorizedError +from ..types.error_response import ErrorResponse +from ..types.upload_file_v_2_response import UploadFileV2Response from ..types.v_1_batch_operation_response import V1BatchOperationResponse from ..types.v_1_batch_record import V1BatchRecord from ..types.v_1_bulk_delete_record_response import V1BulkDeleteRecordResponse @@ -804,6 +809,123 @@ def file_service_get_file_scan_status( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) + def upload_file_v_2( + self, + vault_id: str, + *, + table_name: str, + column_name: str, + file: core.File, + skyflow_id: typing.Optional[str] = OMIT, + return_file_metadata: typing.Optional[bool] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> HttpResponse[UploadFileV2Response]: + """ + Uploads the specified file to a record. If an existing record isn't specified, creates a new record and uploads the file to that record. + + Parameters + ---------- + vault_id : str + ID of the vault. + + table_name : str + Name of the table to upload the file to. + + column_name : str + Name of the column to upload the file to. The column must have a `file` data type. + + file : core.File + See core.File for more documentation + + skyflow_id : typing.Optional[str] + Skyflow ID of the record to upload the file to. If `skyflowID` isn't specified, a new record will be created. + + return_file_metadata : typing.Optional[bool] + If `true`, returns metadata about the uploaded file. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + HttpResponse[UploadFileV2Response] + File uploaded successfully. 
+ """ + _response = self._client_wrapper.httpx_client.request( + f"v2/vaults/{jsonable_encoder(vault_id)}/files/upload", + method="POST", + data={ + "tableName": table_name, + "columnName": column_name, + "skyflowID": skyflow_id, + "returnFileMetadata": return_file_metadata, + }, + files={ + "file": file, + }, + request_options=request_options, + omit=OMIT, + force_multipart=True, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + UploadFileV2Response, + parse_obj_as( + type_=UploadFileV2Response, # type: ignore + object_=_response.json(), + ), + ) + return HttpResponse(response=_response, data=_data) + if _response.status_code == 400: + raise BadRequestError( + headers=dict(_response.headers), + body=typing.cast( + typing.Optional[typing.Any], + parse_obj_as( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Optional[typing.Any], + parse_obj_as( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + typing.Optional[typing.Any], + parse_obj_as( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 500: + raise InternalServerError( + headers=dict(_response.headers), + body=typing.cast( + ErrorResponse, + parse_obj_as( + type_=ErrorResponse, # type: ignore + object_=_response.json(), + ), + ), + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) + class AsyncRawRecordsClient: def __init__(self, *, client_wrapper: AsyncClientWrapper): @@ -1577,3 +1699,120 @@ async def file_service_get_file_scan_status( except JSONDecodeError: raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) + + async def upload_file_v_2( + self, + vault_id: str, + *, + table_name: str, + column_name: str, + file: core.File, + skyflow_id: typing.Optional[str] = OMIT, + return_file_metadata: typing.Optional[bool] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> AsyncHttpResponse[UploadFileV2Response]: + """ + Uploads the specified file to a record. If an existing record isn't specified, creates a new record and uploads the file to that record. + + Parameters + ---------- + vault_id : str + ID of the vault. + + table_name : str + Name of the table to upload the file to. + + column_name : str + Name of the column to upload the file to. The column must have a `file` data type. + + file : core.File + See core.File for more documentation + + skyflow_id : typing.Optional[str] + Skyflow ID of the record to upload the file to. If `skyflowID` isn't specified, a new record will be created. + + return_file_metadata : typing.Optional[bool] + If `true`, returns metadata about the uploaded file. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + AsyncHttpResponse[UploadFileV2Response] + File uploaded successfully. 
+ """ + _response = await self._client_wrapper.httpx_client.request( + f"v2/vaults/{jsonable_encoder(vault_id)}/files/upload", + method="POST", + data={ + "tableName": table_name, + "columnName": column_name, + "skyflowID": skyflow_id, + "returnFileMetadata": return_file_metadata, + }, + files={ + "file": file, + }, + request_options=request_options, + omit=OMIT, + force_multipart=True, + ) + try: + if 200 <= _response.status_code < 300: + _data = typing.cast( + UploadFileV2Response, + parse_obj_as( + type_=UploadFileV2Response, # type: ignore + object_=_response.json(), + ), + ) + return AsyncHttpResponse(response=_response, data=_data) + if _response.status_code == 400: + raise BadRequestError( + headers=dict(_response.headers), + body=typing.cast( + typing.Optional[typing.Any], + parse_obj_as( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 401: + raise UnauthorizedError( + headers=dict(_response.headers), + body=typing.cast( + typing.Optional[typing.Any], + parse_obj_as( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 404: + raise NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + typing.Optional[typing.Any], + parse_obj_as( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ), + ) + if _response.status_code == 500: + raise InternalServerError( + headers=dict(_response.headers), + body=typing.cast( + ErrorResponse, + parse_obj_as( + type_=ErrorResponse, # type: ignore + object_=_response.json(), + ), + ), + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) + raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) diff --git a/skyflow/generated/rest/types/__init__.py b/skyflow/generated/rest/types/__init__.py index 5a48e4f4..92d826c9 100644 --- a/skyflow/generated/rest/types/__init__.py +++ b/skyflow/generated/rest/types/__init__.py @@ -27,12 +27,12 @@ from .entity_types import EntityTypes from .error_response import ErrorResponse from .error_response_error import ErrorResponseError -from .error_string import ErrorString from .googlerpc_status import GooglerpcStatus from .protobuf_any import ProtobufAny from .redaction_enum_redaction import RedactionEnumRedaction from .reidentify_file_response import ReidentifyFileResponse from .reidentify_file_response_output import ReidentifyFileResponseOutput +from .reidentify_file_response_output_type import ReidentifyFileResponseOutputType from .reidentify_file_response_status import ReidentifyFileResponseStatus from .reidentify_string_response import ReidentifyStringResponse from .request_action_type import RequestActionType @@ -45,6 +45,7 @@ from .transformations import Transformations from .transformations_shift_dates import TransformationsShiftDates from .transformations_shift_dates_entity_types_item import TransformationsShiftDatesEntityTypesItem +from .upload_file_v_2_response import UploadFileV2Response from .uuid_ import Uuid from .v_1_audit_after_options import V1AuditAfterOptions from .v_1_audit_event_response import V1AuditEventResponse @@ -105,12 +106,12 @@ "EntityTypes", "ErrorResponse", "ErrorResponseError", - "ErrorString", "GooglerpcStatus", "ProtobufAny", "RedactionEnumRedaction", "ReidentifyFileResponse", "ReidentifyFileResponseOutput", + 
"ReidentifyFileResponseOutputType", "ReidentifyFileResponseStatus", "ReidentifyStringResponse", "RequestActionType", @@ -123,6 +124,7 @@ "Transformations", "TransformationsShiftDates", "TransformationsShiftDatesEntityTypesItem", + "UploadFileV2Response", "Uuid", "V1AuditAfterOptions", "V1AuditEventResponse", diff --git a/skyflow/generated/rest/types/deidentify_status_response.py b/skyflow/generated/rest/types/deidentify_status_response.py index a276963c..712a85b2 100644 --- a/skyflow/generated/rest/types/deidentify_status_response.py +++ b/skyflow/generated/rest/types/deidentify_status_response.py @@ -24,7 +24,7 @@ class DeidentifyStatusResponse(UniversalBaseModel): How the input file was specified. """ - output_type: typing.Optional[DeidentifyStatusResponseOutputType] = pydantic.Field(default=None) + output_type: DeidentifyStatusResponseOutputType = pydantic.Field() """ How the output file is specified. """ @@ -49,7 +49,7 @@ class DeidentifyStatusResponse(UniversalBaseModel): Size of the processed text in kilobytes (KB). """ - duration: typing.Optional[int] = pydantic.Field(default=None) + duration: typing.Optional[float] = pydantic.Field(default=None) """ Duration of the processed audio in seconds. """ diff --git a/skyflow/generated/rest/types/deidentify_status_response_output_type.py b/skyflow/generated/rest/types/deidentify_status_response_output_type.py index 571801c1..051cc31a 100644 --- a/skyflow/generated/rest/types/deidentify_status_response_output_type.py +++ b/skyflow/generated/rest/types/deidentify_status_response_output_type.py @@ -2,4 +2,4 @@ import typing -DeidentifyStatusResponseOutputType = typing.Union[typing.Literal["base64", "efs_path"], typing.Any] +DeidentifyStatusResponseOutputType = typing.Union[typing.Literal["BASE64", "UNKNOWN"], typing.Any] diff --git a/skyflow/generated/rest/types/deidentify_status_response_status.py b/skyflow/generated/rest/types/deidentify_status_response_status.py index 40262092..9ec2931b 100644 --- a/skyflow/generated/rest/types/deidentify_status_response_status.py +++ b/skyflow/generated/rest/types/deidentify_status_response_status.py @@ -2,4 +2,4 @@ import typing -DeidentifyStatusResponseStatus = typing.Union[typing.Literal["failed", "in_progress", "success"], typing.Any] +DeidentifyStatusResponseStatus = typing.Union[typing.Literal["FAILED", "IN_PROGRESS", "SUCCESS", "UNKNOWN"], typing.Any] diff --git a/skyflow/generated/rest/types/entity_type.py b/skyflow/generated/rest/types/entity_type.py index 20195417..1a343410 100644 --- a/skyflow/generated/rest/types/entity_type.py +++ b/skyflow/generated/rest/types/entity_type.py @@ -15,8 +15,8 @@ "credit_card_expiration", "cvv", "date", - "day", "date_interval", + "day", "dob", "dose", "driver_license", @@ -58,10 +58,10 @@ "passport_number", "password", "phone_number", - "project", "physical_attribute", "political_affiliation", "product", + "project", "religion", "routing_number", "sexuality", diff --git a/skyflow/generated/rest/types/error_string.py b/skyflow/generated/rest/types/error_string.py deleted file mode 100644 index 068b4a84..00000000 --- a/skyflow/generated/rest/types/error_string.py +++ /dev/null @@ -1,3 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
- -ErrorString = str diff --git a/skyflow/generated/rest/types/reidentify_file_response.py b/skyflow/generated/rest/types/reidentify_file_response.py index c67b41ac..bd90fb49 100644 --- a/skyflow/generated/rest/types/reidentify_file_response.py +++ b/skyflow/generated/rest/types/reidentify_file_response.py @@ -5,6 +5,7 @@ import pydantic from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel from .reidentify_file_response_output import ReidentifyFileResponseOutput +from .reidentify_file_response_output_type import ReidentifyFileResponseOutputType from .reidentify_file_response_status import ReidentifyFileResponseStatus @@ -18,7 +19,7 @@ class ReidentifyFileResponse(UniversalBaseModel): Status of the re-identify operation. """ - output_type: typing.Literal["BASE64"] = pydantic.Field(default="BASE64") + output_type: ReidentifyFileResponseOutputType = pydantic.Field() """ Format of the output file. """ diff --git a/skyflow/generated/rest/types/reidentify_file_response_output_type.py b/skyflow/generated/rest/types/reidentify_file_response_output_type.py new file mode 100644 index 00000000..03048c85 --- /dev/null +++ b/skyflow/generated/rest/types/reidentify_file_response_output_type.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +ReidentifyFileResponseOutputType = typing.Union[typing.Literal["BASE64", "UNKNOWN"], typing.Any] diff --git a/skyflow/generated/rest/types/reidentify_file_response_status.py b/skyflow/generated/rest/types/reidentify_file_response_status.py index c640c3a6..8bdfa1e0 100644 --- a/skyflow/generated/rest/types/reidentify_file_response_status.py +++ b/skyflow/generated/rest/types/reidentify_file_response_status.py @@ -2,4 +2,4 @@ import typing -ReidentifyFileResponseStatus = typing.Union[typing.Literal["failed", "in_progress", "success"], typing.Any] +ReidentifyFileResponseStatus = typing.Union[typing.Literal["FAILED", "IN_PROGRESS", "SUCCESS", "UNKNOWN"], typing.Any] diff --git a/skyflow/generated/rest/types/reidentify_string_response.py b/skyflow/generated/rest/types/reidentify_string_response.py index 8284806b..cbb1b836 100644 --- a/skyflow/generated/rest/types/reidentify_string_response.py +++ b/skyflow/generated/rest/types/reidentify_string_response.py @@ -11,7 +11,7 @@ class ReidentifyStringResponse(UniversalBaseModel): Re-identify string response. """ - processed_text: typing.Optional[str] = pydantic.Field(default=None) + text: typing.Optional[str] = pydantic.Field(default=None) """ Re-identified text. """ diff --git a/skyflow/generated/rest/types/upload_file_v_2_response.py b/skyflow/generated/rest/types/upload_file_v_2_response.py new file mode 100644 index 00000000..f1bcc215 --- /dev/null +++ b/skyflow/generated/rest/types/upload_file_v_2_response.py @@ -0,0 +1,34 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +import typing_extensions +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from ..core.serialization import FieldMetadata + + +class UploadFileV2Response(UniversalBaseModel): + """ + Response schema for uploading a file, optionally creating a new record. + """ + + skyflow_id: typing_extensions.Annotated[typing.Optional[str], FieldMetadata(alias="skyflowID")] = pydantic.Field( + default=None + ) + """ + Skyflow ID of the record the file was uploaded to. 
+ """ + + file_metadata: typing_extensions.Annotated[ + typing.Optional[typing.Optional[typing.Any]], FieldMetadata(alias="fileMetadata") + ] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/skyflow/utils/_skyflow_messages.py b/skyflow/utils/_skyflow_messages.py index 460ca29e..8401aeb6 100644 --- a/skyflow/utils/_skyflow_messages.py +++ b/skyflow/utils/_skyflow_messages.py @@ -100,6 +100,8 @@ class Error(Enum): INVALID_TABLE_VALUE = f"{error_prefix} Validation error. Invalid type of table. Specify table as a string" EMPTY_RECORD_IDS_IN_DELETE = f"{error_prefix} Validation error. 'record ids' array can't be empty. Specify one or more record ids." BULK_DELETE_FAILURE = f"{error_prefix} Delete operation failed." + EMPTY_SKYFLOW_ID= f"{error_prefix} Validation error. skyflow_id can't be empty." + INVALID_FILE_COLUMN_NAME= f"{error_prefix} Validation error. 'column_name' can't be empty." INVALID_QUERY_TYPE = f"{error_prefix} Validation error. Query parameter is of type {{}}. Specify as a string." EMPTY_QUERY = f"{error_prefix} Validation error. Query parameter can't be empty. Specify as a string." @@ -198,6 +200,7 @@ class Error(Enum): INVALID_FILE_OR_ENCODED_FILE= f"{error_prefix} . Error while decoding base64 and saving file" INVALID_FILE_TYPE = f"{error_prefix} Validation error. Invalid file type. Specify a valid file type." INVALID_FILE_NAME= f"{error_prefix} Validation error. Invalid file name. Specify a valid file name." + INVALID_FILE_PATH= f"{error_prefix} Validation error. Invalid file path. Specify a valid file path." INVALID_DEIDENTIFY_FILE_PATH= f"{error_prefix} Validation error. Invalid file path. Specify a valid file path." INVALID_BASE64_HEADER= f"{error_prefix} Validation error. Invalid base64 header. Specify a valid base64 header." INVALID_WAIT_TIME= f"{error_prefix} Validation error. Invalid wait time. Specify a valid wait time as number and should not be greater than 64 secs." @@ -271,6 +274,12 @@ class Info(Enum): TOKENIZE_REQUEST_RESOLVED = f"{INFO}: [{error_prefix}] Tokenize request resolved." TOKENIZE_SUCCESS = f"{INFO}: [{error_prefix}] Data tokenized." + FILE_UPLOAD_TRIGGERED = f"{INFO}: [{error_prefix}] File upload method triggered." + VALIDATING_FILE_UPLOAD_REQUEST = f"{INFO}: [{error_prefix}] Validating file upload request." + FILE_UPLOAD_REQUEST_RESOLVED = f"{INFO}: [{error_prefix}] File upload request resolved." + FILE_UPLOAD_SUCCESS = f"{INFO}: [{error_prefix}] File uploaded successfully." + FILE_UPLOAD_REQUEST_REJECTED = f"{ERROR}: [{error_prefix}] File upload failed." + INVOKE_CONNECTION_TRIGGERED = f"{INFO}: [{error_prefix}] Invoke connection method triggered." VALIDATING_INVOKE_CONNECTION_REQUEST = f"{INFO}: [{error_prefix}] Validating invoke connection request." INVOKE_CONNECTION_REQUEST_RESOLVED = f"{INFO}: [{error_prefix}] Invoke connection request resolved." 
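For readers following the rest of this patch: the hunks below add the SDK-facing pieces that sit on top of the generated `upload_file_v_2` call shown above — a `FileUploadRequest`/`FileUploadResponse` pair, `validate_file_upload_request`, and `Vault.upload_file`. The snippet that follows is a minimal usage sketch only and is not part of the diff; the client construction and the `vault()` accessor are assumptions, while the request and response shapes come from the hunks below.

```python
from skyflow.vault.data import FileUploadRequest

# Initialize Skyflow client (elided). `skyflow_client` and the vault()
# accessor are assumptions here, not something this patch introduces.
vault = skyflow_client.vault('<VAULT_ID>')

# One file source is required. The validation added below checks file_path
# first, then base64 (which also needs file_name), then file_object.
request = FileUploadRequest(
    table='<TABLE_NAME>',            # table containing the record
    skyflow_id='<SKYFLOW_ID>',       # required by the SDK-level validation
    column_name='<FILE_COLUMN>',     # column must have the `file` data type
    file_path='/path/to/file.pdf',   # must exist on disk; checked before the upload is sent
)

response = vault.upload_file(request)  # FileUploadResponse(skyflow_id=..., errors=...)
print(response.skyflow_id, response.errors)
```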
diff --git a/skyflow/utils/validations/__init__.py b/skyflow/utils/validations/__init__.py index b8ce13c8..2f0bc710 100644 --- a/skyflow/utils/validations/__init__.py +++ b/skyflow/utils/validations/__init__.py @@ -12,6 +12,7 @@ validate_update_request, validate_detokenize_request, validate_tokenize_request, + validate_file_upload_request, validate_invoke_connection_params, validate_deidentify_text_request, validate_reidentify_text_request, diff --git a/skyflow/utils/validations/_validations.py b/skyflow/utils/validations/_validations.py index bbca6e85..f88388ad 100644 --- a/skyflow/utils/validations/_validations.py +++ b/skyflow/utils/validations/_validations.py @@ -1,3 +1,4 @@ +import base64 import json import os from skyflow.generated.rest import TokenType @@ -692,6 +693,69 @@ def validate_tokenize_request(logger, request): log_error_log(SkyflowMessages.ErrorLogs.EMPTY_COLUMN_GROUP_IN_COLUMN_VALUES.value.format("TOKENIZE"), logger = logger) raise SkyflowError(SkyflowMessages.Error.EMPTY_TOKENIZE_PARAMETER_COLUMN_GROUP.value.format(i), invalid_input_error_code) + +def validate_file_upload_request(logger, request): + if request is None: + raise SkyflowError(SkyflowMessages.Error.INVALID_TABLE_VALUE.value, invalid_input_error_code) + + # Table + table = getattr(request, "table", None) + if table is None: + raise SkyflowError(SkyflowMessages.Error.INVALID_TABLE_VALUE.value, invalid_input_error_code) + elif table.strip() == "": + raise SkyflowError(SkyflowMessages.Error.EMPTY_TABLE_VALUE.value, invalid_input_error_code) + + # Skyflow ID + skyflow_id = getattr(request, "skyflow_id", None) + if skyflow_id is None: + raise SkyflowError(SkyflowMessages.Error.IDS_KEY_ERROR.value, invalid_input_error_code) + elif skyflow_id.strip() == "": + raise SkyflowError(SkyflowMessages.Error.EMPTY_SKYFLOW_ID.value.format("FILE_UPLOAD"), invalid_input_error_code) + + # Column Name + column_name = getattr(request, "column_name", None) + if column_name is None: + raise SkyflowError(SkyflowMessages.Error.INVALID_FILE_COLUMN_NAME.value.format(type(column_name)), invalid_input_error_code) + elif column_name.strip() == "": + logger.error("Empty column name in FILE_UPLOAD") + raise SkyflowError(SkyflowMessages.Error.INVALID_FILE_COLUMN_NAME.value.format(type(column_name)), invalid_input_error_code) + + # File-related attributes + file_path = getattr(request, "file_path", None) + base64_str = getattr(request, "base64", None) + file_object = getattr(request, "file_object", None) + file_name = getattr(request, "file_name", None) + + # Check file_path first if present + if not is_none_or_empty(file_path): + if not os.path.exists(file_path) or not os.path.isfile(file_path): + raise SkyflowError(SkyflowMessages.Error.INVALID_FILE_PATH.value, invalid_input_error_code) + return + + # Check base64 if present + if not is_none_or_empty(base64_str): + if is_none_or_empty(file_name): + raise SkyflowError(SkyflowMessages.Error.INVALID_FILE_NAME.value, invalid_input_error_code) + try: + base64.b64decode(base64_str) + except Exception: + raise SkyflowError(SkyflowMessages.Error.INVALID_BASE64_STRING.value, invalid_input_error_code) + return + + # Check file_object if present + if file_object is not None: + try: + file_object.seek(0, 1) + return + except Exception: + raise SkyflowError(SkyflowMessages.Error.INVALID_FILE_OBJECT.value, invalid_input_error_code) + + # If none of the above, raise missing file source error + raise SkyflowError(SkyflowMessages.Error.MISSING_FILE_SOURCE.value, invalid_input_error_code) + +def 
is_none_or_empty(value: str) -> bool: + return value is None or (isinstance(value, str) and value.strip() == "") + def validate_invoke_connection_params(logger, query_params, path_params): if not isinstance(path_params, dict): raise SkyflowError(SkyflowMessages.Error.INVALID_PATH_PARAMS.value, invalid_input_error_code) diff --git a/skyflow/vault/controller/_vault.py b/skyflow/vault/controller/_vault.py index 4602cf87..fe921293 100644 --- a/skyflow/vault/controller/_vault.py +++ b/skyflow/vault/controller/_vault.py @@ -1,6 +1,10 @@ +import base64 import json +import os +from typing import Optional from skyflow.generated.rest import V1FieldRecords, V1BatchRecord, V1TokenizeRecordRequest, \ V1DetokenizeRecordRequest +from skyflow.generated.rest.core.file import File from skyflow.utils import SkyflowMessages, parse_insert_response, \ handle_exception, parse_update_record_response, parse_delete_response, parse_detokenize_response, \ parse_tokenize_response, parse_query_response, parse_get_response, encode_column_values, get_metrics @@ -8,8 +12,8 @@ from skyflow.utils.enums import RequestMethod from skyflow.utils.logger import log_info, log_error_log from skyflow.utils.validations import validate_insert_request, validate_delete_request, validate_query_request, \ - validate_get_request, validate_update_request, validate_detokenize_request, validate_tokenize_request -from skyflow.vault.data import InsertRequest, UpdateRequest, DeleteRequest, GetRequest, QueryRequest + validate_get_request, validate_update_request, validate_detokenize_request, validate_tokenize_request, validate_file_upload_request +from skyflow.vault.data import InsertRequest, UpdateRequest, DeleteRequest, GetRequest, QueryRequest, FileUploadRequest, FileUploadResponse from skyflow.vault.tokens import DetokenizeRequest, TokenizeRequest class Vault: @@ -62,7 +66,27 @@ def __build_insert_body(self, request: InsertRequest): else: records_list = self.__build_bulk_field_records(request.values, request.tokens) return records_list + + def __get_file_for_file_upload(self, request: FileUploadRequest) -> Optional[File]: + if request.file_path: + if not request.file_name: + request.file_name = os.path.basename(request.file_path) + with open(request.file_path, "rb") as f: + file_bytes = f.read() + return (request.file_name, file_bytes) + + elif request.base64 and request.file_name: + decoded_bytes = base64.b64decode(request.base64) + return (request.file_name, decoded_bytes) + + elif request.file_object is not None: + if hasattr(request.file_object, "name") and request.file_object.name: + file_name = os.path.basename(request.file_object.name) + return (file_name, request.file_object) + + return None + def __get_headers(self): headers = { SKY_META_DATA_HEADER: json.dumps(get_metrics()) @@ -244,4 +268,31 @@ def tokenize(self, request: TokenizeRequest): return tokenize_response except Exception as e: log_error_log(SkyflowMessages.ErrorLogs.TOKENIZE_REQUEST_REJECTED.value, logger = self.__vault_client.get_logger()) - handle_exception(e, self.__vault_client.get_logger()) \ No newline at end of file + handle_exception(e, self.__vault_client.get_logger()) + + def upload_file(self, request: FileUploadRequest): + log_info(SkyflowMessages.Info.FILE_UPLOAD_TRIGGERED.value, self.__vault_client.get_logger()) + log_info(SkyflowMessages.Info.VALIDATING_FILE_UPLOAD_REQUEST.value, self.__vault_client.get_logger()) + validate_file_upload_request(self.__vault_client.get_logger(), request) + self.__initialize() + file_upload_api = 
self.__vault_client.get_records_api().with_raw_response + try: + api_response = file_upload_api.upload_file_v_2( + self.__vault_client.get_vault_id(), + table_name=request.table, + column_name=request.column_name, + file=self.__get_file_for_file_upload(request), + skyflow_id=request.skyflow_id, + return_file_metadata= False, + request_options=self.__get_headers() + ) + log_info(SkyflowMessages.Info.FILE_UPLOAD_REQUEST_RESOLVED.value, self.__vault_client.get_logger()) + log_info(SkyflowMessages.Info.FILE_UPLOAD_SUCCESS.value, self.__vault_client.get_logger()) + upload_response = FileUploadResponse( + skyflow_id=api_response.data.skyflow_id, + errors=None + ) + return upload_response + except Exception as e: + log_error_log(SkyflowMessages.ErrorLogs.FILE_UPLOAD_REQUEST_REJECTED.value, logger = self.__vault_client.get_logger()) + handle_exception(e, self.__vault_client.get_logger()) diff --git a/skyflow/vault/data/__init__.py b/skyflow/vault/data/__init__.py index b43b23cf..d711f4f6 100644 --- a/skyflow/vault/data/__init__.py +++ b/skyflow/vault/data/__init__.py @@ -8,4 +8,6 @@ from ._update_response import UpdateResponse from ._upload_file_request import UploadFileRequest from ._query_request import QueryRequest -from ._query_response import QueryResponse \ No newline at end of file +from ._query_response import QueryResponse +from ._file_upload_request import FileUploadRequest +from ._file_upload_response import FileUploadResponse \ No newline at end of file diff --git a/skyflow/vault/data/_file_upload_request.py b/skyflow/vault/data/_file_upload_request.py new file mode 100644 index 00000000..d1bd4a44 --- /dev/null +++ b/skyflow/vault/data/_file_upload_request.py @@ -0,0 +1,18 @@ +from typing import BinaryIO + +class FileUploadRequest: + def __init__(self, + table: str, + skyflow_id: str, + column_name: str, + file_path: str= None, + base64: str= None, + file_object: BinaryIO= None, + file_name: str= None): + self.table = table + self.skyflow_id = skyflow_id + self.column_name = column_name + self.file_path = file_path + self.base64 = base64 + self.file_object = file_object + self.file_name = file_name diff --git a/skyflow/vault/data/_file_upload_response.py b/skyflow/vault/data/_file_upload_response.py new file mode 100644 index 00000000..18218f08 --- /dev/null +++ b/skyflow/vault/data/_file_upload_response.py @@ -0,0 +1,6 @@ +class FileUploadResponse: + def __init__(self, + skyflow_id, + errors): + self.skyflow_id = skyflow_id + self.errors = errors diff --git a/tests/vault/controller/test__vault.py b/tests/vault/controller/test__vault.py index 0c8a7743..8d1d1ab0 100644 --- a/tests/vault/controller/test__vault.py +++ b/tests/vault/controller/test__vault.py @@ -1,12 +1,14 @@ import unittest -from unittest.mock import Mock, patch +from unittest.mock import Mock, patch, mock_open as mock_open_func, mock_open from skyflow.generated.rest import V1BatchRecord, V1FieldRecords, V1DetokenizeRecordRequest, V1TokenizeRecordRequest +from skyflow.utils._skyflow_messages import SkyflowMessages from skyflow.utils.enums import RedactionType, TokenMode from skyflow.vault.controller import Vault from skyflow.vault.data import InsertRequest, InsertResponse, UpdateResponse, UpdateRequest, DeleteResponse, \ - DeleteRequest, GetRequest, GetResponse, QueryRequest, QueryResponse + DeleteRequest, GetRequest, GetResponse, QueryRequest, QueryResponse, FileUploadRequest from skyflow.vault.tokens import DetokenizeRequest, DetokenizeResponse, TokenizeResponse, TokenizeRequest - +from skyflow.error import SkyflowError 
+from skyflow.utils.validations import validate_file_upload_request VAULT_ID = "test_vault_id" TABLE_NAME = "test_table" @@ -598,3 +600,277 @@ def test_tokenize_handles_generic_error(self, mock_validate): self.vault.tokenize(request) tokens_api.record_service_tokenize.assert_called_once() + + @patch("skyflow.vault.controller._vault.validate_file_upload_request") + def test_upload_file_with_file_path_successful(self, mock_validate): + """Test upload_file functionality using file path.""" + + request = FileUploadRequest( + table="test_table", + column_name="file_column", + skyflow_id="123", + file_path="/path/to/test.txt", + ) + + # Mock file open + mocked_open = mock_open_func(read_data=b"test file content") + + # Mock API response + mock_api_response = Mock() + mock_api_response.data = Mock(skyflow_id="123") + + records_api = self.vault_client.get_records_api.return_value + records_api.with_raw_response.upload_file_v_2.return_value = mock_api_response + + with patch('builtins.open', mocked_open): + result = self.vault.upload_file(request) + mock_validate.assert_called_once_with(self.vault_client.get_logger(), request) + mocked_open.assert_called_once_with("/path/to/test.txt", "rb") + self.assertEqual(result.skyflow_id, "123") + self.assertIsNone(result.errors) + + @patch("skyflow.vault.controller._vault.validate_file_upload_request") + def test_upload_file_with_base64_successful(self, mock_validate): + """Test upload_file functionality using base64 content.""" + + request = FileUploadRequest( + table="test_table", + column_name="file_column", + skyflow_id="123", + base64="dGVzdCBmaWxlIGNvbnRlbnQ=", # "test file content" in base64 + file_name="test.txt" + ) + + # Mock API response + mock_api_response = Mock() + mock_api_response.data = Mock(skyflow_id="123") + + records_api = self.vault_client.get_records_api.return_value + records_api.with_raw_response.upload_file_v_2.return_value = mock_api_response + + # Call upload_file + result = self.vault.upload_file(request) + mock_validate.assert_called_once_with(self.vault_client.get_logger(), request) + self.assertEqual(result.skyflow_id, "123") + self.assertIsNone(result.errors) + + @patch("skyflow.vault.controller._vault.validate_file_upload_request") + def test_upload_file_with_file_object_successful(self, mock_validate): + """Test upload_file functionality using file object.""" + + # Create mock file object + mock_file = Mock() + mock_file.name = "test.txt" + + request = FileUploadRequest( + table="test_table", + column_name="file_column", + skyflow_id="123", + file_object=mock_file + ) + + # Mock API response + mock_api_response = Mock() + mock_api_response.data = Mock(skyflow_id="123") + + records_api = self.vault_client.get_records_api.return_value + records_api.with_raw_response.upload_file_v_2.return_value = mock_api_response + + # Call upload_file + result = self.vault.upload_file(request) + mock_validate.assert_called_once_with(self.vault_client.get_logger(), request) + self.assertEqual(result.skyflow_id, "123") + self.assertIsNone(result.errors) + + @patch("skyflow.vault.controller._vault.validate_file_upload_request") + def test_upload_file_handles_api_error(self, mock_validate): + """Test upload_file error handling for API errors.""" + + request = FileUploadRequest( + table="test_table", + column_name="file_column", + skyflow_id="123", + file_path="/path/to/test.txt" + ) + + # Mock API error + records_api = self.vault_client.get_records_api.return_value + records_api.with_raw_response.upload_file_v_2.side_effect = Exception("Upload 
failed") + + # Assert that the exception is propagated + with patch('builtins.open', mock_open(read_data=b"test content")): + with self.assertRaises(Exception): + self.vault.upload_file(request) + mock_validate.assert_called_once_with(self.vault_client.get_logger(), request) + + @patch("skyflow.vault.controller._vault.validate_file_upload_request") + def test_upload_file_with_missing_file_source(self, mock_validate): + """Test upload_file with no file source specified.""" + + request = FileUploadRequest( + table="test_table", + column_name="file_column", + skyflow_id="123" + ) + + mock_validate.side_effect = SkyflowError(SkyflowMessages.Error.MISSING_FILE_SOURCE.value, + SkyflowMessages.ErrorCodes.INVALID_INPUT.value) + + with self.assertRaises(SkyflowError) as error: + self.vault.upload_file(request) + + self.assertEqual(error.exception.message, SkyflowMessages.Error.MISSING_FILE_SOURCE.value) + mock_validate.assert_called_once_with(self.vault_client.get_logger(), request) + +class TestFileUploadValidation(unittest.TestCase): + def setUp(self): + self.logger = Mock() + + def test_validate_invalid_table(self): + """Test validation fails when table is empty""" + request = FileUploadRequest( + table="", + column_name="file_column", + skyflow_id="123", + file_path="/path/to/file.txt" + ) + with self.assertRaises(SkyflowError) as error: + validate_file_upload_request(self.logger, request) + self.assertEqual(error.exception.message, SkyflowMessages.Error.EMPTY_TABLE_VALUE.value) + + def test_validate_empty_skyflow_id(self): + """Test validation fails when skyflow_id is empty""" + request = FileUploadRequest( + table="test_table", + column_name="file_column", + skyflow_id="", + file_path="/path/to/file.txt" + ) + with self.assertRaises(SkyflowError) as error: + validate_file_upload_request(self.logger, request) + self.assertEqual(error.exception.message, + SkyflowMessages.Error.EMPTY_SKYFLOW_ID.value.format("FILE_UPLOAD")) + + def test_validate_invalid_column_name(self): + """Test validation fails when column_name is missing""" + request = FileUploadRequest( + table="test_table", + skyflow_id="123", + column_name="", + file_path="/path/to/file.txt" + ) + with self.assertRaises(SkyflowError) as error: + validate_file_upload_request(self.logger, request) + self.assertEqual(error.exception.message, + SkyflowMessages.Error.INVALID_FILE_COLUMN_NAME.value.format("FILE_UPLOAD")) + + + @patch('os.path.exists') + @patch('os.path.isfile') + def test_validate_file_path_success(self, mock_isfile, mock_exists): + """Test validation succeeds with valid file path""" + mock_exists.return_value = True + mock_isfile.return_value = True + + request = FileUploadRequest( + table="test_table", + column_name="file_column", + skyflow_id="123", + file_path="/path/to/file.txt" + ) + validate_file_upload_request(self.logger, request) + mock_exists.assert_called_once_with("/path/to/file.txt") + mock_isfile.assert_called_once_with("/path/to/file.txt") + + @patch('os.path.exists') + def test_validate_invalid_file_path(self, mock_exists): + """Test validation fails with invalid file path""" + mock_exists.return_value = False + + request = FileUploadRequest( + table="test_table", + column_name="file_column", + skyflow_id="123", + file_path="/invalid/path.txt" + ) + with self.assertRaises(SkyflowError) as error: + validate_file_upload_request(self.logger, request) + self.assertEqual(error.exception.message, SkyflowMessages.Error.INVALID_FILE_PATH.value) + + def test_validate_base64_success(self): + """Test validation succeeds 
with valid base64""" + request = FileUploadRequest( + table="test_table", + column_name="file_column", + skyflow_id="123", + base64="dGVzdCBmaWxlIGNvbnRlbnQ=", + file_name="test.txt" + ) + validate_file_upload_request(self.logger, request) + + def test_validate_base64_without_filename(self): + """Test validation fails with base64 but no filename""" + request = FileUploadRequest( + table="test_table", + column_name="file_column", + skyflow_id="123", + base64="dGVzdCBmaWxlIGNvbnRlbnQ=" + ) + with self.assertRaises(SkyflowError) as error: + validate_file_upload_request(self.logger, request) + self.assertEqual(error.exception.message, SkyflowMessages.Error.INVALID_FILE_NAME.value) + + def test_validate_invalid_base64(self): + """Test validation fails with invalid base64""" + request = FileUploadRequest( + table="test_table", + column_name="file_column", + skyflow_id="123", + base64="invalid-base64", + file_name="test.txt" + ) + with self.assertRaises(SkyflowError) as error: + validate_file_upload_request(self.logger, request) + self.assertEqual(error.exception.message, SkyflowMessages.Error.INVALID_BASE64_STRING.value) + + def test_validate_file_object_success(self): + """Test validation succeeds with valid file object""" + mock_file = Mock() + mock_file.seek = Mock() # Add seek method + + request = FileUploadRequest( + table="test_table", + column_name="file_column", + skyflow_id="123", + file_object=mock_file + ) + validate_file_upload_request(self.logger, request) + + def test_validate_invalid_file_object(self): + """Test validation fails with invalid file object""" + mock_file = Mock() + mock_file.seek = Mock(side_effect=Exception()) # Make seek fail + + request = FileUploadRequest( + table="test_table", + column_name="file_column", + skyflow_id="123", + file_object=mock_file + ) + with self.assertRaises(SkyflowError) as error: + validate_file_upload_request(self.logger, request) + self.assertEqual(error.exception.message, SkyflowMessages.Error.INVALID_FILE_OBJECT.value) + + def test_validate_missing_file_source(self): + """Test validation fails when no file source is provided""" + request = FileUploadRequest( + table="test_table", + column_name="file_column", + skyflow_id="123" + ) + with self.assertRaises(SkyflowError) as error: + validate_file_upload_request(self.logger, request) + self.assertEqual(error.exception.message, SkyflowMessages.Error.MISSING_FILE_SOURCE.value) + with self.assertRaises(SkyflowError) as error: + validate_file_upload_request(self.logger, request) + self.assertEqual(error.exception.message, SkyflowMessages.Error.MISSING_FILE_SOURCE.value) From 3cafb5852cb4eb7d78df0b9de3142bc18c7e2c1b Mon Sep 17 00:00:00 2001 From: raushan-skyflow Date: Wed, 10 Sep 2025 14:10:00 +0000 Subject: [PATCH 05/42] [AUTOMATED] Private Release 2.1.0b1.dev0+61e368f --- setup.py | 2 +- skyflow/utils/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index a205e472..b349bede 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ if sys.version_info < (3, 8): raise RuntimeError("skyflow requires Python 3.8+") -current_version = '2.1.0b1' +current_version = '2.1.0b1.dev0+61e368f' setup( name='skyflow', diff --git a/skyflow/utils/_version.py b/skyflow/utils/_version.py index 2e213012..fab22bb1 100644 --- a/skyflow/utils/_version.py +++ b/skyflow/utils/_version.py @@ -1 +1 @@ -SDK_VERSION = '2.1.0b1' \ No newline at end of file +SDK_VERSION = '2.1.0b1.dev0+61e368f' \ No newline at end of file From dc258e041ce472a76f7b9f8550ed271312f26648 Mon Sep 17 
00:00:00 2001 From: skyflow-bharti Date: Fri, 12 Sep 2025 15:47:23 +0530 Subject: [PATCH 06/42] SK-2292 retry on connection --- skyflow/vault/_client.py | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index e426f59f..b9d858fa 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -45,7 +45,6 @@ def __init__(self, config: Configuration): def insert(self, records: dict, options: InsertOptions = InsertOptions()): interface = InterfaceName.INSERT.value log_info(InfoMessages.INSERT_TRIGGERED.value, interface=interface) - self._checkConfig(interface) jsonBody = getInsertRequestBody(records, options) @@ -57,16 +56,35 @@ def insert(self, records: dict, options: InsertOptions = InsertOptions()): "sky-metadata": json.dumps(getMetrics()) } - response = requests.post(requestURL, data=jsonBody, headers=headers) - processedResponse = processResponse(response) - result, partial = convertResponse(records, processedResponse, options) - if partial: - log_error(SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, interface) - elif 'records' not in result: - log_error(SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, interface) - else: - log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) - return result + # Use for-loop for retry logic, avoid code repetition + for attempt in range(2): + try: + # If jsonBody is a dict, use json=, else use data= + if isinstance(jsonBody, dict): + response = requests.post(requestURL, json=jsonBody, headers=headers) + else: + response = requests.post(requestURL, data=jsonBody, headers=headers) + processedResponse = processResponse(response) + result, partial = convertResponse(records, processedResponse, options) + if partial: + log_error(SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, interface) + raise SkyflowError(SkyflowErrorCodes.PARTIAL_SUCCESS, SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, result, interface=interface) + if 'records' not in result: + log_error(SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, interface) + raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, result, interface=interface) + log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) + return result + except requests.exceptions.ConnectionError as err: + log_error(f'Connection error inserting record: {err}', interface) + if attempt == 0: + log_info("Retrying record...", interface) + continue + else: + raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, f"Connection error after retry: {err}", interface=interface) + except Exception as err: + log_error(f'Unexpected error in insert: {err}', interface) + raise + def detokenize(self, records: dict, options: DetokenizeOptions = DetokenizeOptions()): interface = InterfaceName.DETOKENIZE.value From adafc1cc1b618f4d26c868fb764b490a92147580 Mon Sep 17 00:00:00 2001 From: skyflow-bharti Date: Fri, 12 Sep 2025 15:51:48 +0530 Subject: [PATCH 07/42] SK-2292 set default retry --- skyflow/vault/_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index b9d858fa..58e28d05 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -55,9 +55,9 @@ def insert(self, records: dict, options: InsertOptions = InsertOptions()): "Authorization": "Bearer " + self.storedToken, "sky-metadata": json.dumps(getMetrics()) } - + max_retries = 1 # Use for-loop for retry logic, avoid code 
repetition - for attempt in range(2): + for attempt in range(max_retries): try: # If jsonBody is a dict, use json=, else use data= if isinstance(jsonBody, dict): From b3d791cafb37e6e1d0748dde5b389dd07d737178 Mon Sep 17 00:00:00 2001 From: skyflow-bharti Date: Fri, 12 Sep 2025 18:48:11 +0530 Subject: [PATCH 08/42] SK-2293 max retry set to 3 --- skyflow/vault/_client.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index 58e28d05..785c42eb 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -55,15 +55,12 @@ def insert(self, records: dict, options: InsertOptions = InsertOptions()): "Authorization": "Bearer " + self.storedToken, "sky-metadata": json.dumps(getMetrics()) } - max_retries = 1 + max_retries = 3 # Use for-loop for retry logic, avoid code repetition - for attempt in range(max_retries): + for attempt in range(max_retries+1): try: # If jsonBody is a dict, use json=, else use data= - if isinstance(jsonBody, dict): - response = requests.post(requestURL, json=jsonBody, headers=headers) - else: - response = requests.post(requestURL, data=jsonBody, headers=headers) + response = requests.post(requestURL, data=jsonBody, headers=headers) processedResponse = processResponse(response) result, partial = convertResponse(records, processedResponse, options) if partial: @@ -75,16 +72,10 @@ def insert(self, records: dict, options: InsertOptions = InsertOptions()): log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) return result except requests.exceptions.ConnectionError as err: - log_error(f'Connection error inserting record: {err}', interface) - if attempt == 0: - log_info("Retrying record...", interface) + if attempt < max_retries: continue else: - raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, f"Connection error after retry: {err}", interface=interface) - except Exception as err: - log_error(f'Unexpected error in insert: {err}', interface) - raise - + raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, f"Error occurred: {err}", interface=interface) def detokenize(self, records: dict, options: DetokenizeOptions = DetokenizeOptions()): interface = InterfaceName.DETOKENIZE.value From bb6c87450d2c2a82f0a1ab305f01979ce0e616f2 Mon Sep 17 00:00:00 2001 From: skyflow-bharti Date: Fri, 12 Sep 2025 14:30:46 +0000 Subject: [PATCH 09/42] [AUTOMATED] Public Release - 1.15.2 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index c7756728..89ba87d0 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.1' +current_version = '1.15.2' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 4d6b1a07..82208a4e 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.1' \ No newline at end of file +SDK_VERSION = '1.15.2' \ No newline at end of file From 5700874e2eda09325f94dd3c561c2d0b9d7cad87 Mon Sep 17 00:00:00 2001 From: skyflow-bharti Date: Fri, 12 Sep 2025 21:52:59 +0530 Subject: [PATCH 10/42] SK-2293 retry on every exception --- skyflow/vault/_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index 785c42eb..1c1236c2 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -71,7 +71,7 @@ def insert(self, records: dict, options: InsertOptions = 
InsertOptions()): raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, result, interface=interface) log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) return result - except requests.exceptions.ConnectionError as err: + except Exception as err: if attempt < max_retries: continue else: From 3b47b5ca3d6d4da3e2819f690f4924e6933a8f55 Mon Sep 17 00:00:00 2001 From: skyflow-bharti Date: Fri, 12 Sep 2025 16:31:59 +0000 Subject: [PATCH 11/42] [AUTOMATED] Public Release - 1.15.3 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 89ba87d0..9351f06a 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.2' +current_version = '1.15.3' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 82208a4e..30ae0eae 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.2' \ No newline at end of file +SDK_VERSION = '1.15.3' \ No newline at end of file From 922f98c977145227dc50b4117243557cf994dc48 Mon Sep 17 00:00:00 2001 From: skyflow-bharti Date: Fri, 12 Sep 2025 22:39:07 +0530 Subject: [PATCH 12/42] SK-2293 retry on every exception --- skyflow/vault/_client.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index 1c1236c2..e21dcbab 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -75,7 +75,10 @@ def insert(self, records: dict, options: InsertOptions = InsertOptions()): if attempt < max_retries: continue else: - raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, f"Error occurred: {err}", interface=interface) + if isinstance(err, SkyflowError): + raise err + else: + raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, f"Error occurred: {err}", interface=interface) def detokenize(self, records: dict, options: DetokenizeOptions = DetokenizeOptions()): interface = InterfaceName.DETOKENIZE.value From 450db9d8e16bb6194b7e4a9a536e9e96ab607d1a Mon Sep 17 00:00:00 2001 From: skyflow-bharti Date: Fri, 12 Sep 2025 17:15:17 +0000 Subject: [PATCH 13/42] [AUTOMATED] Public Release - 1.15.4 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 9351f06a..323fa31d 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.3' +current_version = '1.15.4' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 30ae0eae..5c7ae5de 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.3' \ No newline at end of file +SDK_VERSION = '1.15.4' \ No newline at end of file From 8e1d65351de5918610fddea288c3f468b714a4e6 Mon Sep 17 00:00:00 2001 From: raushan-skyflow Date: Wed, 17 Sep 2025 16:26:28 +0530 Subject: [PATCH 14/42] SK-2270 fix inconsistencies in python SDK V2 (#193) * SK-2270: fix inconsistencies in python SDK V2 --- README.md | 49 +- samples/detect_api/deidentify_file.py | 4 +- samples/vault_api/insert_byot.py | 2 +- samples/vault_api/insert_records.py | 2 +- skyflow/utils/_skyflow_messages.py | 1 + skyflow/utils/_utils.py | 3 + skyflow/utils/validations/_validations.py | 6 +- skyflow/vault/controller/_detect.py | 57 +- skyflow/vault/controller/_vault.py | 7 +- skyflow/vault/data/_insert_request.py 
| 4 +- .../vault/detect/_deidentify_file_response.py | 4 +- skyflow/vault/detect/_file_input.py | 6 +- skyflow/vault/tokens/_tokenize_response.py | 5 +- tests/utils/validations/__init__.py | 0 tests/utils/validations/test__validations.py | 1046 +++++++++++++++++ tests/vault/controller/test__detect.py | 19 +- tests/vault/controller/test__vault.py | 8 +- 17 files changed, 1165 insertions(+), 58 deletions(-) create mode 100644 tests/utils/validations/__init__.py create mode 100644 tests/utils/validations/test__validations.py diff --git a/README.md b/README.md index 39a58429..67b0d1c9 100644 --- a/README.md +++ b/README.md @@ -215,7 +215,7 @@ table_name = '' # Replace with your actual table name # Create Insert Request insert_request = InsertRequest( - table_name=table_name, + table=table_name, values=insert_data, return_tokens=True, # Optional: Get tokens for inserted data continue_on_error=True # Optional: Continue on partial errors @@ -273,7 +273,7 @@ options = InsertOptions( ```python insert_request = InsertRequest( - table_name=table_name, # Replace with the table name + table=table_name, # Replace with the table name values=insert_data, return_tokens=False, # Do not return tokens continue_on_error=False, # Stop inserting if any record fails @@ -474,7 +474,7 @@ try: # Step 2: Create Insert Request insert_request = InsertRequest( - table_name='table1', # Specify the table in the vault where the data will be inserted + table='table1', # Specify the table in the vault where the data will be inserted values=insert_data, # Attach the data (records) to be inserted return_tokens=True, # Specify if tokens should be returned upon successful insertion continue_on_error=True # Optional: Continue on partial errors @@ -551,7 +551,7 @@ try: # Step 2: Build an InsertRequest object with the table name and the data to insert insert_request = InsertRequest( - table_name='', # Replace with the actual table name in your Skyflow vault + table='', # Replace with the actual table name in your Skyflow vault values=insert_data, # Attach the data to be inserted ) @@ -608,7 +608,7 @@ try: # Step 4: Build the InsertRequest object with the data records to insert insert_request = InsertRequest( - table_name='table1', # Specify the table in the vault where the data will be inserted + table='table1', # Specify the table in the vault where the data will be inserted values=insert_data, # Attach the data (records) to be inserted return_tokens=True, # Specify if tokens should be returned upon successful insertion continue_on_error=True # Specify to continue inserting records even if an error occurs for some records @@ -686,7 +686,7 @@ try: # Step 3: Build the InsertRequest object with the upsertData insert_request = InsertRequest( - table_name='table1', # Specify the table in the vault where the data will be inserted + table='table1', # Specify the table in the vault where the data will be inserted values=insert_data, # Attach the data (records) to be inserted return_tokens=True, # Specify if tokens should be returned upon successful insertion upsert='cardholder_name' # Specify the field to be used for upsert operations (e.g., cardholder_name) @@ -1897,23 +1897,24 @@ ReidentifyTextResponse( ``` ### Deidentify File -To deidentify files, use the `deidentify_file` method. The `DeidentifyFileRequest` class creates a deidentify file request, which includes the file to be deidentified and various configuration options. +To deidentify files, use the `deidentify_file` method. 
The `DeidentifyFileRequest` class creates a deidentify file request, supports providing either a file or a file path in class FileInput for de-identification, along with various configuration options. #### Construct a Deidentify File request ```python from skyflow.error import SkyflowError from skyflow.utils.enums import DetectEntities, MaskingMethod, DetectOutputTranscriptions -from skyflow.vault.detect import DeidentifyFileRequest, TokenFormat, Transformations, Bleep +from skyflow.vault.detect import DeidentifyFileRequest, TokenFormat, Transformations, Bleep, FileInput """ This example demonstrates how to deidentify file, along with corresponding DeidentifyFileRequest schema. """ try: # Initialize Skyflow client # Step 1: Open file for deidentification - file = open('', 'rb') # Open the file in read-binary mode + file_path="" + file = open(file_path, 'rb') # Open the file in read-binary mode # Step 2: Create deidentify file request request = DeidentifyFileRequest( - file=file, # File object to deidentify + file=FileInput(file), # File to de-identify (can also provide a file path) entities=[DetectEntities.SSN, DetectEntities.CREDIT_CARD], # Entities to detect # Token format configuration @@ -1971,7 +1972,7 @@ except Exception as error: ```python from skyflow.error import SkyflowError from skyflow.utils.enums import DetectEntities, MaskingMethod, DetectOutputTranscriptions -from skyflow.vault.detect import DeidentifyFileRequest, TokenFormat, Bleep +from skyflow.vault.detect import DeidentifyFileRequest, TokenFormat, Bleep, FileInput """ * Skyflow Deidentify File Example * @@ -1985,7 +1986,7 @@ try: file = open('sensitive_document.txt', 'rb') # Open the file in read-binary mode # Step 2: Create deidentify file request request = DeidentifyFileRequest( - file=file, # File object to deidentify + file=FileInput(file), # File to de-identify (can also provide a file path) entities=[ DetectEntities.SSN, DetectEntities.CREDIT_CARD @@ -2038,7 +2039,6 @@ DeidentifyFileResponse( ], run_id='83abcdef-2b61-4a83-a4e0-cbc71ffabffd', status='SUCCESS', - errors=[] ) ``` @@ -2121,7 +2121,7 @@ except Exception as error: print('Unexpected Error:', error) # Print the stack trace for debugging purposes ``` -Sample Response +Sample Response: ```python DeidentifyFileResponse( file='TXkgY2FyZCBudW1iZXIgaXMgW0NSRURJVF9DQVJEXQpteSBzZWNvbmQ…', # Base64 encoded file content @@ -2142,7 +2142,26 @@ DeidentifyFileResponse( ], run_id='48ec05ba-96ec-4641-a8e2-35e066afef95', status='SUCCESS', - errors=[] +) +``` + +Incase of invalid/expired RunId: + +```python +DeidentifyFileResponse( + file_base64=None, + file=None, + type='UNKNOWN', + extension=None, + word_count=None, + char_count=None, + size_in_kb=0.0, + duration_in_seconds=None, + page_count=None, + slide_count=None, + entities=[], + run_id='1e9f321f-dd51-4ab1-a014-21212fsdfsd', + status='UNKNOWN' ) ``` diff --git a/samples/detect_api/deidentify_file.py b/samples/detect_api/deidentify_file.py index c9877d58..99b4b26e 100644 --- a/samples/detect_api/deidentify_file.py +++ b/samples/detect_api/deidentify_file.py @@ -1,7 +1,7 @@ from skyflow.error import SkyflowError from skyflow import Env, Skyflow, LogLevel from skyflow.utils.enums import DetectEntities, MaskingMethod, DetectOutputTranscriptions -from skyflow.vault.detect import DeidentifyFileRequest, TokenFormat, Transformations, DateTransformation, Bleep +from skyflow.vault.detect import DeidentifyFileRequest, TokenFormat, Transformations, DateTransformation, Bleep, FileInput """ * Skyflow Deidentify File Example @@ 
-39,7 +39,7 @@ def perform_file_deidentification(): file = open(file_path, 'rb') # Step 5: Configure Deidentify File Request with all options deidentify_request = DeidentifyFileRequest( - file=file, # File object to deidentify + file=FileInput(file), # File to de-identify (can also provide a file path) entities=[DetectEntities.SSN, DetectEntities.CREDIT_CARD], # Entities to detect allow_regex_list=[''], # Optional: Patterns to allow restrict_regex_list=[''], # Optional: Patterns to restrict diff --git a/samples/vault_api/insert_byot.py b/samples/vault_api/insert_byot.py index ae4c1eae..5161f886 100644 --- a/samples/vault_api/insert_byot.py +++ b/samples/vault_api/insert_byot.py @@ -70,7 +70,7 @@ def perform_secure_data_insertion_with_byot(): ] insert_request = InsertRequest( - table_name=table_name, + table=table_name, values=insert_data, token_mode=TokenMode.ENABLE, # Enable Bring Your Own Token (BYOT) tokens=tokens, # Specify tokens to use for BYOT diff --git a/samples/vault_api/insert_records.py b/samples/vault_api/insert_records.py index 32ec1fae..76ec2259 100644 --- a/samples/vault_api/insert_records.py +++ b/samples/vault_api/insert_records.py @@ -47,7 +47,7 @@ def perform_secure_data_insertion(): # Step 5: Create Insert Request insert_request = InsertRequest( - table_name=table_name, + table=table_name, values=insert_data, return_tokens=True, # Optional: Get tokens for inserted data continue_on_error=True # Optional: Continue on partial errors diff --git a/skyflow/utils/_skyflow_messages.py b/skyflow/utils/_skyflow_messages.py index 8401aeb6..3672cfa8 100644 --- a/skyflow/utils/_skyflow_messages.py +++ b/skyflow/utils/_skyflow_messages.py @@ -383,6 +383,7 @@ class ErrorLogs(Enum): DEIDENTIFY_FILE_REQUEST_REJECTED = f"{ERROR}: [{error_prefix}] Deidentify file resulted in failure." DETECT_RUN_REQUEST_REJECTED = f"{ERROR}: [{error_prefix}] Detect get run resulted in failure." DEIDENTIFY_TEXT_REQUEST_REJECTED = f"{ERROR}: [{error_prefix}] Deidentify text resulted in failure." + SAVING_DEIDENTIFY_FILE_FAILED = f"{ERROR}: [{error_prefix}] Error while saving deidentified file to output directory." REIDENTIFY_TEXT_REQUEST_REJECTED = f"{ERROR}: [{error_prefix}] Reidentify text resulted in failure." DETECT_FILE_REQUEST_REJECTED = f"{ERROR}: [{error_prefix}] Deidentify file resulted in failure." 
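The README change above calls out that `DeidentifyFileRequest` now takes a `FileInput` wrapper, and the controller hunks that follow write the processed result into `output_directory` with a `processed-` prefix when the run succeeds. The sketch below is illustrative only: the `detect()` accessor and passing `output_directory` as a constructor argument are assumptions (the diff only shows the controller reading `request.output_directory`), while `FileInput`, `DeidentifyFileRequest`, and `deidentify_file` come from this patch.

```python
from skyflow.utils.enums import DetectEntities
from skyflow.vault.detect import DeidentifyFileRequest, FileInput

# Initialize Skyflow client (elided). `skyflow_client` and the detect()
# accessor are assumptions here; neither is part of this diff.
detect = skyflow_client.detect()

with open('sensitive_document.txt', 'rb') as f:
    request = DeidentifyFileRequest(
        file=FileInput(f),                  # file object; the README notes a file path also works
        entities=[DetectEntities.SSN],      # entities to detect
        output_directory='./deidentified',  # assumed kwarg; on SUCCESS the controller writes
    )                                       # ./deidentified/processed-sensitive_document.<ext>
    response = detect.deidentify_file(request)

print(response.status, response.run_id)
```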
diff --git a/skyflow/utils/_utils.py b/skyflow/utils/_utils.py index 77ffe580..114079b5 100644 --- a/skyflow/utils/_utils.py +++ b/skyflow/utils/_utils.py @@ -447,3 +447,6 @@ def encode_column_values(get_request): encoded_column_values.append(quote(column)) return encoded_column_values + +def get_attribute(obj, camel_case, snake_case): + return getattr(obj, camel_case, None) or getattr(obj, snake_case, None) diff --git a/skyflow/utils/validations/_validations.py b/skyflow/utils/validations/_validations.py index f88388ad..4428d11e 100644 --- a/skyflow/utils/validations/_validations.py +++ b/skyflow/utils/validations/_validations.py @@ -277,7 +277,7 @@ def validate_file_from_request(file_input: FileInput): raise SkyflowError(SkyflowMessages.Error.INVALID_FILE_TYPE.value, invalid_input_error_code) # Validate file name - file_name = os.path.splitext(file.name)[0] + file_name, _ = os.path.splitext(os.path.basename(file.name)) if not file_name or not file_name.strip(): raise SkyflowError(SkyflowMessages.Error.INVALID_FILE_NAME.value, invalid_input_error_code) @@ -394,10 +394,10 @@ def validate_deidentify_file_request(logger, request: DeidentifyFileRequest): raise SkyflowError(SkyflowMessages.Error.WAIT_TIME_GREATER_THEN_64.value, invalid_input_error_code) def validate_insert_request(logger, request): - if not isinstance(request.table_name, str): + if not isinstance(request.table, str): log_error_log(SkyflowMessages.ErrorLogs.TABLE_IS_REQUIRED.value.format("INSERT"), logger = logger) raise SkyflowError(SkyflowMessages.Error.INVALID_TABLE_NAME_IN_INSERT.value, invalid_input_error_code) - if not request.table_name.strip(): + if not request.table.strip(): log_error_log(SkyflowMessages.ErrorLogs.EMPTY_TABLE_NAME.value.format("INSERT"), logger = logger) raise SkyflowError(SkyflowMessages.Error.MISSING_TABLE_NAME_IN_INSERT.value, invalid_input_error_code) diff --git a/skyflow/vault/controller/_detect.py b/skyflow/vault/controller/_detect.py index 93fac69e..62d551c1 100644 --- a/skyflow/vault/controller/_detect.py +++ b/skyflow/vault/controller/_detect.py @@ -6,8 +6,9 @@ from skyflow.generated.rest import DeidentifyTextRequestFile, DeidentifyAudioRequestFile, DeidentifyPdfRequestFile, \ DeidentifyImageRequestFile, DeidentifyPresentationRequestFile, DeidentifySpreadsheetRequestFile, \ DeidentifyDocumentRequestFile, DeidentifyFileRequestFile +from skyflow.generated.rest.types.deidentify_status_response import DeidentifyStatusResponse from skyflow.utils._skyflow_messages import SkyflowMessages -from skyflow.utils._utils import get_metrics, handle_exception, parse_deidentify_text_response, parse_reidentify_text_response +from skyflow.utils._utils import get_attribute, get_metrics, handle_exception, parse_deidentify_text_response, parse_reidentify_text_response from skyflow.utils.constants import SKY_META_DATA_HEADER from skyflow.utils.logger import log_info, log_error_log from skyflow.utils.validations import validate_deidentify_file_request, validate_get_detect_run_request @@ -83,6 +84,43 @@ def __poll_for_processed_file(self, run_id, max_wait_time=64): except Exception as e: raise e + def __save_deidentify_file_response_output(self, response: DeidentifyStatusResponse, output_directory: str, original_file_name: str, name_without_ext: str): + if not response or not hasattr(response, 'output') or not response.output or not output_directory: + return + + if not os.path.exists(output_directory): + return + + deidentify_file_prefix = "processed-" + output_list = response.output + + base_original_filename = 
os.path.basename(original_file_name) + base_name_without_ext = os.path.splitext(base_original_filename)[0] + + for idx, output in enumerate(output_list): + try: + processed_file = get_attribute(output, 'processedFile', 'processed_file') + processed_file_type = get_attribute(output, 'processedFileType', 'processed_file_type') + processed_file_extension = get_attribute(output, 'processedFileExtension', 'processed_file_extension') + + if not processed_file: + continue + + decoded_data = base64.b64decode(processed_file) + + if idx == 0 or processed_file_type == 'redacted_file': + output_file_name = os.path.join(output_directory, deidentify_file_prefix + base_original_filename) + if processed_file_extension: + output_file_name = os.path.join(output_directory, f"{deidentify_file_prefix}{base_name_without_ext}.{processed_file_extension}") + else: + output_file_name = os.path.join(output_directory, f"{deidentify_file_prefix}{base_name_without_ext}.{processed_file_extension}") + + with open(output_file_name, 'wb') as f: + f.write(decoded_data) + except Exception as e: + log_error_log(SkyflowMessages.ErrorLogs.SAVING_DEIDENTIFY_FILE_FAILED.value, self.__vault_client.get_logger()) + handle_exception(e, self.__vault_client.get_logger()) + def __parse_deidentify_file_response(self, data, run_id=None, status=None): output = getattr(data, "output", []) status_val = getattr(data, "status", None) or status @@ -141,8 +179,8 @@ def output_to_dict_list(output): return DeidentifyFileResponse( file_base64=base64_string, - file=file_obj, # File class will be instantiated in DeidentifyFileResponse - type=first_output.get("type", None), + file=file_obj, + type=first_output.get("type", "UNKNOWN"), extension=extension, word_count=word_count, char_count=char_count, @@ -153,7 +191,6 @@ def output_to_dict_list(output): entities=entities, run_id=run_id_val, status=status_val, - errors=None ) def __get_token_format(self, request): @@ -396,12 +433,11 @@ def deidentify_file(self, request: DeidentifyFileRequest): run_id = getattr(api_response.data, 'run_id', None) processed_response = self.__poll_for_processed_file(run_id, request.wait_time) - parsed_response = self.__parse_deidentify_file_response(processed_response, run_id) if request.output_directory and processed_response.status == 'SUCCESS': - file_name_only = 'processed-'+os.path.basename(file_name) - output_file_path = f"{request.output_directory}/{file_name_only}" - with open(output_file_path, 'wb') as output_file: - output_file.write(base64.b64decode(parsed_response.file_base64)) + name_without_ext, _ = os.path.splitext(file_name) + self.__save_deidentify_file_response_output(processed_response, request.output_directory, file_name, name_without_ext) + + parsed_response = self.__parse_deidentify_file_response(processed_response, run_id) log_info(SkyflowMessages.Info.DETECT_FILE_SUCCESS.value, self.__vault_client.get_logger()) return parsed_response @@ -411,9 +447,9 @@ def deidentify_file(self, request: DeidentifyFileRequest): handle_exception(e, self.__vault_client.get_logger()) def get_detect_run(self, request: GetDetectRunRequest): + log_info(SkyflowMessages.Info.GET_DETECT_RUN_TRIGGERED.value,self.__vault_client.get_logger()) log_info(SkyflowMessages.Info.VALIDATING_GET_DETECT_RUN_INPUT.value, self.__vault_client.get_logger()) validate_get_detect_run_request(self.__vault_client.get_logger(), request) - log_info(SkyflowMessages.Info.DEIDENTIFY_TEXT_REQUEST_RESOLVED.value, self.__vault_client.get_logger()) self.__initialize() files_api = 
self.__vault_client.get_detect_file_api().with_raw_response @@ -428,6 +464,7 @@ def get_detect_run(self, request: GetDetectRunRequest): parsed_response = self.__parse_deidentify_file_response(DeidentifyFileResponse(run_id=run_id, status='IN_PROGRESS')) else: parsed_response = self.__parse_deidentify_file_response(response.data, run_id, response.data.status) + log_info(SkyflowMessages.Info.GET_DETECT_RUN_SUCCESS.value,self.__vault_client.get_logger()) return parsed_response except Exception as e: log_error_log(SkyflowMessages.ErrorLogs.DETECT_FILE_REQUEST_REJECTED.value, diff --git a/skyflow/vault/controller/_vault.py b/skyflow/vault/controller/_vault.py index fe921293..7cc9ec77 100644 --- a/skyflow/vault/controller/_vault.py +++ b/skyflow/vault/controller/_vault.py @@ -10,6 +10,7 @@ parse_tokenize_response, parse_query_response, parse_get_response, encode_column_values, get_metrics from skyflow.utils.constants import SKY_META_DATA_HEADER from skyflow.utils.enums import RequestMethod +from skyflow.utils.enums.redaction_type import RedactionType from skyflow.utils.logger import log_info, log_error_log from skyflow.utils.validations import validate_insert_request, validate_delete_request, validate_query_request, \ validate_get_request, validate_update_request, validate_detokenize_request, validate_tokenize_request, validate_file_upload_request @@ -57,7 +58,7 @@ def __build_insert_body(self, request: InsertRequest): records_list = self.__build_batch_field_records( request.values, request.tokens, - request.table_name, + request.table, request.return_tokens, request.upsert ) @@ -109,7 +110,7 @@ def insert(self, request: InsertRequest): else: api_response = records_api.record_service_insert_record(self.__vault_client.get_vault_id(), - request.table_name, records=insert_body,tokenization= request.return_tokens, upsert=request.upsert, homogeneous=request.homogeneous, byot=request.token_mode.value, request_options=self.__get_headers()) + request.table, records=insert_body,tokenization= request.return_tokens, upsert=request.upsert, homogeneous=request.homogeneous, byot=request.token_mode.value, request_options=self.__get_headers()) insert_response = parse_insert_response(api_response, request.continue_on_error) log_info(SkyflowMessages.Info.INSERT_SUCCESS.value, self.__vault_client.get_logger()) @@ -225,7 +226,7 @@ def detokenize(self, request: DetokenizeRequest): tokens_list = [ V1DetokenizeRecordRequest( token=item.get('token'), - redaction=item.get('redaction', None) + redaction=item.get('redaction', RedactionType.DEFAULT) ) for item in request.data ] diff --git a/skyflow/vault/data/_insert_request.py b/skyflow/vault/data/_insert_request.py index 742c5120..909edd88 100644 --- a/skyflow/vault/data/_insert_request.py +++ b/skyflow/vault/data/_insert_request.py @@ -2,7 +2,7 @@ class InsertRequest: def __init__(self, - table_name, + table, values, tokens = None, upsert = None, @@ -10,7 +10,7 @@ def __init__(self, token_mode = TokenMode.DISABLE, return_tokens = True, continue_on_error = False): - self.table_name = table_name + self.table = table self.values = values self.tokens = tokens self.upsert = upsert diff --git a/skyflow/vault/detect/_deidentify_file_response.py b/skyflow/vault/detect/_deidentify_file_response.py index 90a0d493..b340e21c 100644 --- a/skyflow/vault/detect/_deidentify_file_response.py +++ b/skyflow/vault/detect/_deidentify_file_response.py @@ -17,7 +17,6 @@ def __init__( entities: list = None, # list of dicts with keys 'file' and 'extension' run_id: str = None, status: str = None, 
- errors: list = None, ): self.file_base64 = file_base64 self.file = File(file) if file else None @@ -32,7 +31,6 @@ def __init__( self.entities = entities if entities is not None else [] self.run_id = run_id self.status = status - self.errors = errors def __repr__(self): return ( @@ -42,7 +40,7 @@ def __repr__(self): f"char_count={self.char_count!r}, size_in_kb={self.size_in_kb!r}, " f"duration_in_seconds={self.duration_in_seconds!r}, page_count={self.page_count!r}, " f"slide_count={self.slide_count!r}, entities={self.entities!r}, " - f"run_id={self.run_id!r}, status={self.status!r}, errors={self.errors!r})" + f"run_id={self.run_id!r}, status={self.status!r})" ) def __str__(self): diff --git a/skyflow/vault/detect/_file_input.py b/skyflow/vault/detect/_file_input.py index 472ca0e2..6b8bc2fb 100644 --- a/skyflow/vault/detect/_file_input.py +++ b/skyflow/vault/detect/_file_input.py @@ -1,13 +1,15 @@ +from io import BufferedReader + class FileInput: """ Represents a file input for the vault detection process. Attributes: - file (str): The file object to be processed. This can be a file-like object or a binary string. + file (BufferedReader): The file object to be processed. This can be a file-like object or a binary string. file_path (str): The path to the file to be processed. """ - def __init__(self, file: str= None, file_path: str = None): + def __init__(self, file: BufferedReader= None, file_path: str = None): self.file = file self.file_path = file_path diff --git a/skyflow/vault/tokens/_tokenize_response.py b/skyflow/vault/tokens/_tokenize_response.py index 264b3987..598c2a1c 100644 --- a/skyflow/vault/tokens/_tokenize_response.py +++ b/skyflow/vault/tokens/_tokenize_response.py @@ -1,10 +1,11 @@ class TokenizeResponse: - def __init__(self, tokenized_fields = None): + def __init__(self, tokenized_fields = None, errors = None): self.tokenized_fields = tokenized_fields + self.errors = errors def __repr__(self): - return f"TokenizeResponse(tokenized_fields={self.tokenized_fields})" + return f"TokenizeResponse(tokenized_fields={self.tokenized_fields}, errors={self.errors})" def __str__(self): return self.__repr__() diff --git a/tests/utils/validations/__init__.py b/tests/utils/validations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/utils/validations/test__validations.py b/tests/utils/validations/test__validations.py new file mode 100644 index 00000000..48332a55 --- /dev/null +++ b/tests/utils/validations/test__validations.py @@ -0,0 +1,1046 @@ +import unittest +from unittest.mock import Mock, patch, MagicMock +import tempfile +import os + +from skyflow.error import SkyflowError +from skyflow.utils.validations._validations import ( + validate_required_field, validate_api_key, validate_credentials, + validate_log_level, validate_keys, validate_vault_config, + validate_update_vault_config, validate_connection_config, + validate_update_connection_config, validate_file_from_request, + validate_insert_request, validate_delete_request, validate_query_request, + validate_get_detect_run_request, validate_get_request, validate_update_request, + validate_detokenize_request, validate_tokenize_request, validate_invoke_connection_params, + validate_deidentify_text_request, validate_reidentify_text_request, validate_deidentify_file_request +) +from skyflow.utils import SkyflowMessages +from skyflow.utils.enums import DetectEntities, RedactionType +from skyflow.vault.data import GetRequest, UpdateRequest +from skyflow.vault.detect import DeidentifyTextRequest, 
Transformations, DateTransformation, ReidentifyTextRequest, \ + FileInput, DeidentifyFileRequest +from skyflow.vault.tokens import DetokenizeRequest +from skyflow.vault.connection._invoke_connection_request import InvokeConnectionRequest + +class TestValidations(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.temp_file = tempfile.NamedTemporaryFile(delete=False) + cls.temp_file.write(b"test content") + cls.temp_file.close() + cls.temp_file_path = cls.temp_file.name + cls.temp_dir = tempfile.TemporaryDirectory() + cls.temp_dir_path = cls.temp_dir.name + + @classmethod + def tearDownClass(cls): + if os.path.exists(cls.temp_file_path): + os.unlink(cls.temp_file_path) + cls.temp_dir.cleanup() + + def setUp(self): + self.logger = Mock() + + def test_validate_required_field_valid(self): + config = {"test_field": "test_value"} + validate_required_field( + self.logger, + config, + "test_field", + str, + "Empty error", + "Invalid error" + ) + + def test_validate_required_field_missing(self): + config = {} + with self.assertRaises(SkyflowError) as context: + validate_required_field( + self.logger, + config, + "vault_id", + str, + "Empty error", + "Invalid error" + ) + self.assertEqual(context.exception.message, "Invalid error") + + def test_validate_required_field_empty_string(self): + config = {"test_field": ""} + with self.assertRaises(SkyflowError) as context: + validate_required_field( + self.logger, + config, + "test_field", + str, + "Empty error", + "Invalid error" + ) + self.assertEqual(context.exception.message, "Empty error") + + def test_validate_required_field_wrong_type(self): + config = {"test_field": 123} + with self.assertRaises(SkyflowError) as context: + validate_required_field( + self.logger, + config, + "test_field", + str, + "Empty error", + "Invalid error" + ) + self.assertEqual(context.exception.message, "Invalid error") + + def test_validate_api_key_valid(self): + valid_key = "sky-abc12-1234567890abcdef1234567890abcdef" + self.assertTrue(validate_api_key(valid_key, self.logger)) + + def test_validate_api_key_invalid_prefix(self): + invalid_key = "invalid-abc12-1234567890abcdef1234567890abcdef" + self.assertFalse(validate_api_key(invalid_key, self.logger)) + + def test_validate_api_key_invalid_length(self): + invalid_key = "sky-abc12-123456" + self.assertFalse(validate_api_key(invalid_key, self.logger)) + + def test_validate_credentials_with_api_key(self): + credentials = { + "api_key": "sky-abc12-1234567890abcdef1234567890abcdef" + } + validate_credentials(self.logger, credentials) + + def test_validate_credentials_with_expired_token(self): + credentials = { + "token": "expired_token" + } + with patch('skyflow.service_account.is_expired', return_value=True): + with self.assertRaises(SkyflowError) as context: + validate_credentials(self.logger, credentials) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_CREDENTIALS_TOKEN.value) + + def test_validate_credentials_empty_credentials(self): + credentials = {} + with self.assertRaises(SkyflowError) as context: + validate_credentials(self.logger, credentials) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_CREDENTIALS.value) + + def test_validate_credentials_multiple_auth_methods(self): + credentials = { + "token": "valid_token", + "api_key": "sky-abc12-1234567890abcdef1234567890abcdef" + } + with self.assertRaises(SkyflowError) as context: + validate_credentials(self.logger, credentials) + self.assertEqual(context.exception.message, 
SkyflowMessages.Error.MULTIPLE_CREDENTIALS_PASSED.value) + + + def test_validate_credentials_with_empty_context(self): + credentials = { + "token": "valid_token", + "context": "" + } + with patch('skyflow.service_account.is_expired', return_value=False): + with self.assertRaises(SkyflowError) as context: + validate_credentials(self.logger, credentials) + self.assertEqual(context.exception.message, SkyflowMessages.Error.EMPTY_CONTEXT.value) + + def test_validate_log_level_valid(self): + from skyflow.utils.enums import LogLevel + log_level = LogLevel.ERROR + validate_log_level(self.logger, log_level) + + def test_validate_log_level_invalid(self): + class InvalidEnum: + pass + invalid_log_level = InvalidEnum() + with self.assertRaises(SkyflowError) as context: + validate_log_level(self.logger, invalid_log_level) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_LOG_LEVEL.value) + + def test_validate_log_level_none(self): + with self.assertRaises(SkyflowError) as context: + validate_log_level(self.logger, None) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_LOG_LEVEL.value) + + def test_validate_keys_valid(self): + config = {"vault_id": "test_id", "cluster_id": "test_cluster"} + validate_keys(self.logger, config, ["vault_id", "cluster_id"]) + + def test_validate_keys_invalid(self): + config = {"invalid_key": "value"} + with self.assertRaises(SkyflowError) as context: + validate_keys(self.logger, config, ["vault_id", "cluster_id"]) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_KEY.value.format("invalid_key")) + + def test_validate_vault_config_valid(self): + from skyflow.utils.enums import Env + config = { + "vault_id": "vault123", + "cluster_id": "cluster123", + "credentials": { + "api_key": "sky-abc12-1234567890abcdef1234567890abcdef" + }, + "env": Env.DEV + } + self.assertTrue(validate_vault_config(self.logger, config)) + + def test_validate_vault_config_missing_required(self): + config = { + "cluster_id": "cluster123" + } + with self.assertRaises(SkyflowError) as context: + validate_vault_config(self.logger, config) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_VAULT_ID.value) + + + def test_validate_update_vault_config_valid(self): + from skyflow.utils.enums import Env + config = { + "vault_id": "vault123", + "cluster_id": "cluster123", + "credentials": { + "api_key": "sky-abc12-1234567890abcdef1234567890abcdef" + }, + "env": Env.DEV + } + self.assertTrue(validate_update_vault_config(self.logger, config)) + + def test_validate_update_vault_config_missing_credentials(self): + config = { + "vault_id": "vault123", + "cluster_id": "cluster123" + } + with self.assertRaises(SkyflowError) as context: + validate_update_vault_config(self.logger, config) + self.assertEqual(context.exception.message, SkyflowMessages.Error.EMPTY_CREDENTIALS.value.format("vault", "vault123")) + + def test_validate_update_vault_config_invalid_cluster_id(self): + config = { + "vault_id": "vault123", + "cluster_id": "", + "credentials": { + "api_key": "sky-abc12-1234567890abcdef1234567890abcdef" + } + } + with self.assertRaises(SkyflowError) as context: + validate_update_vault_config(self.logger, config) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_CLUSTER_ID.value.format("vault123")) + + def test_validate_connection_config_valid(self): + config = { + "connection_id": "conn123", + "connection_url": "https://example.com", + "credentials": { + "api_key": 
"sky-abc12-1234567890abcdef1234567890abcdef" + } + } + self.assertTrue(validate_connection_config(self.logger, config)) + + def test_validate_connection_config_missing_url(self): + config = { + "connection_id": "conn123", + "credentials": { + "api_key": "sky-abc12-1234567890abcdef1234567890abcdef" + } + } + with self.assertRaises(SkyflowError) as context: + validate_connection_config(self.logger, config) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_CONNECTION_URL.value.format("conn123")) + + def test_validate_connection_config_empty_connection_id(self): + config = { + "connection_id": "", + "connection_url": "https://example.com", + "credentials": { + "api_key": "sky-abc12-1234567890abcdef1234567890abcdef" + } + } + with self.assertRaises(SkyflowError) as context: + validate_connection_config(self.logger, config) + self.assertEqual(context.exception.message, SkyflowMessages.Error.EMPTY_CONNECTION_ID.value) + + def test_validate_update_connection_config_valid(self): + config = { + "connection_id": "conn123", + "connection_url": "https://example.com", + "credentials": { + "api_key": "sky-abc12-1234567890abcdef1234567890abcdef" + } + } + self.assertTrue(validate_update_connection_config(self.logger, config)) + + def test_validate_update_connection_config_missing_credentials(self): + config = { + "connection_id": "conn123", + "connection_url": "https://example.com" + } + with self.assertRaises(SkyflowError) as context: + validate_update_connection_config(self.logger, config) + self.assertEqual(context.exception.message, SkyflowMessages.Error.EMPTY_CREDENTIALS.value.format("connection", "conn123")) + + def test_validate_update_connection_config_empty_url(self): + config = { + "connection_id": "conn123", + "connection_url": "", + "credentials": { + "api_key": "sky-abc12-1234567890abcdef1234567890abcdef" + } + } + with self.assertRaises(SkyflowError) as context: + validate_update_connection_config(self.logger, config) + self.assertEqual(context.exception.message, SkyflowMessages.Error.EMPTY_CONNECTION_URL.value.format("conn123")) + + def test_validate_file_from_request_valid_file(self): + file_obj = MagicMock() + file_obj.name = "test.txt" + file_input = MagicMock() + file_input.file = file_obj + file_input.file_path = None + validate_file_from_request(file_input) + + def test_validate_file_from_request_valid_file_path(self): + file_input = MagicMock() + file_input.file = None + file_input.file_path = self.temp_file_path + validate_file_from_request(file_input) + + def test_validate_file_from_request_missing_both(self): + file_input = MagicMock() + file_input.file = None + file_input.file_path = None + with self.assertRaises(SkyflowError) as context: + validate_file_from_request(file_input) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_DEIDENTIFY_FILE_INPUT.value) + + def test_validate_file_from_request_both_provided(self): + file_obj = MagicMock() + file_obj.name = "test.txt" + file_input = MagicMock() + file_input.file = file_obj + file_input.file_path = "/path/to/file" + with self.assertRaises(SkyflowError) as context: + validate_file_from_request(file_input) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_DEIDENTIFY_FILE_INPUT.value) + + + def test_validate_file_from_request_invalid_file_path(self): + file_input = MagicMock() + file_input.file = None + file_input.file_path = "/nonexistent/path/to/file" + with self.assertRaises(SkyflowError) as context: + validate_file_from_request(file_input) + 
self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_DEIDENTIFY_FILE_PATH.value) + + def test_validate_insert_request_valid(self): + request = MagicMock() + request.table = "test_table" + request.values = [{"field1": "value1"}] + request.upsert = None + request.homogeneous = None + request.token_mode = None + request.return_tokens = False + request.continue_on_error = False + request.tokens = None + validate_insert_request(self.logger, request) + + def test_validate_insert_request_invalid_table(self): + request = MagicMock() + request.table = 123 + request.values = [{"field1": "value1"}] + with self.assertRaises(SkyflowError) as context: + validate_insert_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_TABLE_NAME_IN_INSERT.value) + + def test_validate_insert_request_empty_values(self): + request = MagicMock() + request.table = "test_table" + request.values = [] + with self.assertRaises(SkyflowError) as context: + validate_insert_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.EMPTY_DATA_IN_INSERT.value) + + + def test_validate_delete_request_valid(self): + request = MagicMock() + request.table = "test_table" + request.ids = ["id1", "id2"] + validate_delete_request(self.logger, request) + + def test_validate_delete_request_empty_table(self): + request = MagicMock() + request.table = "" + request.ids = ["id1"] + with self.assertRaises(SkyflowError) as context: + validate_delete_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.EMPTY_TABLE_VALUE.value) + + def test_validate_delete_request_missing_ids(self): + request = MagicMock() + request.table = "test_table" + request.ids = None + with self.assertRaises(SkyflowError) as context: + validate_delete_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.EMPTY_RECORD_IDS_IN_DELETE.value) + + def test_validate_query_request_valid(self): + request = MagicMock() + request.query = "SELECT * FROM test_table" + validate_query_request(self.logger, request) + + def test_validate_query_request_empty_query(self): + request = MagicMock() + request.query = "" + with self.assertRaises(SkyflowError) as context: + validate_query_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.EMPTY_QUERY.value) + + def test_validate_query_request_invalid_query_type(self): + request = MagicMock() + request.query = 123 + with self.assertRaises(SkyflowError) as context: + validate_query_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_QUERY_TYPE.value.format(str(type(123)))) + + def test_validate_query_request_non_select_query(self): + request = MagicMock() + request.query = "INSERT INTO test_table VALUES (1)" + with self.assertRaises(SkyflowError) as context: + validate_query_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_QUERY_COMMAND.value.format(request.query)) + + def test_validate_get_detect_run_request_valid(self): + request = MagicMock() + request.run_id = "test_run_123" + validate_get_detect_run_request(self.logger, request) + + def test_validate_get_detect_run_request_empty_run_id(self): + request = MagicMock() + request.run_id = "" + with self.assertRaises(SkyflowError) as context: + validate_get_detect_run_request(self.logger, request) + self.assertEqual(context.exception.message, 
SkyflowMessages.Error.INVALID_RUN_ID.value) + + def test_validate_get_detect_run_request_invalid_run_id_type(self): + request = MagicMock() + request.run_id = 123 # Invalid type + with self.assertRaises(SkyflowError) as context: + validate_get_detect_run_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_RUN_ID.value) + + def test_validate_get_request_valid(self): + from skyflow.utils.enums import RedactionType + request = MagicMock() + request.table = "test_table" + request.redaction_type = RedactionType.PLAIN_TEXT + request.column_name = None + request.column_values = None + request.ids = ["id1", "id2"] + request.fields = ["field1", "field2"] + request.offset = None + request.limit = None + request.download_url = False + request.return_tokens = False + validate_get_request(self.logger, request) + + + def test_validate_get_request_invalid_table_type(self): + request = MagicMock() + request.table = 123 + with self.assertRaises(SkyflowError) as context: + validate_get_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_TABLE_VALUE.value) + + def test_validate_get_request_empty_table(self): + request = MagicMock() + request.table = "" + with self.assertRaises(SkyflowError) as context: + validate_get_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.EMPTY_TABLE_VALUE.value) + + def test_validate_get_request_invalid_redaction_type(self): + request = GetRequest( + table="test_table", + fields="invalid", + ids=["id1", "id2"], + redaction_type="invalid" + ) + + with self.assertRaises(SkyflowError) as context: + validate_get_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_REDACTION_TYPE.value.format(type(request.redaction_type))) + + def test_validate_get_request_invalid_fields_type(self): + request= GetRequest( + table="test_table", + fields="invalid" + ) + with self.assertRaises(SkyflowError) as context: + validate_get_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_FIELDS_VALUE.value.format(type(request.fields))) + + def test_validate_get_request_empty_fields(self): + request = GetRequest( + table="test_table", + ids=[], + fields=[] + ) + with self.assertRaises(SkyflowError) as context: + validate_get_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_FIELDS_VALUE.value.format(type(request.fields))) + + def test_validate_get_request_invalid_column_values_type(self): + request = GetRequest( + table="test_table", + column_name="test_column", + column_values="invalid", + ) + + with self.assertRaises(SkyflowError) as context: + validate_get_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_COLUMN_VALUE.value.format(type(request.column_values))) + + def test_validate_get_request_tokens_with_redaction(self): + request = GetRequest( + table="test_table", + return_tokens=True, + redaction_type = RedactionType.PLAIN_TEXT + ) + + with self.assertRaises(SkyflowError) as context: + validate_get_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.REDACTION_WITH_TOKENS_NOT_SUPPORTED.value) + + def test_validate_query_request_valid_complex(self): + request = MagicMock() + request.query = "SELECT * FROM table1 JOIN table2 ON table1.id = table2.id WHERE field = 'value'" + 
validate_query_request(self.logger, request) + + + def test_validate_query_request_invalid_update(self): + request = MagicMock() + request.query = "UPDATE table SET field = 'value'" # Only SELECT allowed + with self.assertRaises(SkyflowError) as context: + validate_query_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_QUERY_COMMAND.value.format(request.query)) + + def test_validate_update_request_valid(self): + request = MagicMock() + request.table = "test_table" + request.data = {"skyflow_id": "id123", "field1": "value1"} + request.return_tokens = False + request.token_mode = None + request.tokens = None + validate_update_request(self.logger, request) + + def test_validate_update_request_invalid_table_type(self): + request = UpdateRequest( + table=123, + data = {"skyflow_id": "id123"} + ) + with self.assertRaises(SkyflowError) as context: + validate_update_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_TABLE_VALUE.value) + + def test_validate_update_request_invalid_token_mode(self): + request = UpdateRequest( + table="test_table", + data = {"skyflow_id": "id123", "field1": "value1"}, + token_mode = "invalid" + ) + with self.assertRaises(SkyflowError) as context: + validate_update_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_TOKEN_MODE_TYPE.value) + + def test_validate_detokenize_request_valid(self): + request = MagicMock() + request.data = [{"token": "token123"}] + request.continue_on_error = False + validate_detokenize_request(self.logger, request) + + def test_validate_detokenize_request_empty_data(self): + request = MagicMock() + request.data = [] # Empty list + request.continue_on_error = False + with self.assertRaises(SkyflowError) as context: + validate_detokenize_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.EMPTY_TOKENS_LIST_VALUE.value) + + def test_validate_detokenize_request_invalid_token(self): + request = MagicMock() + request.data = [{"token": 123}] # Invalid token type + request.continue_on_error = False + with self.assertRaises(SkyflowError) as context: + validate_detokenize_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_TOKEN_TYPE.value.format("DETOKENIZE")) + + def test_validate_tokenize_request_valid(self): + request = MagicMock() + request.values = [{"value": "test", "column_group": "group1"}] + validate_tokenize_request(self.logger, request) + + + def test_validate_tokenize_request_invalid_values_type(self): + request = MagicMock() + request.values = "invalid" # Should be list + with self.assertRaises(SkyflowError) as context: + validate_tokenize_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_TOKENIZE_PARAMETERS.value.format(type(request.values))) + + def test_validate_tokenize_request_empty_values(self): + request = MagicMock() + request.values = [] # Empty list + with self.assertRaises(SkyflowError) as context: + validate_tokenize_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.EMPTY_TOKENIZE_PARAMETERS.value) + + def test_validate_tokenize_request_missing_required_fields(self): + request = MagicMock() + request.values = [{"value": "test"}] # Missing column_group + with self.assertRaises(SkyflowError) as context: + validate_tokenize_request(self.logger, request) + 
self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_TOKENIZE_PARAMETER_KEY.value.format(0)) + + def test_validate_invoke_connection_params_valid(self): + query_params = {"param1": "value1"} + path_params = {"path1": "value1"} + validate_invoke_connection_params(self.logger, query_params, path_params) + + def test_validate_invoke_connection_params_invalid_path_params_type(self): + request = InvokeConnectionRequest( + method="GET", + query_params={"param1": "value1"}, + path_params="invalid" + ) + with self.assertRaises(SkyflowError) as context: + validate_invoke_connection_params(self.logger, request.query_params, request.path_params) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_PATH_PARAMS.value) + + def test_validate_invoke_connection_params_invalid_query_params_type(self): + request = InvokeConnectionRequest( + method="GET", + query_params="invalid", + path_params={"path1": "value1"} + ) + with self.assertRaises(SkyflowError) as context: + validate_invoke_connection_params(self.logger, request.query_params, request.path_params) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_QUERY_PARAMS.value) + + def test_validate_invoke_connection_params_non_string_path_param(self): + request = InvokeConnectionRequest( + method="GET", + query_params={"param1": "value1"}, + path_params={1: "value1"} + ) + with self.assertRaises(SkyflowError) as context: + validate_invoke_connection_params(self.logger, request.query_params, request.path_params) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_PATH_PARAMS.value) + + def test_validate_invoke_connection_params_non_string_query_param_key(self): + request = InvokeConnectionRequest( + method="GET", + query_params={1: "value1"}, + path_params={"path1": "value1"} + ) + with self.assertRaises(SkyflowError) as context: + validate_invoke_connection_params(self.logger, request.query_params, request.path_params) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_QUERY_PARAMS.value) + + def test_validate_invoke_connection_params_non_serializable_query_params(self): + class NonSerializable: + pass + request = InvokeConnectionRequest( + method="GET", + query_params={"param1": NonSerializable()}, + path_params={"path1": "value1"} + ) + with self.assertRaises(SkyflowError) as context: + validate_invoke_connection_params(self.logger, request.query_params, request.path_params) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_QUERY_PARAMS.value) + + def test_validate_deidentify_text_request_valid(self): + request = DeidentifyTextRequest( + text="test", + entities=None, + allow_regex_list=None, + restrict_regex_list = None, + token_format = None, + transformations = None, + ) + validate_deidentify_text_request(self.logger, request) + + def test_validate_reidentify_text_request_valid(self): + request = ReidentifyTextRequest( + text="test", + masked_entities=[DetectEntities.CREDIT_CARD], + redacted_entities=[DetectEntities.SSN], + plain_text_entities=None, + ) + validate_reidentify_text_request(self.logger, request) + + def test_validate_reidentify_text_request_empty_text(self): + request = ReidentifyTextRequest( + text="", + masked_entities=[DetectEntities.CREDIT_CARD], + redacted_entities=[DetectEntities.SSN], + ) + with self.assertRaises(SkyflowError) as context: + validate_reidentify_text_request(self.logger, request) + self.assertEqual(context.exception.message, + 
SkyflowMessages.Error.INVALID_TEXT_IN_REIDENTIFY.value) + + def test_validate_reidentify_text_request_invalid_redacted_entities(self): + request = ReidentifyTextRequest( + text="test", + redacted_entities="invalid", + ) + with self.assertRaises(SkyflowError) as context: + validate_reidentify_text_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_REDACTED_ENTITIES_IN_REIDENTIFY.value) + + def test_validate_reidentify_text_request_invalid_plain_text_entities(self): + request = ReidentifyTextRequest( + text="test", + plain_text_entities="invalid", + ) + with self.assertRaises(SkyflowError) as context: + validate_reidentify_text_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_PLAIN_TEXT_ENTITIES_IN_REIDENTIFY.value) + + + def test_validate_deidentify_text_request_empty_text(self): + request = DeidentifyTextRequest( + text="", + entities=None, + allow_regex_list=None, + restrict_regex_list=None, + token_format=None, + transformations=None, + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_text_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_TEXT_IN_DEIDENTIFY.value) + + def test_validate_deidentify_text_request_invalid_text_type(self): + request = DeidentifyTextRequest( + text=["test"], + entities=None, + allow_regex_list=None, + restrict_regex_list=None, + token_format=None, + transformations=None, + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_text_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_TEXT_IN_DEIDENTIFY.value) + + def test_validate_deidentify_text_request_invalid_entities_type(self): + request = DeidentifyTextRequest( + text="test", + entities="invalid", + allow_regex_list=None, + restrict_regex_list=None, + token_format=None, + transformations=None, + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_text_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_ENTITIES_IN_DEIDENTIFY.value) + + def test_validate_deidentify_text_request_invalid_allow_regex(self): + request = DeidentifyTextRequest( + text="test", + allow_regex_list="invalid", + restrict_regex_list=None, + token_format=None, + transformations=None, + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_text_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_ALLOW_REGEX_LIST.value) + + def test_validate_deidentify_text_request_invalid_restrict_regex(self): + request = DeidentifyTextRequest( + text="test", + restrict_regex_list="invalid", + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_text_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_RESTRICT_REGEX_LIST.value) + + def test_validate_deidentify_text_request_invalid_token_format(self): + request = DeidentifyTextRequest( + text="test", + token_format="invalid", + transformations=None, + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_text_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_TOKEN_FORMAT.value) + + + def test_validate_reidentify_text_request_valid(self): + request = MagicMock() + request.text = "test text" + request.redacted_entities = None + 
request.masked_entities = None + request.plain_text_entities = None + validate_reidentify_text_request(self.logger, request) + + def test_validate_reidentify_text_request_empty_text(self): + request = MagicMock() + request.text = "" # Empty text + with self.assertRaises(SkyflowError) as context: + validate_reidentify_text_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_TEXT_IN_REIDENTIFY.value) + + def test_validate_reidentify_text_request_invalid_text_type(self): + request = MagicMock() + request.text = 123 # Invalid type + with self.assertRaises(SkyflowError) as context: + validate_reidentify_text_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_TEXT_IN_REIDENTIFY.value) + + def test_validate_reidentify_text_request_invalid_redacted_entities(self): + request = MagicMock() + request.text = "test text" + request.redacted_entities = "invalid" + with self.assertRaises(SkyflowError) as context: + validate_reidentify_text_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_REDACTED_ENTITIES_IN_REIDENTIFY.value) + + def test_validate_reidentify_text_request_invalid_plain_text_entities(self): + request = ReidentifyTextRequest( + text="test text", + plain_text_entities="invalid" + ) + with self.assertRaises(SkyflowError) as context: + validate_reidentify_text_request(self.logger, request) + self.assertEqual(context.exception.message, + SkyflowMessages.Error.INVALID_PLAIN_TEXT_ENTITIES_IN_REIDENTIFY.value) + + def test_validate_deidentify_file_request_valid(self): + file_input = FileInput(file_path=self.temp_file_path) + request = DeidentifyFileRequest( + file=file_input, + entities=None, + allow_regex_list=None, + restrict_regex_list=None, + token_format=None, + transformations=None, + output_processed_image=None, + output_ocr_text=None, + masking_method=None, + pixel_density=None, + max_resolution=None, + output_processed_audio=None, + output_transcription=None, + bleep=None, + output_directory=None, + wait_time=None + ) + validate_deidentify_file_request(self.logger, request) + + def test_validate_deidentify_file_request_missing_file(self): + request = DeidentifyFileRequest(file=None) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_file_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_FILE_INPUT.value) + + def test_validate_deidentify_file_request_invalid_entities(self): + file_input = FileInput(file_path=self.temp_file_path) + request = DeidentifyFileRequest( + file=file_input, + entities="invalid" + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_file_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_DETECT_ENTITIES_TYPE.value) + + def test_validate_deidentify_file_request_invalid_allow_regex(self): + file_input = FileInput(file_path=self.temp_file_path) + request = DeidentifyFileRequest( + file=file_input, + allow_regex_list="invalid", + entities=[DetectEntities.ACCOUNT_NUMBER] + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_file_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_ALLOW_REGEX_LIST.value) + + def test_validate_deidentify_file_request_invalid_restrict_regex(self): + file_input = FileInput(file_path=self.temp_file_path) + request = DeidentifyFileRequest( + 
file=file_input, + restrict_regex_list="invalid", + entities=[DetectEntities.SSN] + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_file_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_RESTRICT_REGEX_LIST.value) + + def test_validate_deidentify_file_request_invalid_token_format(self): + file_input = FileInput(file_path=self.temp_file_path) + request = DeidentifyFileRequest( + file=file_input, + token_format="invalid", + entities=[DetectEntities.SSN] + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_file_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_TOKEN_FORMAT.value) + + def test_validate_deidentify_file_request_invalid_transformations(self): + file_input = FileInput(file_path=self.temp_file_path) + request = DeidentifyFileRequest( + file=file_input, + transformations="invalid", + entities=[DetectEntities.SSN] + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_file_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_TRANSFORMATIONS.value) + + def test_validate_deidentify_file_request_invalid_output_processed_image(self): + file_input = FileInput(file_path=self.temp_file_path) + request = DeidentifyFileRequest( + file=file_input, + output_processed_image="true", + entities=[DetectEntities.SSN] + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_file_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_OUTPUT_PROCESSED_IMAGE.value) + + def test_validate_deidentify_file_request_invalid_output_ocr_text(self): + file_input = FileInput(file_path=self.temp_file_path) + request = DeidentifyFileRequest( + file=file_input, + output_ocr_text="true", + entities=[DetectEntities.SSN] + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_file_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_OUTPUT_OCR_TEXT.value) + + def test_validate_deidentify_file_request_invalid_masking_method(self): + file_input = FileInput(file_path=self.temp_file_path) + request = DeidentifyFileRequest( + file=file_input, + masking_method="invalid", + entities=[DetectEntities.SSN] + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_file_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_MASKING_METHOD.value) + + def test_validate_deidentify_file_request_invalid_pixel_density(self): + file_input = FileInput(file_path=self.temp_file_path) + request = DeidentifyFileRequest( + file=file_input, + pixel_density="invalid", + entities=[DetectEntities.SSN] + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_file_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_PIXEL_DENSITY.value) + + def test_validate_deidentify_file_request_invalid_max_resolution(self): + file_input = FileInput(file_path=self.temp_file_path) + request = DeidentifyFileRequest( + file=file_input, + max_resolution="invalid", + entities=[DetectEntities.SSN] + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_file_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_MAXIMUM_RESOLUTION.value) + + def 
test_validate_deidentify_file_request_invalid_output_processed_audio(self): + file_input = FileInput(file_path=self.temp_file_path) + request = DeidentifyFileRequest( + file=file_input, + output_processed_audio="true", + entities=[DetectEntities.SSN] + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_file_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_OUTPUT_PROCESSED_AUDIO.value) + + def test_validate_deidentify_file_request_invalid_output_transcription(self): + file_input = FileInput(file_path=self.temp_file_path) + request = DeidentifyFileRequest( + file=file_input, + output_transcription="invalid", + entities=[DetectEntities.SSN] + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_file_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_OUTPUT_TRANSCRIPTION.value) + + def test_validate_deidentify_file_request_invalid_wait_time(self): + file_input = FileInput(file_path=self.temp_file_path) + request = DeidentifyFileRequest( + file=file_input, + wait_time="invalid", + entities=[DetectEntities.SSN] + ) + with self.assertRaises(SkyflowError) as context: + validate_deidentify_file_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_WAIT_TIME.value) + + def test_validate_detokenize_request_valid(self): + request = DetokenizeRequest( + data=[{"token": "token123", "redaction": RedactionType.PLAIN_TEXT}], + continue_on_error=False + ) + validate_detokenize_request(self.logger, request) + + def test_validate_detokenize_request_empty_data(self): + request = DetokenizeRequest(data=[], continue_on_error=False) + with self.assertRaises(SkyflowError) as context: + validate_detokenize_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.EMPTY_TOKENS_LIST_VALUE.value) + + def test_validate_detokenize_request_invalid_token_type(self): + request = DetokenizeRequest(data=[{"token": 123}], continue_on_error=False) + with self.assertRaises(SkyflowError) as context: + validate_detokenize_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_TOKEN_TYPE.value.format("DETOKENIZE")) + + def test_validate_detokenize_request_missing_token_key(self): + request = DetokenizeRequest(data=[{"not_token": "value"}], continue_on_error=False) + with self.assertRaises(SkyflowError) as context: + validate_detokenize_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_TOKENS_LIST_VALUE.value.format(str(type(request.data)))) + + def test_validate_detokenize_request_invalid_continue_on_error_type(self): + request = DetokenizeRequest(data=[{"token": "token123"}], continue_on_error="invalid") + with self.assertRaises(SkyflowError) as context: + validate_detokenize_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_CONTINUE_ON_ERROR_TYPE.value) + + def test_validate_detokenize_request_invalid_redaction_type(self): + request = DetokenizeRequest(data=[{"token": "token123", "redaction": "invalid"}], continue_on_error=False) + with self.assertRaises(SkyflowError) as context: + validate_detokenize_request(self.logger, request) + self.assertEqual(context.exception.message, SkyflowMessages.Error.INVALID_REDACTION_TYPE.value.format(str(type("invalid")))) diff --git a/tests/vault/controller/test__detect.py 
b/tests/vault/controller/test__detect.py index 3096ce08..dc3a753f 100644 --- a/tests/vault/controller/test__detect.py +++ b/tests/vault/controller/test__detect.py @@ -159,7 +159,7 @@ def test_deidentify_file_txt_success(self, mock_open, mock_basename, mock_base64 word_count=1, char_count=1, size_in_kb=1, duration_in_seconds=None, page_count=None, slide_count=None, entities=[], run_id="runid123", - status="SUCCESS", errors=None)) as mock_parse: + status="SUCCESS")) as mock_parse: result = self.detect.deidentify_file(req) mock_validate.assert_called_once() @@ -184,7 +184,6 @@ def test_deidentify_file_txt_success(self, mock_open, mock_basename, mock_base64 self.assertIsNone(result.page_count) self.assertIsNone(result.slide_count) self.assertEqual(result.entities, []) - self.assertEqual(result.errors, None) @patch("skyflow.vault.controller._detect.validate_deidentify_file_request") @patch("skyflow.vault.controller._detect.base64") @@ -222,7 +221,7 @@ def test_deidentify_file_audio_success(self, mock_base64, mock_validate): word_count=1, char_count=1, size_in_kb=1, duration_in_seconds=1, page_count=None, slide_count=None, entities=[], run_id="runid456", - status="SUCCESS", errors=None)) as mock_parse: + status="SUCCESS")) as mock_parse: result = self.detect.deidentify_file(req) mock_validate.assert_called_once() files_api.deidentify_audio.assert_called_once() @@ -263,8 +262,7 @@ def test_get_detect_run_success(self, mock_validate): return_value=DeidentifyFileResponse(file="file", type="txt", extension="txt", word_count=1, char_count=1, size_in_kb=1, duration_in_seconds=None, page_count=None, slide_count=None, entities=[], - run_id="runid789", status="SUCCESS", - errors=None)) as mock_parse: + run_id="runid789", status="SUCCESS")) as mock_parse: result = self.detect.get_detect_run(req) mock_validate.assert_called_once() files_api.get_run.assert_called_once() @@ -658,7 +656,11 @@ def test_deidentify_file_using_file_path(self, mock_open, mock_basename, mock_ba # Setup processed response processed_response = Mock() processed_response.status = "SUCCESS" - processed_response.output = [] + processed_response.output = [ + Mock(processedFile="dGVzdCBjb250ZW", + processedFileType="txt", + processedFileExtension="txt") + ] processed_response.wordCharacterCount = Mock(wordCount=1, characterCount=1) # Test the method @@ -679,16 +681,14 @@ def test_deidentify_file_using_file_path(self, mock_open, mock_basename, mock_ba entities=[], run_id="runid123", status="SUCCESS", - errors=None )) as mock_parse: result = self.detect.deidentify_file(req) mock_file.read.assert_called_once() - mock_basename.assert_called_with("/path/to/test.txt") - mock_validate.assert_called_once() files_api.deidentify_text.assert_called_once() + mock_basename.assert_called_with("/path/to/test.txt") mock_poll.assert_called_once() mock_parse.assert_called_once() @@ -710,4 +710,3 @@ def test_deidentify_file_using_file_path(self, mock_open, mock_basename, mock_ba self.assertIsNone(result.page_count) self.assertIsNone(result.slide_count) self.assertEqual(result.entities, []) - self.assertEqual(result.errors, None) diff --git a/tests/vault/controller/test__vault.py b/tests/vault/controller/test__vault.py index 8d1d1ab0..4e1a0dda 100644 --- a/tests/vault/controller/test__vault.py +++ b/tests/vault/controller/test__vault.py @@ -30,7 +30,7 @@ def test_insert_with_continue_on_error(self, mock_parse_response, mock_validate) # Mock request request = InsertRequest( - table_name=TABLE_NAME, + table=TABLE_NAME, values=[{"field": "value"}], tokens=None, 
return_tokens=True, @@ -89,7 +89,7 @@ def test_insert_with_continue_on_error_false(self, mock_parse_response, mock_val # Mock request with continue_on_error set to False request = InsertRequest( - table_name=TABLE_NAME, + table=TABLE_NAME, values=[{"field": "value"}], tokens=None, return_tokens=True, @@ -129,7 +129,7 @@ def test_insert_with_continue_on_error_false(self, mock_parse_response, mock_val @patch("skyflow.vault.controller._vault.validate_insert_request") def test_insert_handles_generic_error(self, mock_validate): - request = InsertRequest(table_name="test_table", values=[{"column_name": "value"}], return_tokens=False, + request = InsertRequest(table="test_table", values=[{"column_name": "value"}], return_tokens=False, upsert=False, homogeneous=False, continue_on_error=False, token_mode=Mock()) records_api = self.vault_client.get_records_api.return_value @@ -147,7 +147,7 @@ def test_insert_with_continue_on_error_false_when_tokens_are_not_none(self, mock # Mock request with continue_on_error set to False request = InsertRequest( - table_name=TABLE_NAME, + table=TABLE_NAME, values=[{"field": "value"}], tokens=[{"token_field": "token_val1"}], return_tokens=True, From aaa18c7e3f3494f80e9c3b01292de6fd13ca5736 Mon Sep 17 00:00:00 2001 From: raushan-skyflow Date: Wed, 17 Sep 2025 10:56:44 +0000 Subject: [PATCH 15/42] [AUTOMATED] Private Release 1.15.4.dev0+8e1d653 --- setup.py | 2 +- skyflow/utils/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index b349bede..6d914ff6 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ if sys.version_info < (3, 8): raise RuntimeError("skyflow requires Python 3.8+") -current_version = '2.1.0b1.dev0+61e368f' +current_version = '1.15.4.dev0+8e1d653' setup( name='skyflow', diff --git a/skyflow/utils/_version.py b/skyflow/utils/_version.py index fab22bb1..4026c513 100644 --- a/skyflow/utils/_version.py +++ b/skyflow/utils/_version.py @@ -1 +1 @@ -SDK_VERSION = '2.1.0b1.dev0+61e368f' \ No newline at end of file +SDK_VERSION = '1.15.4.dev0+8e1d653' \ No newline at end of file From 57d594b4b464a448b653d25086a4ce9b37873ad2 Mon Sep 17 00:00:00 2001 From: skyflow-shravan <121150537+skyflow-shravan@users.noreply.github.com> Date: Thu, 18 Sep 2025 23:10:18 +0530 Subject: [PATCH 16/42] SK-2296: Release/25.9.14 (#200) * Merge pull request #199 from skyflowapi/skyflow-vivek/SK-2296-use-sessions-for-insert SK-2296 Use sessions for insert method * [AUTOMATED] Private Release 1.15.4.dev0+1c45049 --------- Co-authored-by: skyflow-vivek <121149418+skyflow-vivek@users.noreply.github.com> Co-authored-by: skyflow-shravan --- .github/workflows/internal-release.yml | 22 ++++++ .github/workflows/shared-build-and-deploy.yml | 74 +++++++++++++++++++ ci-scripts/bump_version.sh | 13 ++-- setup.py | 2 +- skyflow/_utils.py | 1 + skyflow/errors/_skyflow_errors.py | 1 + skyflow/vault/_client.py | 69 +++++++++++------ skyflow/version.py | 2 +- 8 files changed, 151 insertions(+), 33 deletions(-) create mode 100644 .github/workflows/internal-release.yml create mode 100644 .github/workflows/shared-build-and-deploy.yml diff --git a/.github/workflows/internal-release.yml b/.github/workflows/internal-release.yml new file mode 100644 index 00000000..cafdb9c9 --- /dev/null +++ b/.github/workflows/internal-release.yml @@ -0,0 +1,22 @@ +name: Internal Release + +on: + push: + tags-ignore: + - '*.*' + paths-ignore: + - "setup.py" + - "*.yml" + - "*.md" + - "skyflow/version.py" + - "samples/**" + branches: + - release/* + +jobs: + 
build-and-deploy: + uses: ./.github/workflows/shared-build-and-deploy.yml + with: + ref: ${{ github.ref_name }} + tag: 'internal' + secrets: inherit \ No newline at end of file diff --git a/.github/workflows/shared-build-and-deploy.yml b/.github/workflows/shared-build-and-deploy.yml new file mode 100644 index 00000000..cc6ac048 --- /dev/null +++ b/.github/workflows/shared-build-and-deploy.yml @@ -0,0 +1,74 @@ +name: Shared Build and Deploy + +on: + workflow_call: + inputs: + ref: + description: 'Git reference to use (e.g., main or branch name)' + required: true + type: string + + tag: + description: 'Release Tag' + required: true + type: string + +jobs: + build-and-deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v2 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Resolve Branch for the Tagged Commit + id: resolve-branch + if: ${{ inputs.tag == 'beta' || inputs.tag == 'public' }} + run: | + TAG_COMMIT=$(git rev-list -n 1 ${{ github.ref_name }}) + BRANCH_NAME=$(git branch -r --contains $TAG_COMMIT | grep -o 'origin/.*' | sed 's|origin/||' | head -n 1) + if [ -z "$BRANCH_NAME" ]; then + echo "Error: Could not resolve branch for the tag." + exit 1 + fi + echo "Resolved Branch Name: $BRANCH_NAME" + echo "branch_name=$BRANCH_NAME" >> $GITHUB_ENV + - name: Get Previous tag + id: previoustag + uses: WyriHaximus/github-action-get-previous-tag@v1 + with: + fallback: 1.0.0 + + - name: Bump Version + run: | + chmod +x ./ci-scripts/bump_version.sh + if ${{ inputs.tag == 'internal' }}; then + ./ci-scripts/bump_version.sh "${{ steps.previoustag.outputs.tag }}" "$(git rev-parse --short "$GITHUB_SHA")" + else + ./ci-scripts/bump_version.sh "${{ steps.previoustag.outputs.tag }}" + fi + - name: Commit changes + run: | + git config user.name "${{ github.actor }}" + git config user.email "${{ github.actor }}@users.noreply.github.com" + git add setup.py + git add skyflow/version.py + if [[ "${{ inputs.tag }}" == "internal" ]]; then + VERSION="${{ steps.previoustag.outputs.tag }}.dev0+$(git rev-parse --short $GITHUB_SHA)" + COMMIT_MESSAGE="[AUTOMATED] Private Release $VERSION" + git commit -m "$COMMIT_MESSAGE" + git push origin ${{ github.ref_name }} -f + fi + - name: Build and Publish to JFrog Artifactory + if: ${{ inputs.tag == 'internal' }} + env: + TWINE_USERNAME: ${{ secrets.JFROG_USERNAME }} + TWINE_PASSWORD: ${{ secrets.JFROG_PASSWORD }} + run: | + python setup.py sdist bdist_wheel + twine upload --repository-url https://prekarilabs.jfrog.io/artifactory/api/pypi/skyflow-python/ dist/* \ No newline at end of file diff --git a/ci-scripts/bump_version.sh b/ci-scripts/bump_version.sh index b0a57a9e..c8f2b9e9 100755 --- a/ci-scripts/bump_version.sh +++ b/ci-scripts/bump_version.sh @@ -1,22 +1,19 @@ Version=$1 SEMVER=$Version - if [ -z $2 ] then echo "Bumping package version to $1" - sed -E "s/current_version = .+/current_version = \'$SEMVER\'/g" setup.py > tempfile && cat tempfile > setup.py && rm -f tempfile sed -E "s/SDK_VERSION = .+/SDK_VERSION = \'$SEMVER\'/g" skyflow/version.py > tempfile && cat tempfile > skyflow/version.py && rm -f tempfile - echo -------------------------- echo "Done, Package now at $1" else - echo "Bumping package version to $1-dev.$2" + echo "Bumping package version to $1.dev0+$2" - sed -E "s/current_version = .+/current_version = \'$SEMVER-dev.$2\'/g" setup.py > tempfile && cat tempfile > setup.py && rm -f tempfile - sed -E 
"s/SDK_VERSION = .+/SDK_VERSION = \'$SEMVER-dev.$2\'/g" skyflow/version.py > tempfile && cat tempfile > skyflow/version.py && rm -f tempfile + sed -E "s/current_version = .+/current_version = \'$SEMVER.dev0+$2\'/g" setup.py > tempfile && cat tempfile > setup.py && rm -f tempfile + sed -E "s/SDK_VERSION = .+/SDK_VERSION = \'$SEMVER.dev0+$2\'/g" skyflow/version.py > tempfile && cat tempfile > skyflow/version.py && rm -f tempfile echo -------------------------- - echo "Done, Package now at $1-dev.$2" -fi + echo "Done, Package now at $1.dev0+$2" +fi \ No newline at end of file diff --git a/setup.py b/setup.py index 323fa31d..c83828ab 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.4' +current_version = '1.15.4.dev0+1c45049' setup( name='skyflow', diff --git a/skyflow/_utils.py b/skyflow/_utils.py index 83bf54a6..1e1b7109 100644 --- a/skyflow/_utils.py +++ b/skyflow/_utils.py @@ -53,6 +53,7 @@ def log_error(message: str, interface: str): class InfoMessages(Enum): INITIALIZE_CLIENT = "Initializing skyflow client" CLIENT_INITIALIZED = "Initialized skyflow client successfully" + CLOSING_SESSION = "Closing the session" VALIDATE_INSERT_RECORDS = "Validating insert records" VALIDATE_DETOKENIZE_INPUT = "Validating detokenize input" VALIDATE_GET_BY_ID_INPUT = "Validating getByID input" diff --git a/skyflow/errors/_skyflow_errors.py b/skyflow/errors/_skyflow_errors.py index 2e792812..ba52c4e5 100644 --- a/skyflow/errors/_skyflow_errors.py +++ b/skyflow/errors/_skyflow_errors.py @@ -16,6 +16,7 @@ class SkyflowErrorCodes(Enum): class SkyflowErrorMessages(Enum): API_ERROR = "Server returned status code %s" + NETWORK_ERROR = "Network error occurred: %s" FILE_NOT_FOUND = "File at %s not found" FILE_INVALID_JSON = "File at %s is not in JSON format" diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index e21dcbab..97979505 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -5,6 +5,7 @@ import types import requests import asyncio +from requests.adapters import HTTPAdapter from skyflow.vault._insert import getInsertRequestBody, processResponse, convertResponse from skyflow.vault._update import sendUpdateRequests, createUpdateResponseBody from skyflow.vault._config import Configuration, ConnectionConfig, DeleteOptions, DetokenizeOptions, GetOptions, InsertOptions, UpdateOptions, QueryOptions @@ -36,49 +37,71 @@ def __init__(self, config: Configuration): raise SkyflowError(SkyflowErrorCodes.INVALID_INPUT, SkyflowErrorMessages.TOKEN_PROVIDER_ERROR.value % ( str(type(config.tokenProvider))), interface=interface) + self._create_session() self.vaultID = config.vaultID self.vaultURL = config.vaultURL.rstrip('/') self.tokenProvider = config.tokenProvider self.storedToken = '' log_info(InfoMessages.CLIENT_INITIALIZED.value, interface=interface) + + def _create_session(self): + self.session = requests.Session() + adapter = HTTPAdapter(pool_connections=1, pool_maxsize=25, pool_block=True) + self.session.mount("https://", adapter) + + def __del__(self): + if (self.session is not None): + log_info(InfoMessages.CLOSING_SESSION.value, interface=InterfaceName.CLIENT.value) + self.session.close() + self.session = None + + def _get_session(self): + if (self.session is None): + self._create_session() + return self.session def insert(self, records: dict, options: InsertOptions = InsertOptions()): + max_retries = 1 interface = InterfaceName.INSERT.value 
log_info(InfoMessages.INSERT_TRIGGERED.value, interface=interface) self._checkConfig(interface) - jsonBody = getInsertRequestBody(records, options) requestURL = self._get_complete_vault_url() - self.storedToken = tokenProviderWrapper( - self.storedToken, self.tokenProvider, interface) - headers = { - "Authorization": "Bearer " + self.storedToken, - "sky-metadata": json.dumps(getMetrics()) - } - max_retries = 3 - # Use for-loop for retry logic, avoid code repetition - for attempt in range(max_retries+1): + + for attempt in range(max_retries + 1): try: - # If jsonBody is a dict, use json=, else use data= - response = requests.post(requestURL, data=jsonBody, headers=headers) + self.storedToken = tokenProviderWrapper( + self.storedToken, self.tokenProvider, interface) + headers = { + "Authorization": "Bearer " + self.storedToken, + "sky-metadata": json.dumps(getMetrics()), + } + response = self._get_session().post( + requestURL, + data=jsonBody, + headers=headers, + ) processedResponse = processResponse(response) result, partial = convertResponse(records, processedResponse, options) if partial: log_error(SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, interface) - raise SkyflowError(SkyflowErrorCodes.PARTIAL_SUCCESS, SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, result, interface=interface) - if 'records' not in result: + elif 'records' not in result: log_error(SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, interface) - raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, result, interface=interface) - log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) + else: + log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) return result - except Exception as err: + except requests.exceptions.ConnectionError as err: if attempt < max_retries: - continue - else: - if isinstance(err, SkyflowError): - raise err - else: - raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, f"Error occurred: {err}", interface=interface) + continue + raise SkyflowError( + SkyflowErrorCodes.SERVER_ERROR, + SkyflowErrorMessages.NETWORK_ERROR.value % str(err), + interface=interface + ) + except SkyflowError as err: + if err.code != SkyflowErrorCodes.SERVER_ERROR or attempt >= max_retries: + raise err + continue def detokenize(self, records: dict, options: DetokenizeOptions = DetokenizeOptions()): interface = InterfaceName.DETOKENIZE.value diff --git a/skyflow/version.py b/skyflow/version.py index 5c7ae5de..fa38ca14 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.4' \ No newline at end of file +SDK_VERSION = '1.15.4.dev0+1c45049' \ No newline at end of file From d2979c3af35aad2d895748ea34017945c6b6bf57 Mon Sep 17 00:00:00 2001 From: skyflow-shravan Date: Thu, 18 Sep 2025 17:42:36 +0000 Subject: [PATCH 17/42] [AUTOMATED] Public Release - 1.15.5 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index c83828ab..b62c0129 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.4.dev0+1c45049' +current_version = '1.15.5' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index fa38ca14..461f4702 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.4.dev0+1c45049' \ No newline at end of file +SDK_VERSION = '1.15.5' \ No newline at end of file From 
dab7b669fbabd477657c9c7c4ecf2ba8ade9728b Mon Sep 17 00:00:00 2001 From: raushan-skyflow Date: Fri, 19 Sep 2025 19:47:38 +0530 Subject: [PATCH 18/42] SK-2270: revert the fern generated detect changes (#201) --- skyflow/generated/rest/__init__.py | 4 ++-- skyflow/generated/rest/core/client_wrapper.py | 2 +- skyflow/generated/rest/types/__init__.py | 4 ++-- skyflow/generated/rest/types/deidentify_status_response.py | 2 +- skyflow/generated/rest/types/error_string.py | 3 +++ skyflow/generated/rest/types/reidentify_file_response.py | 3 +-- 6 files changed, 10 insertions(+), 8 deletions(-) create mode 100644 skyflow/generated/rest/types/error_string.py diff --git a/skyflow/generated/rest/__init__.py b/skyflow/generated/rest/__init__.py index b8309d05..7eda9318 100644 --- a/skyflow/generated/rest/__init__.py +++ b/skyflow/generated/rest/__init__.py @@ -28,12 +28,12 @@ EntityTypes, ErrorResponse, ErrorResponseError, + ErrorString, GooglerpcStatus, ProtobufAny, RedactionEnumRedaction, ReidentifyFileResponse, ReidentifyFileResponseOutput, - ReidentifyFileResponseOutputType, ReidentifyFileResponseStatus, ReidentifyStringResponse, RequestActionType, @@ -176,6 +176,7 @@ "EntityTypes", "ErrorResponse", "ErrorResponseError", + "ErrorString", "GooglerpcStatus", "InternalServerError", "NotFoundError", @@ -189,7 +190,6 @@ "ReidentifyFileRequestFormat", "ReidentifyFileResponse", "ReidentifyFileResponseOutput", - "ReidentifyFileResponseOutputType", "ReidentifyFileResponseStatus", "ReidentifyStringRequestFormat", "ReidentifyStringResponse", diff --git a/skyflow/generated/rest/core/client_wrapper.py b/skyflow/generated/rest/core/client_wrapper.py index 5179f373..a3210a7e 100644 --- a/skyflow/generated/rest/core/client_wrapper.py +++ b/skyflow/generated/rest/core/client_wrapper.py @@ -24,7 +24,7 @@ def get_headers(self) -> typing.Dict[str, str]: headers: typing.Dict[str, str] = { "X-Fern-Language": "Python", "X-Fern-SDK-Name": "skyflow_vault", - "X-Fern-SDK-Version": "0.0.323", + "X-Fern-SDK-Version": "0.0.252", **(self.get_custom_headers() or {}), } headers["Authorization"] = f"Bearer {self._get_token()}" diff --git a/skyflow/generated/rest/types/__init__.py b/skyflow/generated/rest/types/__init__.py index 92d826c9..aa9b4a35 100644 --- a/skyflow/generated/rest/types/__init__.py +++ b/skyflow/generated/rest/types/__init__.py @@ -27,12 +27,12 @@ from .entity_types import EntityTypes from .error_response import ErrorResponse from .error_response_error import ErrorResponseError +from .error_string import ErrorString from .googlerpc_status import GooglerpcStatus from .protobuf_any import ProtobufAny from .redaction_enum_redaction import RedactionEnumRedaction from .reidentify_file_response import ReidentifyFileResponse from .reidentify_file_response_output import ReidentifyFileResponseOutput -from .reidentify_file_response_output_type import ReidentifyFileResponseOutputType from .reidentify_file_response_status import ReidentifyFileResponseStatus from .reidentify_string_response import ReidentifyStringResponse from .request_action_type import RequestActionType @@ -106,12 +106,12 @@ "EntityTypes", "ErrorResponse", "ErrorResponseError", + "ErrorString", "GooglerpcStatus", "ProtobufAny", "RedactionEnumRedaction", "ReidentifyFileResponse", "ReidentifyFileResponseOutput", - "ReidentifyFileResponseOutputType", "ReidentifyFileResponseStatus", "ReidentifyStringResponse", "RequestActionType", diff --git a/skyflow/generated/rest/types/deidentify_status_response.py b/skyflow/generated/rest/types/deidentify_status_response.py 
index 712a85b2..68a6cd3f 100644 --- a/skyflow/generated/rest/types/deidentify_status_response.py +++ b/skyflow/generated/rest/types/deidentify_status_response.py @@ -24,7 +24,7 @@ class DeidentifyStatusResponse(UniversalBaseModel): How the input file was specified. """ - output_type: DeidentifyStatusResponseOutputType = pydantic.Field() + output_type: typing.Optional[DeidentifyStatusResponseOutputType] = pydantic.Field(default=None) """ How the output file is specified. """ diff --git a/skyflow/generated/rest/types/error_string.py b/skyflow/generated/rest/types/error_string.py new file mode 100644 index 00000000..4ebbdff4 --- /dev/null +++ b/skyflow/generated/rest/types/error_string.py @@ -0,0 +1,3 @@ +# This file was auto-generated by Fern from our API Definition. + +ErrorString = str \ No newline at end of file diff --git a/skyflow/generated/rest/types/reidentify_file_response.py b/skyflow/generated/rest/types/reidentify_file_response.py index bd90fb49..c67b41ac 100644 --- a/skyflow/generated/rest/types/reidentify_file_response.py +++ b/skyflow/generated/rest/types/reidentify_file_response.py @@ -5,7 +5,6 @@ import pydantic from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel from .reidentify_file_response_output import ReidentifyFileResponseOutput -from .reidentify_file_response_output_type import ReidentifyFileResponseOutputType from .reidentify_file_response_status import ReidentifyFileResponseStatus @@ -19,7 +18,7 @@ class ReidentifyFileResponse(UniversalBaseModel): Status of the re-identify operation. """ - output_type: ReidentifyFileResponseOutputType = pydantic.Field() + output_type: typing.Literal["BASE64"] = pydantic.Field(default="BASE64") """ Format of the output file. """ From bdcc5ac585904c164941dd924f2e04f6a82ab0e7 Mon Sep 17 00:00:00 2001 From: raushan-skyflow Date: Fri, 19 Sep 2025 14:17:54 +0000 Subject: [PATCH 19/42] [AUTOMATED] Private Release 1.15.5.dev0+dab7b66 --- setup.py | 2 +- skyflow/utils/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 6d914ff6..a95dc1dd 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ if sys.version_info < (3, 8): raise RuntimeError("skyflow requires Python 3.8+") -current_version = '1.15.4.dev0+8e1d653' +current_version = '1.15.5.dev0+dab7b66' setup( name='skyflow', diff --git a/skyflow/utils/_version.py b/skyflow/utils/_version.py index 4026c513..f15769f8 100644 --- a/skyflow/utils/_version.py +++ b/skyflow/utils/_version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.4.dev0+8e1d653' \ No newline at end of file +SDK_VERSION = '1.15.5.dev0+dab7b66' \ No newline at end of file From 3bb2b6a715b20cf707e3941e144cf7f5fddf6788 Mon Sep 17 00:00:00 2001 From: skyflow-shravan Date: Mon, 22 Sep 2025 13:26:09 +0000 Subject: [PATCH 20/42] [AUTOMATED] Public Release - 1.15.6 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index b62c0129..6c48fc93 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.5' +current_version = '1.15.6' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 461f4702..491b0797 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.5' \ No newline at end of file +SDK_VERSION = '1.15.6' \ No newline at end of file From c1172852c2fed6fed77fcaad83f16040ac996030 Mon Sep 17 00:00:00 2001 From: skyflow-shravan 
<121150537+skyflow-shravan@users.noreply.github.com> Date: Mon, 22 Sep 2025 20:14:53 +0530 Subject: [PATCH 21/42] SK-2313 Release/25.9.15 (#203) * SK-2131 add retry for errors (#202) --------- Co-authored-by: skyflow-shravan --- setup.py | 2 +- skyflow/vault/_client.py | 69 ++++++++++++++-------------------------- skyflow/version.py | 2 +- 3 files changed, 25 insertions(+), 48 deletions(-) diff --git a/setup.py b/setup.py index 6c48fc93..ae32d111 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.6' +current_version = '1.15.6.dev0+fcfb86f' setup( name='skyflow', diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index 97979505..e21dcbab 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -5,7 +5,6 @@ import types import requests import asyncio -from requests.adapters import HTTPAdapter from skyflow.vault._insert import getInsertRequestBody, processResponse, convertResponse from skyflow.vault._update import sendUpdateRequests, createUpdateResponseBody from skyflow.vault._config import Configuration, ConnectionConfig, DeleteOptions, DetokenizeOptions, GetOptions, InsertOptions, UpdateOptions, QueryOptions @@ -37,71 +36,49 @@ def __init__(self, config: Configuration): raise SkyflowError(SkyflowErrorCodes.INVALID_INPUT, SkyflowErrorMessages.TOKEN_PROVIDER_ERROR.value % ( str(type(config.tokenProvider))), interface=interface) - self._create_session() self.vaultID = config.vaultID self.vaultURL = config.vaultURL.rstrip('/') self.tokenProvider = config.tokenProvider self.storedToken = '' log_info(InfoMessages.CLIENT_INITIALIZED.value, interface=interface) - - def _create_session(self): - self.session = requests.Session() - adapter = HTTPAdapter(pool_connections=1, pool_maxsize=25, pool_block=True) - self.session.mount("https://", adapter) - - def __del__(self): - if (self.session is not None): - log_info(InfoMessages.CLOSING_SESSION.value, interface=InterfaceName.CLIENT.value) - self.session.close() - self.session = None - - def _get_session(self): - if (self.session is None): - self._create_session() - return self.session def insert(self, records: dict, options: InsertOptions = InsertOptions()): - max_retries = 1 interface = InterfaceName.INSERT.value log_info(InfoMessages.INSERT_TRIGGERED.value, interface=interface) self._checkConfig(interface) + jsonBody = getInsertRequestBody(records, options) requestURL = self._get_complete_vault_url() - - for attempt in range(max_retries + 1): + self.storedToken = tokenProviderWrapper( + self.storedToken, self.tokenProvider, interface) + headers = { + "Authorization": "Bearer " + self.storedToken, + "sky-metadata": json.dumps(getMetrics()) + } + max_retries = 3 + # Use for-loop for retry logic, avoid code repetition + for attempt in range(max_retries+1): try: - self.storedToken = tokenProviderWrapper( - self.storedToken, self.tokenProvider, interface) - headers = { - "Authorization": "Bearer " + self.storedToken, - "sky-metadata": json.dumps(getMetrics()), - } - response = self._get_session().post( - requestURL, - data=jsonBody, - headers=headers, - ) + # If jsonBody is a dict, use json=, else use data= + response = requests.post(requestURL, data=jsonBody, headers=headers) processedResponse = processResponse(response) result, partial = convertResponse(records, processedResponse, options) if partial: log_error(SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, interface) - elif 'records' not in result: + raise 
SkyflowError(SkyflowErrorCodes.PARTIAL_SUCCESS, SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, result, interface=interface) + if 'records' not in result: log_error(SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, interface) - else: - log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) + raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, result, interface=interface) + log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) return result - except requests.exceptions.ConnectionError as err: + except Exception as err: if attempt < max_retries: - continue - raise SkyflowError( - SkyflowErrorCodes.SERVER_ERROR, - SkyflowErrorMessages.NETWORK_ERROR.value % str(err), - interface=interface - ) - except SkyflowError as err: - if err.code != SkyflowErrorCodes.SERVER_ERROR or attempt >= max_retries: - raise err - continue + continue + else: + if isinstance(err, SkyflowError): + raise err + else: + raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, f"Error occurred: {err}", interface=interface) def detokenize(self, records: dict, options: DetokenizeOptions = DetokenizeOptions()): interface = InterfaceName.DETOKENIZE.value diff --git a/skyflow/version.py b/skyflow/version.py index 491b0797..ce8f9cad 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.6' \ No newline at end of file +SDK_VERSION = '1.15.6.dev0+fcfb86f' \ No newline at end of file From ada8b744ad2e322937aa5646647a5e2b73ac4c4c Mon Sep 17 00:00:00 2001 From: skyflow-shravan <121150537+skyflow-shravan@users.noreply.github.com> Date: Tue, 23 Sep 2025 13:16:44 +0530 Subject: [PATCH 22/42] SK-2131 minor fix for remote disconnect error (#204) * SK-2131 minor fix for remote disconnect error * SK-2131 change version --- setup.py | 2 +- skyflow/vault/_client.py | 71 ++++++++++++++++++++++++++-------------- skyflow/version.py | 2 +- 3 files changed, 49 insertions(+), 26 deletions(-) diff --git a/setup.py b/setup.py index ae32d111..6c48fc93 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.6.dev0+fcfb86f' +current_version = '1.15.6' setup( name='skyflow', diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index e21dcbab..96d86232 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -5,6 +5,7 @@ import types import requests import asyncio +from requests.adapters import HTTPAdapter from skyflow.vault._insert import getInsertRequestBody, processResponse, convertResponse from skyflow.vault._update import sendUpdateRequests, createUpdateResponseBody from skyflow.vault._config import Configuration, ConnectionConfig, DeleteOptions, DetokenizeOptions, GetOptions, InsertOptions, UpdateOptions, QueryOptions @@ -36,49 +37,71 @@ def __init__(self, config: Configuration): raise SkyflowError(SkyflowErrorCodes.INVALID_INPUT, SkyflowErrorMessages.TOKEN_PROVIDER_ERROR.value % ( str(type(config.tokenProvider))), interface=interface) + self._create_session() self.vaultID = config.vaultID self.vaultURL = config.vaultURL.rstrip('/') self.tokenProvider = config.tokenProvider self.storedToken = '' log_info(InfoMessages.CLIENT_INITIALIZED.value, interface=interface) + + def _create_session(self): + self.session = requests.Session() + adapter = HTTPAdapter(pool_connections=1, pool_maxsize=25, pool_block=True) + self.session.mount("https://", adapter) + + def __del__(self): + if (self.session is not None): + 
log_info(InfoMessages.CLOSING_SESSION.value, interface=InterfaceName.CLIENT.value) + self.session.close() + self.session = None + + def _get_session(self): + if (self.session is None): + self._create_session() + return self.session def insert(self, records: dict, options: InsertOptions = InsertOptions()): + max_retries = 1 interface = InterfaceName.INSERT.value log_info(InfoMessages.INSERT_TRIGGERED.value, interface=interface) self._checkConfig(interface) - jsonBody = getInsertRequestBody(records, options) requestURL = self._get_complete_vault_url() - self.storedToken = tokenProviderWrapper( - self.storedToken, self.tokenProvider, interface) - headers = { - "Authorization": "Bearer " + self.storedToken, - "sky-metadata": json.dumps(getMetrics()) - } - max_retries = 3 - # Use for-loop for retry logic, avoid code repetition - for attempt in range(max_retries+1): + + for attempt in range(max_retries + 1): try: - # If jsonBody is a dict, use json=, else use data= - response = requests.post(requestURL, data=jsonBody, headers=headers) + self.storedToken = tokenProviderWrapper( + self.storedToken, self.tokenProvider, interface) + headers = { + "Authorization": "Bearer " + self.storedToken, + "sky-metadata": json.dumps(getMetrics()), + } + response = self._get_session().post( + requestURL, + data=jsonBody, + headers=headers, + ) processedResponse = processResponse(response) result, partial = convertResponse(records, processedResponse, options) if partial: log_error(SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, interface) - raise SkyflowError(SkyflowErrorCodes.PARTIAL_SUCCESS, SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, result, interface=interface) - if 'records' not in result: + elif 'records' not in result: log_error(SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, interface) - raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, result, interface=interface) - log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) + else: + log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) return result - except Exception as err: + except requests.exceptions.ConnectionError as err: if attempt < max_retries: - continue - else: - if isinstance(err, SkyflowError): - raise err - else: - raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, f"Error occurred: {err}", interface=interface) + continue + raise SkyflowError( + SkyflowErrorCodes.SERVER_ERROR, + SkyflowErrorMessages.NETWORK_ERROR.value % str(err), + interface=interface + ) + except SkyflowError as err: + if err.code != SkyflowErrorCodes.SERVER_ERROR or attempt >= max_retries: + raise err + continue def detokenize(self, records: dict, options: DetokenizeOptions = DetokenizeOptions()): interface = InterfaceName.DETOKENIZE.value @@ -292,4 +315,4 @@ def delete(self, records: dict, options: DeleteOptions = DeleteOptions()): else: log_info(InfoMessages.DELETE_DATA_SUCCESS.value, interface) - return result + return result \ No newline at end of file diff --git a/skyflow/version.py b/skyflow/version.py index ce8f9cad..491b0797 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.6.dev0+fcfb86f' \ No newline at end of file +SDK_VERSION = '1.15.6' \ No newline at end of file From b4bd12985c4d2600182f11a5880abed49967bae7 Mon Sep 17 00:00:00 2001 From: skyflow-shravan Date: Tue, 23 Sep 2025 08:19:08 +0000 Subject: [PATCH 23/42] [AUTOMATED] Public Release - 1.16.0 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 
deletions(-) diff --git a/setup.py b/setup.py index 6c48fc93..2ea897fc 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.6' +current_version = '1.16.0' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 491b0797..2bac4cd2 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.6' \ No newline at end of file +SDK_VERSION = '1.16.0' \ No newline at end of file From 10350bcc0110a8e6c17e1921fbd74e873b7e4a7e Mon Sep 17 00:00:00 2001 From: skyflow-shravan <121150537+skyflow-shravan@users.noreply.github.com> Date: Wed, 24 Sep 2025 00:12:45 +0530 Subject: [PATCH 24/42] SK-2131 retry for errors in insert (#205) --- skyflow/vault/_client.py | 68 +++++++++++++--------------------------- 1 file changed, 22 insertions(+), 46 deletions(-) diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index 96d86232..f3e02def 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -5,7 +5,6 @@ import types import requests import asyncio -from requests.adapters import HTTPAdapter from skyflow.vault._insert import getInsertRequestBody, processResponse, convertResponse from skyflow.vault._update import sendUpdateRequests, createUpdateResponseBody from skyflow.vault._config import Configuration, ConnectionConfig, DeleteOptions, DetokenizeOptions, GetOptions, InsertOptions, UpdateOptions, QueryOptions @@ -37,71 +36,48 @@ def __init__(self, config: Configuration): raise SkyflowError(SkyflowErrorCodes.INVALID_INPUT, SkyflowErrorMessages.TOKEN_PROVIDER_ERROR.value % ( str(type(config.tokenProvider))), interface=interface) - self._create_session() self.vaultID = config.vaultID self.vaultURL = config.vaultURL.rstrip('/') self.tokenProvider = config.tokenProvider self.storedToken = '' log_info(InfoMessages.CLIENT_INITIALIZED.value, interface=interface) - - def _create_session(self): - self.session = requests.Session() - adapter = HTTPAdapter(pool_connections=1, pool_maxsize=25, pool_block=True) - self.session.mount("https://", adapter) - - def __del__(self): - if (self.session is not None): - log_info(InfoMessages.CLOSING_SESSION.value, interface=InterfaceName.CLIENT.value) - self.session.close() - self.session = None - - def _get_session(self): - if (self.session is None): - self._create_session() - return self.session def insert(self, records: dict, options: InsertOptions = InsertOptions()): - max_retries = 1 interface = InterfaceName.INSERT.value log_info(InfoMessages.INSERT_TRIGGERED.value, interface=interface) self._checkConfig(interface) jsonBody = getInsertRequestBody(records, options) requestURL = self._get_complete_vault_url() - - for attempt in range(max_retries + 1): + self.storedToken = tokenProviderWrapper( + self.storedToken, self.tokenProvider, interface) + headers = { + "Authorization": "Bearer " + self.storedToken, + "sky-metadata": json.dumps(getMetrics()) + } + max_retries = 3 + # Use for-loop for retry logic, avoid code repetition + for attempt in range(max_retries+1): try: - self.storedToken = tokenProviderWrapper( - self.storedToken, self.tokenProvider, interface) - headers = { - "Authorization": "Bearer " + self.storedToken, - "sky-metadata": json.dumps(getMetrics()), - } - response = self._get_session().post( - requestURL, - data=jsonBody, - headers=headers, - ) + # If jsonBody is a dict, use json=, else use data= + response = requests.post(requestURL, data=jsonBody, headers=headers) processedResponse = 
processResponse(response) result, partial = convertResponse(records, processedResponse, options) if partial: log_error(SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, interface) - elif 'records' not in result: + raise SkyflowError(SkyflowErrorCodes.PARTIAL_SUCCESS, SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, result, interface=interface) + if 'records' not in result: log_error(SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, interface) - else: - log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) + raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, result, interface=interface) + log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) return result - except requests.exceptions.ConnectionError as err: + except Exception as err: if attempt < max_retries: - continue - raise SkyflowError( - SkyflowErrorCodes.SERVER_ERROR, - SkyflowErrorMessages.NETWORK_ERROR.value % str(err), - interface=interface - ) - except SkyflowError as err: - if err.code != SkyflowErrorCodes.SERVER_ERROR or attempt >= max_retries: - raise err - continue + continue + else: + if isinstance(err, SkyflowError): + raise err + else: + raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, f"Error occurred: {err}", interface=interface) def detokenize(self, records: dict, options: DetokenizeOptions = DetokenizeOptions()): interface = InterfaceName.DETOKENIZE.value From abd48ff9694c73e5a310372ba93dbe2967dbab14 Mon Sep 17 00:00:00 2001 From: skyflow-shravan Date: Tue, 23 Sep 2025 18:48:05 +0000 Subject: [PATCH 25/42] [AUTOMATED] Public Release - 1.15.7 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 2ea897fc..fbb55fbb 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.16.0' +current_version = '1.15.7' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 2bac4cd2..9a299b01 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.16.0' \ No newline at end of file +SDK_VERSION = '1.15.7' \ No newline at end of file From 44e611cec71020b8f30c61313403295c6dd5c525 Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Mon, 29 Sep 2025 19:41:55 +0530 Subject: [PATCH 26/42] SK-2325 Fix retry logic when continueOnError is set to true --- skyflow/vault/_client.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index f3e02def..24ba524f 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -64,11 +64,10 @@ def insert(self, records: dict, options: InsertOptions = InsertOptions()): result, partial = convertResponse(records, processedResponse, options) if partial: log_error(SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, interface) - raise SkyflowError(SkyflowErrorCodes.PARTIAL_SUCCESS, SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, result, interface=interface) - if 'records' not in result: + elif 'records' not in result: log_error(SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, interface) - raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, result, interface=interface) - log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) + else: + log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) return result except Exception as err: if attempt < max_retries: From 
d6cbc682b7c203db718a5eeab06ffe695d131f87 Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Mon, 29 Sep 2025 15:18:20 +0000 Subject: [PATCH 27/42] [AUTOMATED] Private Release 1.15.7.dev0+cce9db4 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index fbb55fbb..5d6e3251 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.7' +current_version = '1.15.7.dev0+cce9db4' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 9a299b01..733b2c9d 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.7' \ No newline at end of file +SDK_VERSION = '1.15.7.dev0+cce9db4' \ No newline at end of file From b69c3574b4848dd6e3f1b30a6f1a6018cf1dbc25 Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Tue, 30 Sep 2025 17:51:56 +0530 Subject: [PATCH 28/42] SK-2325 Added unit tests for fix --- tests/vault/test_insert_mocked.py | 239 ++++++++++++++++++++++++++++++ 1 file changed, 239 insertions(+) create mode 100644 tests/vault/test_insert_mocked.py diff --git a/tests/vault/test_insert_mocked.py b/tests/vault/test_insert_mocked.py new file mode 100644 index 00000000..20be5f56 --- /dev/null +++ b/tests/vault/test_insert_mocked.py @@ -0,0 +1,239 @@ +''' + Copyright (c) 2022 Skyflow, Inc. +''' +import json +import unittest +from unittest.mock import Mock, patch, ANY +import os +from dotenv import dotenv_values +import requests +from requests.models import Response +from skyflow.errors._skyflow_errors import SkyflowError +from skyflow.vault._client import Client +from skyflow.vault._config import Configuration, InsertOptions, BYOT + +class TestInsertWithMocks(unittest.TestCase): + def setUp(self) -> None: + self.envValues = dotenv_values(".env") + self.dataPath = os.path.join(os.getcwd(), 'tests/vault/data/') + self.valid_token = self.envValues["MOCK_TOKEN"] + self.record = { + "table": "pii_fields", + "fields": { + "cardNumber": "4111-1111-1111-1111", + "cvv": "234" + } + } + self.data = {"records": [self.record, self.record]} + + # Mock API response data + self.mock_success_response = { + "responses": [ + { + "records": [ + { + "skyflow_id": "123", + "tokens": { + "cardNumber": "card_number_token", + "cvv": "cvv_token" + } + } + ] + }, + { + "records": [ + { + "skyflow_id": "456", + "tokens": { + "cardNumber": "card_number_token", + "cvv": "cvv_token" + } + } + ] + }, + ], + "requestId": "test-request-id" + } + + self.mock_error_response = { + "error": { + "grpc_code": 3, + "http_code": 400, + "message": "Insert failed due to error.", + "http_status": "Bad Request" + } + } + + # Create configurations for testing with different token scenarios + self.valid_config = Configuration( + 'test-vault-id', + 'https://test-vault.skyflow.com', + lambda: self.valid_token + ) + + self.invalid_config = Configuration( + 'test-vault-id', + 'https://test-vault.skyflow.com', + lambda: 'invalid-token' + ) + + @patch('requests.post') + def test_successful_insert(self, mock_post): + # Setup mock response + mock_response = Mock(spec=Response) + mock_response.status_code = 200 + mock_response.content = json.dumps(self.mock_success_response).encode('utf-8') + mock_response.headers = {'x-request-id': 'test-request-id'} + mock_post.return_value = mock_response # Create client and perform insert + client = Client(self.valid_config) + options = InsertOptions(tokens=True) + result = 
client.insert(self.data, options) + + # Verify the result + self.assertIn("records", result) + self.assertEqual(len(result["records"]), 2) + self.assertEqual(result["records"][0]["fields"]["cardNumber"], "card_number_token") + + # Verify the API was called with correct parameters + mock_post.assert_called_once() + called_url = mock_post.call_args[0][0] + self.assertTrue(called_url.endswith("/v1/vaults/test-vault-id")) + + @patch('requests.post') + def test_insert_api_error(self, mock_post): + # Setup mock error response + mock_response = Mock(spec=Response) + mock_response.status_code = 400 + mock_response.content = json.dumps(self.mock_error_response).encode('utf-8') + mock_response.headers = {'x-request-id': 'test-request-id'} + + # Mock raise_for_status to raise HTTPError + def raise_for_status(): + raise requests.exceptions.HTTPError("400 Client Error") + mock_response.raise_for_status = raise_for_status + + mock_post.return_value = mock_response + + # Create client and attempt insert + client = Client(self.valid_config) + options = InsertOptions(tokens=True) + + # This should raise a SkyflowError + with self.assertRaises(SkyflowError) as context: + client.insert(self.data, options) + + # Verify the error details + self.assertEqual(context.exception.code, 400) + self.assertIn("Insert failed due to error", context.exception.message) + + @patch('requests.post') + def test_insert_network_error(self, mock_post): + # Setup mock to simulate network error + mock_post.side_effect = Exception("Network error") + + # Create client and attempt insert + client = Client(self.valid_config) + options = InsertOptions(tokens=True) + + # Assert that the insert raises an error + with self.assertRaises(SkyflowError) as context: + client.insert(self.data, options) + + @patch('requests.post') + def test_insert_with_continue_on_error_partial_sucess(self, mock_post): + # Setup mock response with partial success + partial_response = { + "responses": [ + { + "Body": { + "records": [ + { + "skyflow_id": "123", + "tokens": {"cardNumber": "token1"} + } + ] + }, + "Status": 200 + }, + { + "Body": { + "error": "Unique constraint violation" + }, + "Status": 400 + } + ], + "requestId": "test-request-id" + } + + mock_response = Mock(spec=Response) + mock_response.status_code = 207 + mock_response.content = json.dumps(partial_response).encode('utf-8') + mock_response.headers = {'x-request-id': 'test-request-id'} + mock_post.return_value = mock_response + + # Create client and perform insert with continueOnError + client = Client(self.valid_config) + options = InsertOptions(tokens=True, continueOnError=True) + + # Create test data with two records + test_data = { + "records": [ + self.record, + self.record # Duplicate record that will cause error + ] + } + + result = client.insert(test_data, options) + + # Verify partial success results + self.assertIn("records", result) + self.assertIn("errors", result) + self.assertEqual(len(result["records"]), 1) + self.assertEqual(len(result["errors"]), 1) + + @patch('requests.post') + def test_insert_with_continue_on_error_complete_failure(self, mock_post): + # Setup mock response with complete failure + complete_failure_response = { + "responses": [ + { + "Body": { + "error": "Unique constraint violation" + }, + "Status": 400 + }, + { + "Body": { + "error": "Unique constraint violation" + }, + "Status": 400 + } + ], + "requestId": "test-request-id" + } + + mock_response = Mock(spec=Response) + mock_response.status_code = 207 + mock_response.content = 
json.dumps(complete_failure_response).encode('utf-8') + mock_response.headers = {'x-request-id': 'test-request-id'} + mock_post.return_value = mock_response + + # Create client and perform insert with continueOnError + client = Client(self.valid_config) + options = InsertOptions(tokens=True, continueOnError=True) + + # Create test data with two records + test_data = { + "records": [ + self.record, + self.record # Duplicate record that will cause error + ] + } + + result = client.insert(test_data, options) + + # Verify complete failure results + self.assertIn("errors", result) + self.assertNotIn("records", result) + self.assertEqual(len(result["errors"]), 2) + From 1c6553f14d3a3149c8c370f7990e74ec51231c81 Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Tue, 30 Sep 2025 12:22:37 +0000 Subject: [PATCH 29/42] [AUTOMATED] Private Release 1.15.7.dev0+b69c357 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 5d6e3251..d1bf86a4 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.7.dev0+cce9db4' +current_version = '1.15.7.dev0+b69c357' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 733b2c9d..790a03a2 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.7.dev0+cce9db4' \ No newline at end of file +SDK_VERSION = '1.15.7.dev0+b69c357' \ No newline at end of file From f3c530ed2b4489215d1cf7e25b83ef1ac6a24666 Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Tue, 30 Sep 2025 18:49:52 +0530 Subject: [PATCH 30/42] SK-2325 Updated new env secret in workflows --- .github/workflows/ci.yml | 1 + .github/workflows/main.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 06aa9a50..c447d526 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,6 +44,7 @@ jobs: echo SKYFLOW_ID1=${{secrets.SKYFLOW_ID1}} >> .env echo SKYFLOW_ID2=${{secrets.SKYFLOW_ID2}} >> .env echo SKYFLOW_ID3=${{secrets.SKYFLOW_ID3}} >> .env + echo MOCK_TOKEN=${{secrets.MOCK_TOKEN}} >> .env - name: 'Run Tests' run: | diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 48472e78..b0bc7278 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -34,6 +34,7 @@ jobs: echo SKYFLOW_ID1=${{secrets.SKYFLOW_ID1}} >> .env echo SKYFLOW_ID2=${{secrets.SKYFLOW_ID2}} >> .env echo SKYFLOW_ID3=${{secrets.SKYFLOW_ID3}} >> .env + echo MOCK_TOKEN=${{secrets.MOCK_TOKEN}} >> .env - name: 'Run Tests' run: | From 35916c21fe7ab8620b6b600193991cfed8621918 Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Tue, 30 Sep 2025 13:20:13 +0000 Subject: [PATCH 31/42] [AUTOMATED] Private Release 1.15.7.dev0+f3c530e --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index d1bf86a4..20c3abae 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.7.dev0+b69c357' +current_version = '1.15.7.dev0+f3c530e' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 790a03a2..9f437f19 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.7.dev0+b69c357' \ No newline at end of file +SDK_VERSION = '1.15.7.dev0+f3c530e' \ No newline at end of file From 
706d0d2c0f63d0280a597bc686f583573c6a99e6 Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Tue, 30 Sep 2025 13:38:46 +0000 Subject: [PATCH 32/42] [AUTOMATED] Public Release - 1.15.8 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 20c3abae..dcfcfc22 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.7.dev0+f3c530e' +current_version = '1.15.8' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 9f437f19..fdc039b8 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.7.dev0+f3c530e' \ No newline at end of file +SDK_VERSION = '1.15.8' \ No newline at end of file From 08b535aa0e96f882937e3efa41a8bfe874dc06e1 Mon Sep 17 00:00:00 2001 From: saileshwar-skyflow <156889717+saileshwar-skyflow@users.noreply.github.com> Date: Thu, 6 Nov 2025 19:57:07 +0530 Subject: [PATCH 33/42] SK-2353: Update generated code with updated openapi spec file. (#211) * SK-2353: update generated code with updated openapi spec file --- .github/workflows/shared-build-and-deploy.yml | 5 + .github/workflows/shared-tests.yml | 6 + skyflow/generated/rest/__init__.py | 204 ++- skyflow/generated/rest/audit/client.py | 64 + skyflow/generated/rest/client.py | 16 +- skyflow/generated/rest/core/client_wrapper.py | 19 +- skyflow/generated/rest/files/__init__.py | 64 +- skyflow/generated/rest/files/client.py | 1492 +++++++++-------- skyflow/generated/rest/files/raw_client.py | 1395 +++++++-------- .../generated/rest/files/types/__init__.py | 84 +- ...identify_audio_request_file_data_format.py | 5 - ...tify_audio_request_output_transcription.py | 14 - ...ntify_document_request_file_data_format.py | 5 - ...uest_deidentify_audio_entity_types_item.py | 79 + ...t_deidentify_audio_output_transcription.py | 10 + ...equest_deidentify_pdf_entity_types_item.py | 79 + ...uest_deidentify_image_entity_types_item.py | 79 + ...request_deidentify_image_masking_method.py | 5 + ...t_deidentify_document_entity_types_item.py | 79 + ...identify_presentation_entity_types_item.py | 79 + ...eidentify_spreadsheet_entity_types_item.py | 79 + ...ntify_structured_text_entity_types_item.py | 79 + ...quest_deidentify_text_entity_types_item.py | 79 + ...identify_file_request_entity_types_item.py | 79 + ...identify_image_request_file_data_format.py | 7 - ...deidentify_image_request_masking_method.py | 5 - .../deidentify_presentation_request_file.py | 34 - ...y_presentation_request_file_data_format.py | 5 - ...fy_spreadsheet_request_file_data_format.py | 5 - ...deidentify_structured_text_request_file.py | 34 - ...tructured_text_request_file_data_format.py | 5 - .../types/reidentify_file_request_file.py | 34 - ...eidentify_file_request_file_data_format.py | 7 - skyflow/generated/rest/guardrails/client.py | 49 +- .../generated/rest/guardrails/raw_client.py | 45 +- skyflow/generated/rest/records/client.py | 20 + skyflow/generated/rest/strings/__init__.py | 4 +- skyflow/generated/rest/strings/client.py | 150 +- skyflow/generated/rest/strings/raw_client.py | 152 +- .../generated/rest/strings/types/__init__.py | 4 +- ...entify_string_request_entity_types_item.py | 79 + .../types/reidentify_string_request_format.py | 37 - skyflow/generated/rest/types/__init__.py | 136 +- skyflow/generated/rest/types/allow_regex.py | 5 - .../check_guardrails_response_validation.py | 5 - .../generated/rest/types/configuration_id.py | 3 - 
.../rest/types/deidentified_file_output.py | 46 + ...ed_file_output_processed_file_extension.py | 29 + ...tified_file_output_processed_file_type.py} | 13 +- .../rest/types/deidentify_file_response.py | 6 +- .../rest/types/deidentify_status_response.py | 74 - .../deidentify_status_response_output_type.py | 5 - .../deidentify_status_response_status.py | 5 - .../rest/types/deidentify_string_response.py | 10 +- ...ponse.py => detect_guardrails_response.py} | 20 +- .../detect_guardrails_response_validation.py | 5 + .../rest/types/detect_runs_response.py | 72 + .../types/detect_runs_response_output_type.py | 5 + .../rest/types/detect_runs_response_status.py | 5 + .../generated/rest/types/entity_location.py | 41 - skyflow/generated/rest/types/entity_types.py | 7 - .../rest/types/error_response_error.py | 13 +- skyflow/generated/rest/types/error_string.py | 3 - ...y_file_response_output.py => file_data.py} | 16 +- .../file_data_data_format.py} | 29 +- .../file_data_deidentify_audio.py} | 16 +- .../file_data_deidentify_audio_data_format.py | 5 + .../file_data_deidentify_document.py} | 16 +- ...le_data_deidentify_document_data_format.py | 5 + .../file_data_deidentify_image.py} | 16 +- .../file_data_deidentify_image_data_format.py | 5 + .../file_data_deidentify_pdf.py} | 12 +- .../file_data_deidentify_presentation.py | 34 + ...ata_deidentify_presentation_data_format.py | 5 + .../file_data_deidentify_spreadsheet.py} | 16 +- ...data_deidentify_spreadsheet_data_format.py | 5 + .../file_data_deidentify_structured_text.py} | 16 +- ..._deidentify_structured_text_data_format.py | 5 + .../file_data_deidentify_text.py} | 12 +- .../rest/types/file_data_reidentify_file.py | 34 + .../file_data_reidentify_file_data_format.py | 7 + .../format.py} | 14 +- .../{entity_type.py => format_masked_item.py} | 72 +- .../rest/types/format_plaintext_item.py | 79 + .../rest/types/format_redacted_item.py | 79 + .../rest/types/{vault_id.py => http_code.py} | 2 +- ...tring_response.py => identify_response.py} | 6 +- ..._output.py => reidentified_file_output.py} | 14 +- ...ed_file_output_processed_file_extension.py | 29 + .../rest/types/reidentify_file_response.py | 13 +- .../reidentify_file_response_output_type.py | 2 +- .../types/reidentify_file_response_status.py | 2 +- .../generated/rest/types/restrict_regex.py | 5 - ...rmations_shift_dates.py => shift_dates.py} | 14 +- .../types/shift_dates_entity_types_item.py | 5 + ..._entity.py => string_response_entities.py} | 8 +- skyflow/generated/rest/types/token_type.py | 39 - .../rest/types/token_type_default.py | 5 - .../rest/types/token_type_mapping.py | 47 + .../rest/types/token_type_mapping_default.py | 5 + .../token_type_mapping_entity_only_item.py | 79 + ...en_type_mapping_entity_unq_counter_item.py | 79 + .../token_type_mapping_vault_token_item.py | 79 + .../rest/types/token_type_without_vault.py | 34 - .../types/token_type_without_vault_default.py | 5 - .../generated/rest/types/transformations.py | 9 +- ...ormations_shift_dates_entity_types_item.py | 5 - .../rest/types/word_character_count.py | 37 + skyflow/generated/rest/version.py | 4 +- skyflow/utils/__init__.py | 2 +- skyflow/utils/_utils.py | 12 +- skyflow/utils/validations/_validations.py | 1 - skyflow/vault/controller/_detect.py | 59 +- tests/utils/test__utils.py | 36 +- tests/vault/controller/test__detect.py | 13 +- 115 files changed, 3941 insertions(+), 2490 deletions(-) delete mode 100644 skyflow/generated/rest/files/types/deidentify_audio_request_file_data_format.py delete mode 100644 
skyflow/generated/rest/files/types/deidentify_audio_request_output_transcription.py delete mode 100644 skyflow/generated/rest/files/types/deidentify_document_request_file_data_format.py create mode 100644 skyflow/generated/rest/files/types/deidentify_file_audio_request_deidentify_audio_entity_types_item.py create mode 100644 skyflow/generated/rest/files/types/deidentify_file_audio_request_deidentify_audio_output_transcription.py create mode 100644 skyflow/generated/rest/files/types/deidentify_file_document_pdf_request_deidentify_pdf_entity_types_item.py create mode 100644 skyflow/generated/rest/files/types/deidentify_file_image_request_deidentify_image_entity_types_item.py create mode 100644 skyflow/generated/rest/files/types/deidentify_file_image_request_deidentify_image_masking_method.py create mode 100644 skyflow/generated/rest/files/types/deidentify_file_request_deidentify_document_entity_types_item.py create mode 100644 skyflow/generated/rest/files/types/deidentify_file_request_deidentify_presentation_entity_types_item.py create mode 100644 skyflow/generated/rest/files/types/deidentify_file_request_deidentify_spreadsheet_entity_types_item.py create mode 100644 skyflow/generated/rest/files/types/deidentify_file_request_deidentify_structured_text_entity_types_item.py create mode 100644 skyflow/generated/rest/files/types/deidentify_file_request_deidentify_text_entity_types_item.py create mode 100644 skyflow/generated/rest/files/types/deidentify_file_request_entity_types_item.py delete mode 100644 skyflow/generated/rest/files/types/deidentify_image_request_file_data_format.py delete mode 100644 skyflow/generated/rest/files/types/deidentify_image_request_masking_method.py delete mode 100644 skyflow/generated/rest/files/types/deidentify_presentation_request_file.py delete mode 100644 skyflow/generated/rest/files/types/deidentify_presentation_request_file_data_format.py delete mode 100644 skyflow/generated/rest/files/types/deidentify_spreadsheet_request_file_data_format.py delete mode 100644 skyflow/generated/rest/files/types/deidentify_structured_text_request_file.py delete mode 100644 skyflow/generated/rest/files/types/deidentify_structured_text_request_file_data_format.py delete mode 100644 skyflow/generated/rest/files/types/reidentify_file_request_file.py delete mode 100644 skyflow/generated/rest/files/types/reidentify_file_request_file_data_format.py create mode 100644 skyflow/generated/rest/strings/types/deidentify_string_request_entity_types_item.py delete mode 100644 skyflow/generated/rest/strings/types/reidentify_string_request_format.py delete mode 100644 skyflow/generated/rest/types/allow_regex.py delete mode 100644 skyflow/generated/rest/types/check_guardrails_response_validation.py delete mode 100644 skyflow/generated/rest/types/configuration_id.py create mode 100644 skyflow/generated/rest/types/deidentified_file_output.py create mode 100644 skyflow/generated/rest/types/deidentified_file_output_processed_file_extension.py rename skyflow/generated/rest/types/{deidentify_file_output_processed_file_type.py => deidentified_file_output_processed_file_type.py} (55%) delete mode 100644 skyflow/generated/rest/types/deidentify_status_response.py delete mode 100644 skyflow/generated/rest/types/deidentify_status_response_output_type.py delete mode 100644 skyflow/generated/rest/types/deidentify_status_response_status.py rename skyflow/generated/rest/types/{check_guardrails_response.py => detect_guardrails_response.py} (52%) create mode 100644 
skyflow/generated/rest/types/detect_guardrails_response_validation.py create mode 100644 skyflow/generated/rest/types/detect_runs_response.py create mode 100644 skyflow/generated/rest/types/detect_runs_response_output_type.py create mode 100644 skyflow/generated/rest/types/detect_runs_response_status.py delete mode 100644 skyflow/generated/rest/types/entity_location.py delete mode 100644 skyflow/generated/rest/types/entity_types.py delete mode 100644 skyflow/generated/rest/types/error_string.py rename skyflow/generated/rest/types/{reidentify_file_response_output.py => file_data.py} (53%) rename skyflow/generated/rest/{files/types/deidentify_file_request_file_data_format.py => types/file_data_data_format.py} (89%) rename skyflow/generated/rest/{files/types/deidentify_file_request_file.py => types/file_data_deidentify_audio.py} (53%) create mode 100644 skyflow/generated/rest/types/file_data_deidentify_audio_data_format.py rename skyflow/generated/rest/{files/types/deidentify_audio_request_file.py => types/file_data_deidentify_document.py} (54%) create mode 100644 skyflow/generated/rest/types/file_data_deidentify_document_data_format.py rename skyflow/generated/rest/{files/types/deidentify_image_request_file.py => types/file_data_deidentify_image.py} (53%) create mode 100644 skyflow/generated/rest/types/file_data_deidentify_image_data_format.py rename skyflow/generated/rest/{files/types/deidentify_pdf_request_file.py => types/file_data_deidentify_pdf.py} (66%) create mode 100644 skyflow/generated/rest/types/file_data_deidentify_presentation.py create mode 100644 skyflow/generated/rest/types/file_data_deidentify_presentation_data_format.py rename skyflow/generated/rest/{files/types/deidentify_document_request_file.py => types/file_data_deidentify_spreadsheet.py} (53%) create mode 100644 skyflow/generated/rest/types/file_data_deidentify_spreadsheet_data_format.py rename skyflow/generated/rest/{files/types/deidentify_spreadsheet_request_file.py => types/file_data_deidentify_structured_text.py} (53%) create mode 100644 skyflow/generated/rest/types/file_data_deidentify_structured_text_data_format.py rename skyflow/generated/rest/{files/types/deidentify_text_request_file.py => types/file_data_deidentify_text.py} (66%) create mode 100644 skyflow/generated/rest/types/file_data_reidentify_file.py create mode 100644 skyflow/generated/rest/types/file_data_reidentify_file_data_format.py rename skyflow/generated/rest/{files/types/reidentify_file_request_format.py => types/format.py} (58%) rename skyflow/generated/rest/types/{entity_type.py => format_masked_item.py} (98%) create mode 100644 skyflow/generated/rest/types/format_plaintext_item.py create mode 100644 skyflow/generated/rest/types/format_redacted_item.py rename skyflow/generated/rest/types/{vault_id.py => http_code.py} (81%) rename skyflow/generated/rest/types/{reidentify_string_response.py => identify_response.py} (78%) rename skyflow/generated/rest/types/{deidentify_file_output.py => reidentified_file_output.py} (56%) create mode 100644 skyflow/generated/rest/types/reidentified_file_output_processed_file_extension.py delete mode 100644 skyflow/generated/rest/types/restrict_regex.py rename skyflow/generated/rest/types/{transformations_shift_dates.py => shift_dates.py} (75%) create mode 100644 skyflow/generated/rest/types/shift_dates_entity_types_item.py rename skyflow/generated/rest/types/{detected_entity.py => string_response_entities.py} (82%) delete mode 100644 skyflow/generated/rest/types/token_type.py delete mode 100644 
skyflow/generated/rest/types/token_type_default.py create mode 100644 skyflow/generated/rest/types/token_type_mapping.py create mode 100644 skyflow/generated/rest/types/token_type_mapping_default.py create mode 100644 skyflow/generated/rest/types/token_type_mapping_entity_only_item.py create mode 100644 skyflow/generated/rest/types/token_type_mapping_entity_unq_counter_item.py create mode 100644 skyflow/generated/rest/types/token_type_mapping_vault_token_item.py delete mode 100644 skyflow/generated/rest/types/token_type_without_vault.py delete mode 100644 skyflow/generated/rest/types/token_type_without_vault_default.py delete mode 100644 skyflow/generated/rest/types/transformations_shift_dates_entity_types_item.py create mode 100644 skyflow/generated/rest/types/word_character_count.py diff --git a/.github/workflows/shared-build-and-deploy.yml b/.github/workflows/shared-build-and-deploy.yml index 1b0309dd..e826c20b 100644 --- a/.github/workflows/shared-build-and-deploy.yml +++ b/.github/workflows/shared-build-and-deploy.yml @@ -27,6 +27,11 @@ jobs: python -m pip install --upgrade pip pip install setuptools wheel twine + - name: Build and install skyflow package + run: | + python setup.py sdist bdist_wheel + pip install dist/skyflow-*.whl + - name: Resolve Branch for the Tagged Commit id: resolve-branch if: ${{ inputs.tag == 'beta' || inputs.tag == 'public' }} diff --git a/.github/workflows/shared-tests.yml b/.github/workflows/shared-tests.yml index 0ab797d8..73253e3c 100644 --- a/.github/workflows/shared-tests.yml +++ b/.github/workflows/shared-tests.yml @@ -23,6 +23,12 @@ jobs: with: name: "credentials.json" json: ${{ secrets.VALID_SKYFLOW_CREDS_TEST }} + + - name: Build and install skyflow package + run: | + pip install --upgrade pip setuptools wheel + python setup.py sdist bdist_wheel + pip install dist/skyflow-*.whl - name: 'Run Tests' run: | diff --git a/skyflow/generated/rest/__init__.py b/skyflow/generated/rest/__init__.py index 7eda9318..8a59c25d 100644 --- a/skyflow/generated/rest/__init__.py +++ b/skyflow/generated/rest/__init__.py @@ -3,49 +3,69 @@ # isort: skip_file from .types import ( - AllowRegex, AuditEventAuditResourceType, AuditEventContext, AuditEventData, AuditEventHttpInfo, BatchRecordMethod, - CheckGuardrailsResponse, - CheckGuardrailsResponseValidation, - ConfigurationId, ContextAccessType, ContextAuthMode, - DeidentifyFileOutput, - DeidentifyFileOutputProcessedFileType, + DeidentifiedFileOutput, + DeidentifiedFileOutputProcessedFileExtension, + DeidentifiedFileOutputProcessedFileType, DeidentifyFileResponse, - DeidentifyStatusResponse, - DeidentifyStatusResponseOutputType, - DeidentifyStatusResponseStatus, DeidentifyStringResponse, - DetectedEntity, + DetectGuardrailsResponse, + DetectGuardrailsResponseValidation, + DetectRunsResponse, + DetectRunsResponseOutputType, + DetectRunsResponseStatus, DetokenizeRecordResponseValueType, - EntityLocation, - EntityType, - EntityTypes, ErrorResponse, ErrorResponseError, - ErrorString, + FileData, + FileDataDataFormat, + FileDataDeidentifyAudio, + FileDataDeidentifyAudioDataFormat, + FileDataDeidentifyDocument, + FileDataDeidentifyDocumentDataFormat, + FileDataDeidentifyImage, + FileDataDeidentifyImageDataFormat, + FileDataDeidentifyPdf, + FileDataDeidentifyPresentation, + FileDataDeidentifyPresentationDataFormat, + FileDataDeidentifySpreadsheet, + FileDataDeidentifySpreadsheetDataFormat, + FileDataDeidentifyStructuredText, + FileDataDeidentifyStructuredTextDataFormat, + FileDataDeidentifyText, + FileDataReidentifyFile, + 
FileDataReidentifyFileDataFormat, + Format, + FormatMaskedItem, + FormatPlaintextItem, + FormatRedactedItem, GooglerpcStatus, + HttpCode, + IdentifyResponse, ProtobufAny, RedactionEnumRedaction, + ReidentifiedFileOutput, + ReidentifiedFileOutputProcessedFileExtension, ReidentifyFileResponse, - ReidentifyFileResponseOutput, + ReidentifyFileResponseOutputType, ReidentifyFileResponseStatus, - ReidentifyStringResponse, RequestActionType, ResourceId, - RestrictRegex, - TokenType, - TokenTypeDefault, - TokenTypeWithoutVault, - TokenTypeWithoutVaultDefault, + ShiftDates, + ShiftDatesEntityTypesItem, + StringResponseEntities, + TokenTypeMapping, + TokenTypeMappingDefault, + TokenTypeMappingEntityOnlyItem, + TokenTypeMappingEntityUnqCounterItem, + TokenTypeMappingVaultTokenItem, Transformations, - TransformationsShiftDates, - TransformationsShiftDatesEntityTypesItem, UploadFileV2Response, Uuid, V1AuditAfterOptions, @@ -79,7 +99,7 @@ V1UpdateRecordResponse, V1VaultFieldMapping, V1VaultSchemaConfig, - VaultId, + WordCharacterCount, ) from .errors import BadRequestError, InternalServerError, NotFoundError, UnauthorizedError from . import audit, authentication, bin_lookup, files, guardrails, query, records, strings, tokens @@ -94,38 +114,27 @@ from .client import AsyncSkyflow, Skyflow from .environment import SkyflowEnvironment from .files import ( - DeidentifyAudioRequestFile, - DeidentifyAudioRequestFileDataFormat, - DeidentifyAudioRequestOutputTranscription, - DeidentifyDocumentRequestFile, - DeidentifyDocumentRequestFileDataFormat, - DeidentifyFileRequestFile, - DeidentifyFileRequestFileDataFormat, - DeidentifyImageRequestFile, - DeidentifyImageRequestFileDataFormat, - DeidentifyImageRequestMaskingMethod, - DeidentifyPdfRequestFile, - DeidentifyPresentationRequestFile, - DeidentifyPresentationRequestFileDataFormat, - DeidentifySpreadsheetRequestFile, - DeidentifySpreadsheetRequestFileDataFormat, - DeidentifyStructuredTextRequestFile, - DeidentifyStructuredTextRequestFileDataFormat, - DeidentifyTextRequestFile, - ReidentifyFileRequestFile, - ReidentifyFileRequestFileDataFormat, - ReidentifyFileRequestFormat, + DeidentifyFileAudioRequestDeidentifyAudioEntityTypesItem, + DeidentifyFileAudioRequestDeidentifyAudioOutputTranscription, + DeidentifyFileDocumentPdfRequestDeidentifyPdfEntityTypesItem, + DeidentifyFileImageRequestDeidentifyImageEntityTypesItem, + DeidentifyFileImageRequestDeidentifyImageMaskingMethod, + DeidentifyFileRequestDeidentifyDocumentEntityTypesItem, + DeidentifyFileRequestDeidentifyPresentationEntityTypesItem, + DeidentifyFileRequestDeidentifySpreadsheetEntityTypesItem, + DeidentifyFileRequestDeidentifyStructuredTextEntityTypesItem, + DeidentifyFileRequestDeidentifyTextEntityTypesItem, + DeidentifyFileRequestEntityTypesItem, ) from .records import ( RecordServiceBulkGetRecordRequestOrderBy, RecordServiceBulkGetRecordRequestRedaction, RecordServiceGetRecordRequestRedaction, ) -from .strings import ReidentifyStringRequestFormat +from .strings import DeidentifyStringRequestEntityTypesItem from .version import __version__ __all__ = [ - "AllowRegex", "AsyncSkyflow", "AuditEventAuditResourceType", "AuditEventContext", @@ -139,45 +148,58 @@ "AuditServiceListAuditEventsRequestSortOpsOrderBy", "BadRequestError", "BatchRecordMethod", - "CheckGuardrailsResponse", - "CheckGuardrailsResponseValidation", - "ConfigurationId", "ContextAccessType", "ContextAuthMode", - "DeidentifyAudioRequestFile", - "DeidentifyAudioRequestFileDataFormat", - "DeidentifyAudioRequestOutputTranscription", - 
"DeidentifyDocumentRequestFile", - "DeidentifyDocumentRequestFileDataFormat", - "DeidentifyFileOutput", - "DeidentifyFileOutputProcessedFileType", - "DeidentifyFileRequestFile", - "DeidentifyFileRequestFileDataFormat", + "DeidentifiedFileOutput", + "DeidentifiedFileOutputProcessedFileExtension", + "DeidentifiedFileOutputProcessedFileType", + "DeidentifyFileAudioRequestDeidentifyAudioEntityTypesItem", + "DeidentifyFileAudioRequestDeidentifyAudioOutputTranscription", + "DeidentifyFileDocumentPdfRequestDeidentifyPdfEntityTypesItem", + "DeidentifyFileImageRequestDeidentifyImageEntityTypesItem", + "DeidentifyFileImageRequestDeidentifyImageMaskingMethod", + "DeidentifyFileRequestDeidentifyDocumentEntityTypesItem", + "DeidentifyFileRequestDeidentifyPresentationEntityTypesItem", + "DeidentifyFileRequestDeidentifySpreadsheetEntityTypesItem", + "DeidentifyFileRequestDeidentifyStructuredTextEntityTypesItem", + "DeidentifyFileRequestDeidentifyTextEntityTypesItem", + "DeidentifyFileRequestEntityTypesItem", "DeidentifyFileResponse", - "DeidentifyImageRequestFile", - "DeidentifyImageRequestFileDataFormat", - "DeidentifyImageRequestMaskingMethod", - "DeidentifyPdfRequestFile", - "DeidentifyPresentationRequestFile", - "DeidentifyPresentationRequestFileDataFormat", - "DeidentifySpreadsheetRequestFile", - "DeidentifySpreadsheetRequestFileDataFormat", - "DeidentifyStatusResponse", - "DeidentifyStatusResponseOutputType", - "DeidentifyStatusResponseStatus", + "DeidentifyStringRequestEntityTypesItem", "DeidentifyStringResponse", - "DeidentifyStructuredTextRequestFile", - "DeidentifyStructuredTextRequestFileDataFormat", - "DeidentifyTextRequestFile", - "DetectedEntity", + "DetectGuardrailsResponse", + "DetectGuardrailsResponseValidation", + "DetectRunsResponse", + "DetectRunsResponseOutputType", + "DetectRunsResponseStatus", "DetokenizeRecordResponseValueType", - "EntityLocation", - "EntityType", - "EntityTypes", "ErrorResponse", "ErrorResponseError", - "ErrorString", + "FileData", + "FileDataDataFormat", + "FileDataDeidentifyAudio", + "FileDataDeidentifyAudioDataFormat", + "FileDataDeidentifyDocument", + "FileDataDeidentifyDocumentDataFormat", + "FileDataDeidentifyImage", + "FileDataDeidentifyImageDataFormat", + "FileDataDeidentifyPdf", + "FileDataDeidentifyPresentation", + "FileDataDeidentifyPresentationDataFormat", + "FileDataDeidentifySpreadsheet", + "FileDataDeidentifySpreadsheetDataFormat", + "FileDataDeidentifyStructuredText", + "FileDataDeidentifyStructuredTextDataFormat", + "FileDataDeidentifyText", + "FileDataReidentifyFile", + "FileDataReidentifyFileDataFormat", + "Format", + "FormatMaskedItem", + "FormatPlaintextItem", + "FormatRedactedItem", "GooglerpcStatus", + "HttpCode", + "IdentifyResponse", "InternalServerError", "NotFoundError", "ProtobufAny", @@ -185,26 +207,24 @@ "RecordServiceBulkGetRecordRequestRedaction", "RecordServiceGetRecordRequestRedaction", "RedactionEnumRedaction", - "ReidentifyFileRequestFile", - "ReidentifyFileRequestFileDataFormat", - "ReidentifyFileRequestFormat", + "ReidentifiedFileOutput", + "ReidentifiedFileOutputProcessedFileExtension", "ReidentifyFileResponse", - "ReidentifyFileResponseOutput", + "ReidentifyFileResponseOutputType", "ReidentifyFileResponseStatus", - "ReidentifyStringRequestFormat", - "ReidentifyStringResponse", "RequestActionType", "ResourceId", - "RestrictRegex", + "ShiftDates", + "ShiftDatesEntityTypesItem", "Skyflow", "SkyflowEnvironment", - "TokenType", - "TokenTypeDefault", - "TokenTypeWithoutVault", - "TokenTypeWithoutVaultDefault", + 
"StringResponseEntities", + "TokenTypeMapping", + "TokenTypeMappingDefault", + "TokenTypeMappingEntityOnlyItem", + "TokenTypeMappingEntityUnqCounterItem", + "TokenTypeMappingVaultTokenItem", "Transformations", - "TransformationsShiftDates", - "TransformationsShiftDatesEntityTypesItem", "UnauthorizedError", "UploadFileV2Response", "Uuid", @@ -239,7 +259,7 @@ "V1UpdateRecordResponse", "V1VaultFieldMapping", "V1VaultSchemaConfig", - "VaultId", + "WordCharacterCount", "__version__", "audit", "authentication", diff --git a/skyflow/generated/rest/audit/client.py b/skyflow/generated/rest/audit/client.py index 34d589d1..6f1d1039 100644 --- a/skyflow/generated/rest/audit/client.py +++ b/skyflow/generated/rest/audit/client.py @@ -205,7 +205,39 @@ def audit_service_list_audit_events( token="YOUR_TOKEN", ) client.audit.audit_service_list_audit_events( + filter_ops_context_change_id="filterOps.context.changeID", + filter_ops_context_request_id="filterOps.context.requestID", + filter_ops_context_trace_id="filterOps.context.traceID", + filter_ops_context_session_id="filterOps.context.sessionID", + filter_ops_context_actor="filterOps.context.actor", + filter_ops_context_actor_type="NONE", + filter_ops_context_access_type="ACCESS_NONE", + filter_ops_context_ip_address="filterOps.context.ipAddress", + filter_ops_context_origin="filterOps.context.origin", + filter_ops_context_auth_mode="AUTH_NONE", + filter_ops_context_jwt_id="filterOps.context.jwtID", + filter_ops_context_bearer_token_context_id="filterOps.context.bearerTokenContextID", + filter_ops_parent_account_id="filterOps.parentAccountID", filter_ops_account_id="filterOps.accountID", + filter_ops_workspace_id="filterOps.workspaceID", + filter_ops_vault_id="filterOps.vaultID", + filter_ops_resource_i_ds="filterOps.resourceIDs", + filter_ops_action_type="NONE", + filter_ops_resource_type="NONE_API", + filter_ops_tags="filterOps.tags", + filter_ops_response_code=1, + filter_ops_start_time="filterOps.startTime", + filter_ops_end_time="filterOps.endTime", + filter_ops_api_name="filterOps.apiName", + filter_ops_response_message="filterOps.responseMessage", + filter_ops_http_method="filterOps.httpMethod", + filter_ops_http_uri="filterOps.httpURI", + sort_ops_sort_by="sortOps.sortBy", + sort_ops_order_by="ASCENDING", + after_ops_timestamp="afterOps.timestamp", + after_ops_change_id="afterOps.changeID", + limit=1000000, + offset=1000000, ) """ _response = self._raw_client.audit_service_list_audit_events( @@ -431,7 +463,39 @@ async def audit_service_list_audit_events( async def main() -> None: await client.audit.audit_service_list_audit_events( + filter_ops_context_change_id="filterOps.context.changeID", + filter_ops_context_request_id="filterOps.context.requestID", + filter_ops_context_trace_id="filterOps.context.traceID", + filter_ops_context_session_id="filterOps.context.sessionID", + filter_ops_context_actor="filterOps.context.actor", + filter_ops_context_actor_type="NONE", + filter_ops_context_access_type="ACCESS_NONE", + filter_ops_context_ip_address="filterOps.context.ipAddress", + filter_ops_context_origin="filterOps.context.origin", + filter_ops_context_auth_mode="AUTH_NONE", + filter_ops_context_jwt_id="filterOps.context.jwtID", + filter_ops_context_bearer_token_context_id="filterOps.context.bearerTokenContextID", + filter_ops_parent_account_id="filterOps.parentAccountID", filter_ops_account_id="filterOps.accountID", + filter_ops_workspace_id="filterOps.workspaceID", + filter_ops_vault_id="filterOps.vaultID", + 
filter_ops_resource_i_ds="filterOps.resourceIDs", + filter_ops_action_type="NONE", + filter_ops_resource_type="NONE_API", + filter_ops_tags="filterOps.tags", + filter_ops_response_code=1, + filter_ops_start_time="filterOps.startTime", + filter_ops_end_time="filterOps.endTime", + filter_ops_api_name="filterOps.apiName", + filter_ops_response_message="filterOps.responseMessage", + filter_ops_http_method="filterOps.httpMethod", + filter_ops_http_uri="filterOps.httpURI", + sort_ops_sort_by="sortOps.sortBy", + sort_ops_order_by="ASCENDING", + after_ops_timestamp="afterOps.timestamp", + after_ops_change_id="afterOps.changeID", + limit=1000000, + offset=1000000, ) diff --git a/skyflow/generated/rest/client.py b/skyflow/generated/rest/client.py index e111c0b2..e8abdf54 100644 --- a/skyflow/generated/rest/client.py +++ b/skyflow/generated/rest/client.py @@ -34,7 +34,7 @@ class Skyflow: - token : typing.Union[str, typing.Callable[[], str]] + token : typing.Optional[typing.Union[str, typing.Callable[[], str]]] headers : typing.Optional[typing.Dict[str, str]] Additional headers to send with every request. @@ -61,7 +61,7 @@ def __init__( *, base_url: typing.Optional[str] = None, environment: SkyflowEnvironment = SkyflowEnvironment.PRODUCTION, - token: typing.Union[str, typing.Callable[[], str]], + token: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = None, headers: typing.Optional[typing.Dict[str, str]] = None, timeout: typing.Optional[float] = None, follow_redirects: typing.Optional[bool] = True, @@ -87,9 +87,9 @@ def __init__( self.tokens = TokensClient(client_wrapper=self._client_wrapper) self.query = QueryClient(client_wrapper=self._client_wrapper) self.authentication = AuthenticationClient(client_wrapper=self._client_wrapper) - self.guardrails = GuardrailsClient(client_wrapper=self._client_wrapper) - self.strings = StringsClient(client_wrapper=self._client_wrapper) self.files = FilesClient(client_wrapper=self._client_wrapper) + self.strings = StringsClient(client_wrapper=self._client_wrapper) + self.guardrails = GuardrailsClient(client_wrapper=self._client_wrapper) class AsyncSkyflow: @@ -110,7 +110,7 @@ class AsyncSkyflow: - token : typing.Union[str, typing.Callable[[], str]] + token : typing.Optional[typing.Union[str, typing.Callable[[], str]]] headers : typing.Optional[typing.Dict[str, str]] Additional headers to send with every request. 
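The `client.py` and `core/client_wrapper.py` hunks in this patch make the `token` argument optional and only attach an `Authorization` header when a token is available, so the client can now be constructed without credentials. A minimal usage sketch of that behavior, with placeholder token values (not from this patch):

    from skyflow import Skyflow

    # String token: every request carries "Authorization: Bearer YOUR_TOKEN".
    authed_client = Skyflow(token="YOUR_TOKEN")

    # Callable token: invoked each time request headers are built, so it can
    # return a freshly minted credential (placeholder provider shown here).
    rotating_client = Skyflow(token=lambda: "FRESHLY_MINTED_TOKEN")

    # No token: per the updated get_headers(), no Authorization header is sent.
    anonymous_client = Skyflow()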
@@ -137,7 +137,7 @@ def __init__( *, base_url: typing.Optional[str] = None, environment: SkyflowEnvironment = SkyflowEnvironment.PRODUCTION, - token: typing.Union[str, typing.Callable[[], str]], + token: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = None, headers: typing.Optional[typing.Dict[str, str]] = None, timeout: typing.Optional[float] = None, follow_redirects: typing.Optional[bool] = True, @@ -163,9 +163,9 @@ def __init__( self.tokens = AsyncTokensClient(client_wrapper=self._client_wrapper) self.query = AsyncQueryClient(client_wrapper=self._client_wrapper) self.authentication = AsyncAuthenticationClient(client_wrapper=self._client_wrapper) - self.guardrails = AsyncGuardrailsClient(client_wrapper=self._client_wrapper) - self.strings = AsyncStringsClient(client_wrapper=self._client_wrapper) self.files = AsyncFilesClient(client_wrapper=self._client_wrapper) + self.strings = AsyncStringsClient(client_wrapper=self._client_wrapper) + self.guardrails = AsyncGuardrailsClient(client_wrapper=self._client_wrapper) def _get_base_url(*, base_url: typing.Optional[str] = None, environment: SkyflowEnvironment) -> str: diff --git a/skyflow/generated/rest/core/client_wrapper.py b/skyflow/generated/rest/core/client_wrapper.py index a3210a7e..355d775e 100644 --- a/skyflow/generated/rest/core/client_wrapper.py +++ b/skyflow/generated/rest/core/client_wrapper.py @@ -10,7 +10,7 @@ class BaseClientWrapper: def __init__( self, *, - token: typing.Union[str, typing.Callable[[], str]], + token: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = None, headers: typing.Optional[typing.Dict[str, str]] = None, base_url: str, timeout: typing.Optional[float] = None, @@ -22,16 +22,19 @@ def __init__( def get_headers(self) -> typing.Dict[str, str]: headers: typing.Dict[str, str] = { + "User-Agent": "skyflow/1.16.1", "X-Fern-Language": "Python", - "X-Fern-SDK-Name": "skyflow_vault", - "X-Fern-SDK-Version": "0.0.252", + "X-Fern-SDK-Name": "skyflow", + "X-Fern-SDK-Version": "1.16.1", **(self.get_custom_headers() or {}), } - headers["Authorization"] = f"Bearer {self._get_token()}" + token = self._get_token() + if token is not None: + headers["Authorization"] = f"Bearer {token}" return headers - def _get_token(self) -> str: - if isinstance(self._token, str): + def _get_token(self) -> typing.Optional[str]: + if isinstance(self._token, str) or self._token is None: return self._token else: return self._token() @@ -50,7 +53,7 @@ class SyncClientWrapper(BaseClientWrapper): def __init__( self, *, - token: typing.Union[str, typing.Callable[[], str]], + token: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = None, headers: typing.Optional[typing.Dict[str, str]] = None, base_url: str, timeout: typing.Optional[float] = None, @@ -69,7 +72,7 @@ class AsyncClientWrapper(BaseClientWrapper): def __init__( self, *, - token: typing.Union[str, typing.Callable[[], str]], + token: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = None, headers: typing.Optional[typing.Dict[str, str]] = None, base_url: str, timeout: typing.Optional[float] = None, diff --git a/skyflow/generated/rest/files/__init__.py b/skyflow/generated/rest/files/__init__.py index b1679867..f313ad67 100644 --- a/skyflow/generated/rest/files/__init__.py +++ b/skyflow/generated/rest/files/__init__.py @@ -3,49 +3,29 @@ # isort: skip_file from .types import ( - DeidentifyAudioRequestFile, - DeidentifyAudioRequestFileDataFormat, - DeidentifyAudioRequestOutputTranscription, - DeidentifyDocumentRequestFile, - 
DeidentifyDocumentRequestFileDataFormat, - DeidentifyFileRequestFile, - DeidentifyFileRequestFileDataFormat, - DeidentifyImageRequestFile, - DeidentifyImageRequestFileDataFormat, - DeidentifyImageRequestMaskingMethod, - DeidentifyPdfRequestFile, - DeidentifyPresentationRequestFile, - DeidentifyPresentationRequestFileDataFormat, - DeidentifySpreadsheetRequestFile, - DeidentifySpreadsheetRequestFileDataFormat, - DeidentifyStructuredTextRequestFile, - DeidentifyStructuredTextRequestFileDataFormat, - DeidentifyTextRequestFile, - ReidentifyFileRequestFile, - ReidentifyFileRequestFileDataFormat, - ReidentifyFileRequestFormat, + DeidentifyFileAudioRequestDeidentifyAudioEntityTypesItem, + DeidentifyFileAudioRequestDeidentifyAudioOutputTranscription, + DeidentifyFileDocumentPdfRequestDeidentifyPdfEntityTypesItem, + DeidentifyFileImageRequestDeidentifyImageEntityTypesItem, + DeidentifyFileImageRequestDeidentifyImageMaskingMethod, + DeidentifyFileRequestDeidentifyDocumentEntityTypesItem, + DeidentifyFileRequestDeidentifyPresentationEntityTypesItem, + DeidentifyFileRequestDeidentifySpreadsheetEntityTypesItem, + DeidentifyFileRequestDeidentifyStructuredTextEntityTypesItem, + DeidentifyFileRequestDeidentifyTextEntityTypesItem, + DeidentifyFileRequestEntityTypesItem, ) __all__ = [ - "DeidentifyAudioRequestFile", - "DeidentifyAudioRequestFileDataFormat", - "DeidentifyAudioRequestOutputTranscription", - "DeidentifyDocumentRequestFile", - "DeidentifyDocumentRequestFileDataFormat", - "DeidentifyFileRequestFile", - "DeidentifyFileRequestFileDataFormat", - "DeidentifyImageRequestFile", - "DeidentifyImageRequestFileDataFormat", - "DeidentifyImageRequestMaskingMethod", - "DeidentifyPdfRequestFile", - "DeidentifyPresentationRequestFile", - "DeidentifyPresentationRequestFileDataFormat", - "DeidentifySpreadsheetRequestFile", - "DeidentifySpreadsheetRequestFileDataFormat", - "DeidentifyStructuredTextRequestFile", - "DeidentifyStructuredTextRequestFileDataFormat", - "DeidentifyTextRequestFile", - "ReidentifyFileRequestFile", - "ReidentifyFileRequestFileDataFormat", - "ReidentifyFileRequestFormat", + "DeidentifyFileAudioRequestDeidentifyAudioEntityTypesItem", + "DeidentifyFileAudioRequestDeidentifyAudioOutputTranscription", + "DeidentifyFileDocumentPdfRequestDeidentifyPdfEntityTypesItem", + "DeidentifyFileImageRequestDeidentifyImageEntityTypesItem", + "DeidentifyFileImageRequestDeidentifyImageMaskingMethod", + "DeidentifyFileRequestDeidentifyDocumentEntityTypesItem", + "DeidentifyFileRequestDeidentifyPresentationEntityTypesItem", + "DeidentifyFileRequestDeidentifySpreadsheetEntityTypesItem", + "DeidentifyFileRequestDeidentifyStructuredTextEntityTypesItem", + "DeidentifyFileRequestDeidentifyTextEntityTypesItem", + "DeidentifyFileRequestEntityTypesItem", ] diff --git a/skyflow/generated/rest/files/client.py b/skyflow/generated/rest/files/client.py index 4d5d548b..539d2161 100644 --- a/skyflow/generated/rest/files/client.py +++ b/skyflow/generated/rest/files/client.py @@ -4,32 +4,54 @@ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper from ..core.request_options import RequestOptions -from ..types.allow_regex import AllowRegex -from ..types.configuration_id import ConfigurationId from ..types.deidentify_file_response import DeidentifyFileResponse -from ..types.deidentify_status_response import DeidentifyStatusResponse -from ..types.entity_types import EntityTypes +from ..types.detect_runs_response import DetectRunsResponse +from ..types.file_data import FileData +from 
..types.file_data_deidentify_audio import FileDataDeidentifyAudio +from ..types.file_data_deidentify_document import FileDataDeidentifyDocument +from ..types.file_data_deidentify_image import FileDataDeidentifyImage +from ..types.file_data_deidentify_pdf import FileDataDeidentifyPdf +from ..types.file_data_deidentify_presentation import FileDataDeidentifyPresentation +from ..types.file_data_deidentify_spreadsheet import FileDataDeidentifySpreadsheet +from ..types.file_data_deidentify_structured_text import FileDataDeidentifyStructuredText +from ..types.file_data_deidentify_text import FileDataDeidentifyText +from ..types.file_data_reidentify_file import FileDataReidentifyFile +from ..types.format import Format from ..types.reidentify_file_response import ReidentifyFileResponse -from ..types.resource_id import ResourceId -from ..types.restrict_regex import RestrictRegex -from ..types.token_type_without_vault import TokenTypeWithoutVault +from ..types.token_type_mapping import TokenTypeMapping from ..types.transformations import Transformations -from ..types.uuid_ import Uuid -from ..types.vault_id import VaultId from .raw_client import AsyncRawFilesClient, RawFilesClient -from .types.deidentify_audio_request_file import DeidentifyAudioRequestFile -from .types.deidentify_audio_request_output_transcription import DeidentifyAudioRequestOutputTranscription -from .types.deidentify_document_request_file import DeidentifyDocumentRequestFile -from .types.deidentify_file_request_file import DeidentifyFileRequestFile -from .types.deidentify_image_request_file import DeidentifyImageRequestFile -from .types.deidentify_image_request_masking_method import DeidentifyImageRequestMaskingMethod -from .types.deidentify_pdf_request_file import DeidentifyPdfRequestFile -from .types.deidentify_presentation_request_file import DeidentifyPresentationRequestFile -from .types.deidentify_spreadsheet_request_file import DeidentifySpreadsheetRequestFile -from .types.deidentify_structured_text_request_file import DeidentifyStructuredTextRequestFile -from .types.deidentify_text_request_file import DeidentifyTextRequestFile -from .types.reidentify_file_request_file import ReidentifyFileRequestFile -from .types.reidentify_file_request_format import ReidentifyFileRequestFormat +from .types.deidentify_file_audio_request_deidentify_audio_entity_types_item import ( + DeidentifyFileAudioRequestDeidentifyAudioEntityTypesItem, +) +from .types.deidentify_file_audio_request_deidentify_audio_output_transcription import ( + DeidentifyFileAudioRequestDeidentifyAudioOutputTranscription, +) +from .types.deidentify_file_document_pdf_request_deidentify_pdf_entity_types_item import ( + DeidentifyFileDocumentPdfRequestDeidentifyPdfEntityTypesItem, +) +from .types.deidentify_file_image_request_deidentify_image_entity_types_item import ( + DeidentifyFileImageRequestDeidentifyImageEntityTypesItem, +) +from .types.deidentify_file_image_request_deidentify_image_masking_method import ( + DeidentifyFileImageRequestDeidentifyImageMaskingMethod, +) +from .types.deidentify_file_request_deidentify_document_entity_types_item import ( + DeidentifyFileRequestDeidentifyDocumentEntityTypesItem, +) +from .types.deidentify_file_request_deidentify_presentation_entity_types_item import ( + DeidentifyFileRequestDeidentifyPresentationEntityTypesItem, +) +from .types.deidentify_file_request_deidentify_spreadsheet_entity_types_item import ( + DeidentifyFileRequestDeidentifySpreadsheetEntityTypesItem, +) +from 
.types.deidentify_file_request_deidentify_structured_text_entity_types_item import ( + DeidentifyFileRequestDeidentifyStructuredTextEntityTypesItem, +) +from .types.deidentify_file_request_deidentify_text_entity_types_item import ( + DeidentifyFileRequestDeidentifyTextEntityTypesItem, +) +from .types.deidentify_file_request_entity_types_item import DeidentifyFileRequestEntityTypesItem # this is used as the default value for optional parameters OMIT = typing.cast(typing.Any, ...) @@ -53,14 +75,14 @@ def with_raw_response(self) -> RawFilesClient: def deidentify_file( self, *, - vault_id: VaultId, - file: DeidentifyFileRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileData, + vault_id: str, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileRequestEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyFileResponse: """ @@ -68,439 +90,492 @@ def deidentify_file( Parameters ---------- - vault_id : VaultId + file : FileData - file : DeidentifyFileRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestEntityTypesItem]] + Entities to detect and de-identify. - entity_types : typing.Optional[EntityTypes] + token_type : typing.Optional[TokenTypeMapping] - token_type : typing.Optional[TokenTypeWithoutVault] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - allow_regex : typing.Optional[AllowRegex] - - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. 
+ request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyFileResponse - A successful response. + OK Examples -------- - from skyflow import Skyflow - from skyflow.files import DeidentifyFileRequestFile + from skyflow import FileData, Skyflow client = Skyflow( token="YOUR_TOKEN", ) client.files.deidentify_file( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=DeidentifyFileRequestFile( - base_64="Zm9vYmFy", - data_format="txt", + file=FileData( + base_64="base64", + data_format="mp3", ), + vault_id="vault_id", ) """ _response = self._raw_client.deidentify_file( - vault_id=vault_id, file=file, - configuration_id=configuration_id, + vault_id=vault_id, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data - def deidentify_document( + def deidentify_audio( self, *, - vault_id: VaultId, - file: DeidentifyDocumentRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyAudio, + vault_id: str, + output_transcription: typing.Optional[DeidentifyFileAudioRequestDeidentifyAudioOutputTranscription] = OMIT, + output_processed_audio: typing.Optional[bool] = OMIT, + bleep_start_padding: typing.Optional[float] = OMIT, + bleep_stop_padding: typing.Optional[float] = OMIT, + bleep_frequency: typing.Optional[int] = OMIT, + bleep_gain: typing.Optional[int] = OMIT, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileAudioRequestDeidentifyAudioEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyFileResponse: """ - De-identifies sensitive data from a document file. This operation includes options applicable to all supported document file types.
For more specific options, see the file type-specific opertions (like De-identify PDF) where they're available. For broader file type support, see De-identify File. + De-identifies sensitive data from an audio file. This operation includes options applicable to all supported audio file types.
For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyAudio - file : DeidentifyDocumentRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] + output_transcription : typing.Optional[DeidentifyFileAudioRequestDeidentifyAudioOutputTranscription] + Type of transcription to output. - entity_types : typing.Optional[EntityTypes] + output_processed_audio : typing.Optional[bool] + Whether to include the processed audio file in the response. - token_type : typing.Optional[TokenTypeWithoutVault] + bleep_start_padding : typing.Optional[float] + Padding added to the beginning of a bleep, in seconds. - allow_regex : typing.Optional[AllowRegex] + bleep_stop_padding : typing.Optional[float] + Padding added to the end of a bleep, in seconds. - restrict_regex : typing.Optional[RestrictRegex] + bleep_frequency : typing.Optional[int] + The pitch of the bleep sound, in Hz. The higher the number, the higher the pitch. + + bleep_gain : typing.Optional[int] + Relative loudness of the bleep in dB. Positive values increase its loudness, and negative values decrease it. + + entity_types : typing.Optional[typing.Sequence[DeidentifyFileAudioRequestDeidentifyAudioEntityTypesItem]] + Entities to detect and de-identify. + + token_type : typing.Optional[TokenTypeMapping] + + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. + + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyFileResponse - A successful response. 
+ OK Examples -------- - from skyflow import Skyflow - from skyflow.files import DeidentifyDocumentRequestFile + from skyflow import FileDataDeidentifyAudio, Skyflow client = Skyflow( token="YOUR_TOKEN", ) - client.files.deidentify_document( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=DeidentifyDocumentRequestFile( - base_64="SGkgaSBhbSBEZXZhbnNodSwgbGl2...aW5nIGluIGNhbGlmb3JuaWEuIA==", - data_format="docx", + client.files.deidentify_audio( + file=FileDataDeidentifyAudio( + base_64="base64", + data_format="mp3", ), + vault_id="vault_id", ) """ - _response = self._raw_client.deidentify_document( - vault_id=vault_id, + _response = self._raw_client.deidentify_audio( file=file, - configuration_id=configuration_id, + vault_id=vault_id, + output_transcription=output_transcription, + output_processed_audio=output_processed_audio, + bleep_start_padding=bleep_start_padding, + bleep_stop_padding=bleep_stop_padding, + bleep_frequency=bleep_frequency, + bleep_gain=bleep_gain, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data - def deidentify_pdf( + def deidentify_document( self, *, - vault_id: VaultId, - file: DeidentifyPdfRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - density: typing.Optional[float] = OMIT, - max_resolution: typing.Optional[float] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyDocument, + vault_id: str, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyDocumentEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyFileResponse: """ - De-identifies sensitive data from a PDF file. This operation includes options specific to PDF files.
For broader file type support, see De-identify Document and De-identify File. + De-identifies sensitive data from a document file. This operation includes options applicable to all supported document file types.
For more specific options, see the file type-specific operations (like De-identify PDF) where they're available. For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyDocument - file : DeidentifyDocumentRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyDocumentEntityTypesItem]] + Entities to detect and de-identify. - entity_types : typing.Optional[EntityTypes] + token_type : typing.Optional[TokenTypeMapping] - token_type : typing.Optional[TokenTypeWithoutVault] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - allow_regex : typing.Optional[AllowRegex] - - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyFileResponse - A successful response.
+ OK Examples -------- - from skyflow import Skyflow - from skyflow.files import DeidentifyPdfRequestFile + from skyflow import FileDataDeidentifyDocument, Skyflow client = Skyflow( token="YOUR_TOKEN", ) - client.files.deidentify_pdf( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=DeidentifyPdfRequestFile( - base_64="SGkgaSBhbSBEZXZhbnNodSwgbGl2...aW5nIGluIGNhbGlmb3JuaWEuIA==", + client.files.deidentify_document( + file=FileDataDeidentifyDocument( + base_64="base64", + data_format="pdf", ), + vault_id="vault_id", ) """ - _response = self._raw_client.deidentify_pdf( - vault_id=vault_id, + _response = self._raw_client.deidentify_document( file=file, - configuration_id=configuration_id, - density=density, - max_resolution=max_resolution, + vault_id=vault_id, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data - def deidentify_image( + def deidentify_pdf( self, *, - vault_id: VaultId, - file: DeidentifyImageRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - output_processed_image: typing.Optional[bool] = OMIT, - output_ocr_text: typing.Optional[bool] = OMIT, - masking_method: typing.Optional[DeidentifyImageRequestMaskingMethod] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyPdf, + vault_id: str, + density: typing.Optional[int] = OMIT, + max_resolution: typing.Optional[int] = OMIT, + entity_types: typing.Optional[ + typing.Sequence[DeidentifyFileDocumentPdfRequestDeidentifyPdfEntityTypesItem] + ] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyFileResponse: """ - De-identifies sensitive data from an image file. This operation includes options applicable to all supported image file types.
For broader file type support, see De-identify File. + De-identifies sensitive data from a PDF file. This operation includes options specific to PDF files.
For broader file type support, see De-identify Document and De-identify File. Parameters ---------- - vault_id : VaultId - - file : DeidentifyImageRequestFile - File to de-identify. Files are specified as Base64-encoded data. + file : FileDataDeidentifyPdf - configuration_id : typing.Optional[ConfigurationId] + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - output_processed_image : typing.Optional[bool] - If `true`, includes processed image in the output. - - output_ocr_text : typing.Optional[bool] - If `true`, includes OCR text output in the response. + density : typing.Optional[int] + Pixel density at which to process the PDF file. - masking_method : typing.Optional[DeidentifyImageRequestMaskingMethod] - Method to mask the entities in the image. + max_resolution : typing.Optional[int] + Max resolution at which to process the PDF file. - entity_types : typing.Optional[EntityTypes] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileDocumentPdfRequestDeidentifyPdfEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenTypeWithoutVault] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyFileResponse - A successful response. 
+ OK Examples -------- - from skyflow import Skyflow - from skyflow.files import DeidentifyImageRequestFile + from skyflow import FileDataDeidentifyPdf, Skyflow client = Skyflow( token="YOUR_TOKEN", ) - client.files.deidentify_image( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=DeidentifyImageRequestFile( - base_64="SGkgaSBhbSBEZXZhbnNodSwgbGl2...aW5nIGluIGNhbGlmb3JuaWEuIA==", - data_format="jpg", + client.files.deidentify_pdf( + file=FileDataDeidentifyPdf( + base_64="base64", ), + vault_id="vault_id", ) """ - _response = self._raw_client.deidentify_image( - vault_id=vault_id, + _response = self._raw_client.deidentify_pdf( file=file, - configuration_id=configuration_id, - output_processed_image=output_processed_image, - output_ocr_text=output_ocr_text, - masking_method=masking_method, + vault_id=vault_id, + density=density, + max_resolution=max_resolution, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data - def deidentify_text( + def deidentify_image( self, *, - vault_id: VaultId, - file: DeidentifyTextRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyImage, + vault_id: str, + output_processed_image: typing.Optional[bool] = OMIT, + output_ocr_text: typing.Optional[bool] = OMIT, + masking_method: typing.Optional[DeidentifyFileImageRequestDeidentifyImageMaskingMethod] = OMIT, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileImageRequestDeidentifyImageEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyFileResponse: """ - De-identifies sensitive data from a text file. This operation includes options applicable to all supported image text types.
For broader file type support, see De-identify File. + De-identifies sensitive data from an image file. This operation includes options applicable to all supported image file types.
For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyImage - file : DeidentifyTextRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] + output_processed_image : typing.Optional[bool] + If `true`, includes processed image in the output. + + output_ocr_text : typing.Optional[bool] + If `true`, includes text detected by OCR in the response. - entity_types : typing.Optional[EntityTypes] + masking_method : typing.Optional[DeidentifyFileImageRequestDeidentifyImageMaskingMethod] + Method to mask the entities in the image. + + entity_types : typing.Optional[typing.Sequence[DeidentifyFileImageRequestDeidentifyImageEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenTypeWithoutVault] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyFileResponse - A successful response. 
+ OK Examples -------- - from skyflow import Skyflow - from skyflow.files import DeidentifyTextRequestFile + from skyflow import FileDataDeidentifyImage, Skyflow client = Skyflow( token="YOUR_TOKEN", ) - client.files.deidentify_text( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=DeidentifyTextRequestFile( - base_64="Zm9vYmFy", + client.files.deidentify_image( + file=FileDataDeidentifyImage( + base_64="base64", + data_format="jpg", ), + vault_id="vault_id", ) """ - _response = self._raw_client.deidentify_text( - vault_id=vault_id, + _response = self._raw_client.deidentify_image( file=file, - configuration_id=configuration_id, + vault_id=vault_id, + output_processed_image=output_processed_image, + output_ocr_text=output_ocr_text, + masking_method=masking_method, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data - def deidentify_structured_text( + def deidentify_presentation( self, *, - vault_id: VaultId, - file: DeidentifyStructuredTextRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyPresentation, + vault_id: str, + entity_types: typing.Optional[ + typing.Sequence[DeidentifyFileRequestDeidentifyPresentationEntityTypesItem] + ] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyFileResponse: """ - De-identifies sensitive data from a structured text file. This operation includes options applicable to all supported structured text file types.
For broader file type support, see De-identify File. + De-identifies sensitive data from a presentation file. This operation includes options applicable to all supported presentation file types.
For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyPresentation - file : DeidentifyStructuredTextRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyPresentationEntityTypesItem]] + Entities to detect and de-identify. - entity_types : typing.Optional[EntityTypes] + token_type : typing.Optional[TokenTypeMapping] - token_type : typing.Optional[TokenTypeWithoutVault] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - allow_regex : typing.Optional[AllowRegex] - - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyFileResponse - A successful response. 
+ OK Examples -------- - from skyflow import Skyflow - from skyflow.files import DeidentifyStructuredTextRequestFile + from skyflow import FileDataDeidentifyPresentation, Skyflow client = Skyflow( token="YOUR_TOKEN", ) - client.files.deidentify_structured_text( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=DeidentifyStructuredTextRequestFile( - base_64="SGkgaSBhbSBEZXZhbnNodSwgbGl2...aW5nIGluIGNhbGlmb3JuaWEuIA==", - data_format="json", + client.files.deidentify_presentation( + file=FileDataDeidentifyPresentation( + base_64="base64", + data_format="ppt", ), + vault_id="vault_id", ) """ - _response = self._raw_client.deidentify_structured_text( - vault_id=vault_id, + _response = self._raw_client.deidentify_presentation( file=file, - configuration_id=configuration_id, + vault_id=vault_id, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data @@ -508,14 +583,16 @@ def deidentify_structured_text( def deidentify_spreadsheet( self, *, - vault_id: VaultId, - file: DeidentifySpreadsheetRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifySpreadsheet, + vault_id: str, + entity_types: typing.Optional[ + typing.Sequence[DeidentifyFileRequestDeidentifySpreadsheetEntityTypesItem] + ] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyFileResponse: """ @@ -523,319 +600,299 @@ def deidentify_spreadsheet( Parameters ---------- - vault_id : VaultId - - file : DeidentifySpreadsheetRequestFile - File to de-identify. Files are specified as Base64-encoded data. + file : FileDataDeidentifySpreadsheet - configuration_id : typing.Optional[ConfigurationId] + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - entity_types : typing.Optional[EntityTypes] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifySpreadsheetEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenTypeWithoutVault] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. 
If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyFileResponse - A successful response. + OK Examples -------- - from skyflow import Skyflow - from skyflow.files import DeidentifySpreadsheetRequestFile + from skyflow import FileDataDeidentifySpreadsheet, Skyflow client = Skyflow( token="YOUR_TOKEN", ) client.files.deidentify_spreadsheet( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=DeidentifySpreadsheetRequestFile( - base_64="SGkgaSBhbSBEZXZhbnNodSwgbGl2...aW5nIGluIGNhbGlmb3JuaWEuIA==", + file=FileDataDeidentifySpreadsheet( + base_64="base64", data_format="csv", ), + vault_id="vault_id", ) """ _response = self._raw_client.deidentify_spreadsheet( - vault_id=vault_id, file=file, - configuration_id=configuration_id, + vault_id=vault_id, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data - def deidentify_presentation( + def deidentify_structured_text( self, *, - vault_id: VaultId, - file: DeidentifyPresentationRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyStructuredText, + vault_id: str, + entity_types: typing.Optional[ + typing.Sequence[DeidentifyFileRequestDeidentifyStructuredTextEntityTypesItem] + ] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyFileResponse: """ - De-identifies sensitive data from a presentation file. This operation includes options applicable to all supported presentation file types.

For broader file type support, see De-identify File. + De-identifies sensitive data from a structured text file. This operation includes options applicable to all supported structured text file types.
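As a concrete illustration of the structured-text call added above, the sketch below Base64-encodes a local JSON file before passing it to deidentify_structured_text. This is a minimal sketch based on the generated example in this diff; the file path, token, and vault ID are placeholders, and error handling is omitted.

import base64

from skyflow import FileDataDeidentifyStructuredText, Skyflow

client = Skyflow(
    token="YOUR_TOKEN",  # placeholder bearer token
)

# The file operations in this API take Base64-encoded file contents, so read
# the local JSON file and encode it before building the request.
with open("customers.json", "rb") as fh:  # hypothetical input file
    encoded = base64.b64encode(fh.read()).decode("utf-8")

response = client.files.deidentify_structured_text(
    file=FileDataDeidentifyStructuredText(
        base_64=encoded,
        data_format="json",
    ),
    vault_id="YOUR_VAULT_ID",  # placeholder vault ID
)
print(response)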

For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyStructuredText - file : DeidentifyPresentationRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyStructuredTextEntityTypesItem]] + Entities to detect and de-identify. - entity_types : typing.Optional[EntityTypes] + token_type : typing.Optional[TokenTypeMapping] - token_type : typing.Optional[TokenTypeWithoutVault] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - allow_regex : typing.Optional[AllowRegex] - - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyFileResponse - A successful response. 
+ OK Examples -------- - from skyflow import Skyflow - from skyflow.files import DeidentifyPresentationRequestFile + from skyflow import FileDataDeidentifyStructuredText, Skyflow client = Skyflow( token="YOUR_TOKEN", ) - client.files.deidentify_presentation( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=DeidentifyPresentationRequestFile( - base_64="SGkgaSBhbSBEZXZhbnNodSwgbGl2...aW5nIGluIGNhbGlmb3JuaWEuIA==", - data_format="pptx", + client.files.deidentify_structured_text( + file=FileDataDeidentifyStructuredText( + base_64="base64", + data_format="json", ), + vault_id="vault_id", ) """ - _response = self._raw_client.deidentify_presentation( - vault_id=vault_id, + _response = self._raw_client.deidentify_structured_text( file=file, - configuration_id=configuration_id, + vault_id=vault_id, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data - def deidentify_audio( + def deidentify_text( self, *, - vault_id: VaultId, - file: DeidentifyAudioRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - output_processed_audio: typing.Optional[bool] = OMIT, - output_transcription: typing.Optional[DeidentifyAudioRequestOutputTranscription] = OMIT, - bleep_gain: typing.Optional[float] = OMIT, - bleep_frequency: typing.Optional[float] = OMIT, - bleep_start_padding: typing.Optional[float] = OMIT, - bleep_stop_padding: typing.Optional[float] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyText, + vault_id: str, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyTextEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyFileResponse: """ - De-identifies sensitive data from an audio file. This operation includes options applicable to all supported audio file types.

For broader file type support, see De-identify File. + De-identifies sensitive data from a text file. This operation includes options applicable to all supported text file types.
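The allow_regex/restrict_regex behaviour documented in these docstrings (an allow match wins when both match, and a restrict pattern must cover the whole detected entity) is easiest to see in a call. Below is a minimal sketch against the new deidentify_text signature; the regex patterns, file path, token, and vault ID are illustrative placeholders rather than recommended values.

import base64

from skyflow import FileDataDeidentifyText, Skyflow

client = Skyflow(token="YOUR_TOKEN")  # placeholder bearer token

with open("support_ticket.txt", "rb") as fh:  # hypothetical input file
    encoded = base64.b64encode(fh.read()).decode("utf-8")

response = client.files.deidentify_text(
    file=FileDataDeidentifyText(base_64=encoded),
    vault_id="YOUR_VAULT_ID",  # placeholder vault ID
    # Detected entities matching this pattern (fully or as a substring) stay in
    # plaintext; an allow_regex match wins even if restrict_regex also matches.
    allow_regex=[r"CASE-\d{6}"],
    # Detected entities that this pattern matches in their entirety are
    # replaced with '[RESTRICTED]'; substring-only matches are ignored.
    restrict_regex=[r"\d{9}"],
)
print(response)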

For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyText - file : DeidentifyAudioRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyTextEntityTypesItem]] + Entities to detect and de-identify. - output_processed_audio : typing.Optional[bool] - If `true`, includes processed audio file in the response. + token_type : typing.Optional[TokenTypeMapping] - output_transcription : typing.Optional[DeidentifyAudioRequestOutputTranscription] - Type of transcription to output. + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - bleep_gain : typing.Optional[float] - Relative loudness of the bleep in dB. Positive values increase its loudness, and negative values decrease it. - - bleep_frequency : typing.Optional[float] - The pitch of the bleep sound, in Hz. The higher the number, the higher the pitch. - - bleep_start_padding : typing.Optional[float] - Padding added to the beginning of a bleep, in seconds. - - bleep_stop_padding : typing.Optional[float] - Padding added to the end of a bleep, in seconds. - - entity_types : typing.Optional[EntityTypes] - - token_type : typing.Optional[TokenTypeWithoutVault] - - allow_regex : typing.Optional[AllowRegex] - - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyFileResponse - A successful response. 
+ OK Examples -------- - from skyflow import Skyflow - from skyflow.files import DeidentifyAudioRequestFile + from skyflow import FileDataDeidentifyText, Skyflow client = Skyflow( token="YOUR_TOKEN", ) - client.files.deidentify_audio( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=DeidentifyAudioRequestFile( - base_64="SGkgaSBhbSBEZXZhbnNodSwgbGl2...aW5nIGluIGNhbGlmb3JuaWEuIA==", - data_format="mp3", + client.files.deidentify_text( + file=FileDataDeidentifyText( + base_64="base64", ), + vault_id="vault_id", ) """ - _response = self._raw_client.deidentify_audio( - vault_id=vault_id, + _response = self._raw_client.deidentify_text( file=file, - configuration_id=configuration_id, - output_processed_audio=output_processed_audio, - output_transcription=output_transcription, - bleep_gain=bleep_gain, - bleep_frequency=bleep_frequency, - bleep_start_padding=bleep_start_padding, - bleep_stop_padding=bleep_stop_padding, + vault_id=vault_id, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data - def get_run( - self, run_id: Uuid, *, vault_id: ResourceId, request_options: typing.Optional[RequestOptions] = None - ) -> DeidentifyStatusResponse: + def reidentify_file( + self, + *, + file: FileDataReidentifyFile, + vault_id: str, + format: typing.Optional[Format] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> ReidentifyFileResponse: """ - Returns the status of the detect run. + Re-identifies tokens in a file. Parameters ---------- - run_id : Uuid - ID of the detect run. + file : FileDataReidentifyFile - vault_id : ResourceId - ID of the vault. + vault_id : str + ID of the vault where the entities are stored. + + format : typing.Optional[Format] request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - DeidentifyStatusResponse - A successful response. + ReidentifyFileResponse + OK Examples -------- - from skyflow import Skyflow + from skyflow import FileDataReidentifyFile, Skyflow client = Skyflow( token="YOUR_TOKEN", ) - client.files.get_run( - run_id="run_id", + client.files.reidentify_file( + file=FileDataReidentifyFile( + base_64="base64", + data_format="txt", + ), vault_id="vault_id", ) """ - _response = self._raw_client.get_run(run_id, vault_id=vault_id, request_options=request_options) + _response = self._raw_client.reidentify_file( + file=file, vault_id=vault_id, format=format, request_options=request_options + ) return _response.data - def reidentify_file( + def get_run( self, + run_id: str, *, - vault_id: VaultId, - file: ReidentifyFileRequestFile, - format: typing.Optional[ReidentifyFileRequestFormat] = OMIT, + vault_id: typing.Optional[str] = None, request_options: typing.Optional[RequestOptions] = None, - ) -> ReidentifyFileResponse: + ) -> DetectRunsResponse: """ - Re-identifies tokens in a file. + Returns the status of a detect run. Parameters ---------- - vault_id : VaultId + run_id : str - file : ReidentifyFileRequestFile - File to re-identify. Files are specified as Base64-encoded data or an EFS path. - - format : typing.Optional[ReidentifyFileRequestFormat] - Mapping of preferred data formatting options to entity types. Returned values are dependent on the configuration of the vault storing the data and the permissions of the user or account making the request. 
+ vault_id : typing.Optional[str] request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - ReidentifyFileResponse - A successful response. + DetectRunsResponse + OK Examples -------- from skyflow import Skyflow - from skyflow.files import ReidentifyFileRequestFile client = Skyflow( token="YOUR_TOKEN", ) - client.files.reidentify_file( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=ReidentifyFileRequestFile( - base_64="Zm9vYmFy", - data_format="txt", - ), + client.files.get_run( + run_id="run_id", + vault_id="vault_id", ) """ - _response = self._raw_client.reidentify_file( - vault_id=vault_id, file=file, format=format, request_options=request_options - ) + _response = self._raw_client.get_run(run_id, vault_id=vault_id, request_options=request_options) return _response.data @@ -857,14 +914,14 @@ def with_raw_response(self) -> AsyncRawFilesClient: async def deidentify_file( self, *, - vault_id: VaultId, - file: DeidentifyFileRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileData, + vault_id: str, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileRequestEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyFileResponse: """ @@ -872,37 +929,40 @@ async def deidentify_file( Parameters ---------- - vault_id : VaultId + file : FileData - file : DeidentifyFileRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestEntityTypesItem]] + Entities to detect and de-identify. - entity_types : typing.Optional[EntityTypes] + token_type : typing.Optional[TokenTypeMapping] - token_type : typing.Optional[TokenTypeWithoutVault] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - allow_regex : typing.Optional[AllowRegex] - - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. 
If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyFileResponse - A successful response. + OK Examples -------- import asyncio - from skyflow import AsyncSkyflow - from skyflow.files import DeidentifyFileRequestFile + from skyflow import AsyncSkyflow, FileData client = AsyncSkyflow( token="YOUR_TOKEN", @@ -911,78 +971,105 @@ async def deidentify_file( async def main() -> None: await client.files.deidentify_file( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=DeidentifyFileRequestFile( - base_64="Zm9vYmFy", - data_format="txt", + file=FileData( + base_64="base64", + data_format="mp3", ), + vault_id="vault_id", ) asyncio.run(main()) """ _response = await self._raw_client.deidentify_file( - vault_id=vault_id, file=file, - configuration_id=configuration_id, + vault_id=vault_id, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data - async def deidentify_document( + async def deidentify_audio( self, *, - vault_id: VaultId, - file: DeidentifyDocumentRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyAudio, + vault_id: str, + output_transcription: typing.Optional[DeidentifyFileAudioRequestDeidentifyAudioOutputTranscription] = OMIT, + output_processed_audio: typing.Optional[bool] = OMIT, + bleep_start_padding: typing.Optional[float] = OMIT, + bleep_stop_padding: typing.Optional[float] = OMIT, + bleep_frequency: typing.Optional[int] = OMIT, + bleep_gain: typing.Optional[int] = OMIT, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileAudioRequestDeidentifyAudioEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyFileResponse: """ - De-identifies sensitive data from a document file. This operation includes options applicable to all supported document file types.

For more specific options, see the file type-specific operations (like De-identify PDF) where they're available. For broader file type support, see De-identify File. + De-identifies sensitive data from an audio file. This operation includes options applicable to all supported audio file types.
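To show how the bleep-related parameters documented here fit together, the following is a hedged sketch of the new async deidentify_audio call. The numeric values, file path, token, and vault ID are illustrative placeholders; output_transcription is omitted because its accepted values aren't shown in this diff.

import asyncio
import base64

from skyflow import AsyncSkyflow, FileDataDeidentifyAudio

client = AsyncSkyflow(token="YOUR_TOKEN")  # placeholder bearer token


async def main() -> None:
    # Base64-encode a local recording; the path is a placeholder.
    with open("call_recording.mp3", "rb") as fh:
        encoded = base64.b64encode(fh.read()).decode("utf-8")

    response = await client.files.deidentify_audio(
        file=FileDataDeidentifyAudio(
            base_64=encoded,
            data_format="mp3",
        ),
        vault_id="YOUR_VAULT_ID",     # placeholder vault ID
        output_processed_audio=True,  # include the bleeped audio in the response
        bleep_frequency=1000,         # pitch of the bleep tone, in Hz
        bleep_gain=-3,                # play the bleep slightly quieter than the source
        bleep_start_padding=0.2,      # start the bleep 0.2 s before each detected entity
        bleep_stop_padding=0.2,       # and hold it 0.2 s after the entity ends
    )
    print(response)


asyncio.run(main())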

For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyAudio - file : DeidentifyDocumentRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] + output_transcription : typing.Optional[DeidentifyFileAudioRequestDeidentifyAudioOutputTranscription] + Type of transcription to output. - entity_types : typing.Optional[EntityTypes] + output_processed_audio : typing.Optional[bool] + Whether to include the processed audio file in the response. - token_type : typing.Optional[TokenTypeWithoutVault] + bleep_start_padding : typing.Optional[float] + Padding added to the beginning of a bleep, in seconds. - allow_regex : typing.Optional[AllowRegex] + bleep_stop_padding : typing.Optional[float] + Padding added to the end of a bleep, in seconds. - restrict_regex : typing.Optional[RestrictRegex] + bleep_frequency : typing.Optional[int] + The pitch of the bleep sound, in Hz. The higher the number, the higher the pitch. + + bleep_gain : typing.Optional[int] + Relative loudness of the bleep in dB. Positive values increase its loudness, and negative values decrease it. + + entity_types : typing.Optional[typing.Sequence[DeidentifyFileAudioRequestDeidentifyAudioEntityTypesItem]] + Entities to detect and de-identify. + + token_type : typing.Optional[TokenTypeMapping] + + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. + + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyFileResponse - A successful response. 
+ OK Examples -------- import asyncio - from skyflow import AsyncSkyflow - from skyflow.files import DeidentifyDocumentRequestFile + from skyflow import AsyncSkyflow, FileDataDeidentifyAudio client = AsyncSkyflow( token="YOUR_TOKEN", @@ -990,87 +1077,88 @@ async def deidentify_document( async def main() -> None: - await client.files.deidentify_document( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=DeidentifyDocumentRequestFile( - base_64="SGkgaSBhbSBEZXZhbnNodSwgbGl2...aW5nIGluIGNhbGlmb3JuaWEuIA==", - data_format="docx", + await client.files.deidentify_audio( + file=FileDataDeidentifyAudio( + base_64="base64", + data_format="mp3", ), + vault_id="vault_id", ) asyncio.run(main()) """ - _response = await self._raw_client.deidentify_document( - vault_id=vault_id, + _response = await self._raw_client.deidentify_audio( file=file, - configuration_id=configuration_id, + vault_id=vault_id, + output_transcription=output_transcription, + output_processed_audio=output_processed_audio, + bleep_start_padding=bleep_start_padding, + bleep_stop_padding=bleep_stop_padding, + bleep_frequency=bleep_frequency, + bleep_gain=bleep_gain, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data - async def deidentify_pdf( + async def deidentify_document( self, *, - vault_id: VaultId, - file: DeidentifyPdfRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - density: typing.Optional[float] = OMIT, - max_resolution: typing.Optional[float] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyDocument, + vault_id: str, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyDocumentEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyFileResponse: """ - De-identifies sensitive data from a PDF file. This operation includes options specific to PDF files.

For broader file type support, see De-identify Document and De-identify File. + De-identifies sensitive data from a document file. This operation includes options applicable to all supported document file types.

For more specific options, see the file type-specific opertions (like De-identify PDF) where they're available. For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyDocument - file : DeidentifyPdfRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyDocumentEntityTypesItem]] + Entities to detect and de-identify. - density : typing.Optional[float] - Pixel density at which to process the PDF file. + token_type : typing.Optional[TokenTypeMapping] - max_resolution : typing.Optional[float] - Max resolution at which to process the PDF file. + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - entity_types : typing.Optional[EntityTypes] - - token_type : typing.Optional[TokenTypeWithoutVault] - - allow_regex : typing.Optional[AllowRegex] - - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyFileResponse - A successful response. 
+ OK Examples -------- import asyncio - from skyflow import AsyncSkyflow - from skyflow.files import DeidentifyPdfRequestFile + from skyflow import AsyncSkyflow, FileDataDeidentifyDocument client = AsyncSkyflow( token="YOUR_TOKEN", @@ -1078,92 +1166,92 @@ async def deidentify_pdf( async def main() -> None: - await client.files.deidentify_pdf( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=DeidentifyPdfRequestFile( - base_64="SGkgaSBhbSBEZXZhbnNodSwgbGl2...aW5nIGluIGNhbGlmb3JuaWEuIA==", + await client.files.deidentify_document( + file=FileDataDeidentifyDocument( + base_64="base64", + data_format="pdf", ), + vault_id="vault_id", ) asyncio.run(main()) """ - _response = await self._raw_client.deidentify_pdf( - vault_id=vault_id, + _response = await self._raw_client.deidentify_document( file=file, - configuration_id=configuration_id, - density=density, - max_resolution=max_resolution, + vault_id=vault_id, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data - async def deidentify_image( + async def deidentify_pdf( self, *, - vault_id: VaultId, - file: DeidentifyImageRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - output_processed_image: typing.Optional[bool] = OMIT, - output_ocr_text: typing.Optional[bool] = OMIT, - masking_method: typing.Optional[DeidentifyImageRequestMaskingMethod] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyPdf, + vault_id: str, + density: typing.Optional[int] = OMIT, + max_resolution: typing.Optional[int] = OMIT, + entity_types: typing.Optional[ + typing.Sequence[DeidentifyFileDocumentPdfRequestDeidentifyPdfEntityTypesItem] + ] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyFileResponse: """ - De-identifies sensitive data from an image file. This operation includes options applicable to all supported image file types.

For broader file type support, see De-identify File. + De-identifies sensitive data from a PDF file. This operation includes options specific to PDF files.
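A brief sketch of the new async deidentify_pdf signature with the PDF-specific density and max_resolution options. The numbers are illustrative only (the diff documents the parameters but not recommended values), and the path, token, and vault ID are placeholders.

import asyncio
import base64

from skyflow import AsyncSkyflow, FileDataDeidentifyPdf

client = AsyncSkyflow(token="YOUR_TOKEN")  # placeholder bearer token


async def main() -> None:
    with open("statement.pdf", "rb") as fh:  # hypothetical input file
        encoded = base64.b64encode(fh.read()).decode("utf-8")

    response = await client.files.deidentify_pdf(
        file=FileDataDeidentifyPdf(base_64=encoded),
        vault_id="YOUR_VAULT_ID",  # placeholder vault ID
        density=150,               # pixel density for rasterizing pages (illustrative value)
        max_resolution=2048,       # cap on the processing resolution (illustrative value)
    )
    print(response)


asyncio.run(main())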

For broader file type support, see De-identify Document and De-identify File. Parameters ---------- - vault_id : VaultId - - file : DeidentifyImageRequestFile - File to de-identify. Files are specified as Base64-encoded data. + file : FileDataDeidentifyPdf - configuration_id : typing.Optional[ConfigurationId] + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - output_processed_image : typing.Optional[bool] - If `true`, includes processed image in the output. - - output_ocr_text : typing.Optional[bool] - If `true`, includes OCR text output in the response. + density : typing.Optional[int] + Pixel density at which to process the PDF file. - masking_method : typing.Optional[DeidentifyImageRequestMaskingMethod] - Method to mask the entities in the image. + max_resolution : typing.Optional[int] + Max resolution at which to process the PDF file. - entity_types : typing.Optional[EntityTypes] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileDocumentPdfRequestDeidentifyPdfEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenTypeWithoutVault] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyFileResponse - A successful response. 
+ OK Examples -------- import asyncio - from skyflow import AsyncSkyflow - from skyflow.files import DeidentifyImageRequestFile + from skyflow import AsyncSkyflow, FileDataDeidentifyPdf client = AsyncSkyflow( token="YOUR_TOKEN", @@ -1171,82 +1259,95 @@ async def deidentify_image( async def main() -> None: - await client.files.deidentify_image( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=DeidentifyImageRequestFile( - base_64="SGkgaSBhbSBEZXZhbnNodSwgbGl2...aW5nIGluIGNhbGlmb3JuaWEuIA==", - data_format="jpg", + await client.files.deidentify_pdf( + file=FileDataDeidentifyPdf( + base_64="base64", ), + vault_id="vault_id", ) asyncio.run(main()) """ - _response = await self._raw_client.deidentify_image( - vault_id=vault_id, + _response = await self._raw_client.deidentify_pdf( file=file, - configuration_id=configuration_id, - output_processed_image=output_processed_image, - output_ocr_text=output_ocr_text, - masking_method=masking_method, + vault_id=vault_id, + density=density, + max_resolution=max_resolution, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data - async def deidentify_text( + async def deidentify_image( self, *, - vault_id: VaultId, - file: DeidentifyTextRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyImage, + vault_id: str, + output_processed_image: typing.Optional[bool] = OMIT, + output_ocr_text: typing.Optional[bool] = OMIT, + masking_method: typing.Optional[DeidentifyFileImageRequestDeidentifyImageMaskingMethod] = OMIT, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileImageRequestDeidentifyImageEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyFileResponse: """ - De-identifies sensitive data from a text file. This operation includes options applicable to all supported image text types.

For broader file type support, see De-identify File. + De-identifies sensitive data from an image file. This operation includes options applicable to all supported image file types.
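The image variant adds output flags for the redacted image and the OCR text. A minimal sketch of the new async call follows; masking_method is left out because its accepted values aren't listed in this diff, and the path, token, and vault ID are placeholders.

import asyncio
import base64

from skyflow import AsyncSkyflow, FileDataDeidentifyImage

client = AsyncSkyflow(token="YOUR_TOKEN")  # placeholder bearer token


async def main() -> None:
    with open("scanned_id.jpg", "rb") as fh:  # hypothetical input file
        encoded = base64.b64encode(fh.read()).decode("utf-8")

    response = await client.files.deidentify_image(
        file=FileDataDeidentifyImage(
            base_64=encoded,
            data_format="jpg",
        ),
        vault_id="YOUR_VAULT_ID",     # placeholder vault ID
        output_processed_image=True,  # return the redacted image
        output_ocr_text=True,         # also return the text detected by OCR
    )
    print(response)


asyncio.run(main())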

For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyImage - file : DeidentifyTextRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] + output_processed_image : typing.Optional[bool] + If `true`, includes processed image in the output. + + output_ocr_text : typing.Optional[bool] + If `true`, includes text detected by OCR in the response. - entity_types : typing.Optional[EntityTypes] + masking_method : typing.Optional[DeidentifyFileImageRequestDeidentifyImageMaskingMethod] + Method to mask the entities in the image. + + entity_types : typing.Optional[typing.Sequence[DeidentifyFileImageRequestDeidentifyImageEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenTypeWithoutVault] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyFileResponse - A successful response. 
+ OK Examples -------- import asyncio - from skyflow import AsyncSkyflow - from skyflow.files import DeidentifyTextRequestFile + from skyflow import AsyncSkyflow, FileDataDeidentifyImage client = AsyncSkyflow( token="YOUR_TOKEN", @@ -1254,78 +1355,87 @@ async def deidentify_text( async def main() -> None: - await client.files.deidentify_text( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=DeidentifyTextRequestFile( - base_64="Zm9vYmFy", + await client.files.deidentify_image( + file=FileDataDeidentifyImage( + base_64="base64", + data_format="jpg", ), + vault_id="vault_id", ) asyncio.run(main()) """ - _response = await self._raw_client.deidentify_text( - vault_id=vault_id, + _response = await self._raw_client.deidentify_image( file=file, - configuration_id=configuration_id, + vault_id=vault_id, + output_processed_image=output_processed_image, + output_ocr_text=output_ocr_text, + masking_method=masking_method, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data - async def deidentify_structured_text( + async def deidentify_presentation( self, *, - vault_id: VaultId, - file: DeidentifyStructuredTextRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyPresentation, + vault_id: str, + entity_types: typing.Optional[ + typing.Sequence[DeidentifyFileRequestDeidentifyPresentationEntityTypesItem] + ] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyFileResponse: """ - De-identifies sensitive data from a structured text file. This operation includes options applicable to all supported structured text file types.

For broader file type support, see De-identify File. + De-identifies sensitive data from a presentation file. This operation includes options applicable to all supported presentation file types.
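The docstrings added in this patch note that configuration_id can't be combined with request fields other than vault_id, text, and file. The sketch below assumes an existing Detect configuration and therefore passes only those fields; the configuration ID, file path, token, and vault ID are placeholders.

import asyncio
import base64

from skyflow import AsyncSkyflow, FileDataDeidentifyPresentation

client = AsyncSkyflow(token="YOUR_TOKEN")  # placeholder bearer token


async def main() -> None:
    with open("deck.ppt", "rb") as fh:  # hypothetical input file
        encoded = base64.b64encode(fh.read()).decode("utf-8")

    # Per the docstring, configuration_id can't be specified with fields other
    # than vault_id, text, and file, so no entity or regex options are passed
    # alongside it here.
    response = await client.files.deidentify_presentation(
        file=FileDataDeidentifyPresentation(
            base_64=encoded,
            data_format="ppt",
        ),
        vault_id="YOUR_VAULT_ID",                  # placeholder vault ID
        configuration_id="YOUR_CONFIGURATION_ID",  # placeholder Detect configuration ID
    )
    print(response)


asyncio.run(main())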

For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyPresentation - file : DeidentifyStructuredTextRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyPresentationEntityTypesItem]] + Entities to detect and de-identify. - entity_types : typing.Optional[EntityTypes] + token_type : typing.Optional[TokenTypeMapping] - token_type : typing.Optional[TokenTypeWithoutVault] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - allow_regex : typing.Optional[AllowRegex] - - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyFileResponse - A successful response. 
+ OK Examples -------- import asyncio - from skyflow import AsyncSkyflow - from skyflow.files import DeidentifyStructuredTextRequestFile + from skyflow import AsyncSkyflow, FileDataDeidentifyPresentation client = AsyncSkyflow( token="YOUR_TOKEN", @@ -1333,26 +1443,26 @@ async def deidentify_structured_text( async def main() -> None: - await client.files.deidentify_structured_text( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=DeidentifyStructuredTextRequestFile( - base_64="SGkgaSBhbSBEZXZhbnNodSwgbGl2...aW5nIGluIGNhbGlmb3JuaWEuIA==", - data_format="json", + await client.files.deidentify_presentation( + file=FileDataDeidentifyPresentation( + base_64="base64", + data_format="ppt", ), + vault_id="vault_id", ) asyncio.run(main()) """ - _response = await self._raw_client.deidentify_structured_text( - vault_id=vault_id, + _response = await self._raw_client.deidentify_presentation( file=file, - configuration_id=configuration_id, + vault_id=vault_id, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data @@ -1360,14 +1470,16 @@ async def main() -> None: async def deidentify_spreadsheet( self, *, - vault_id: VaultId, - file: DeidentifySpreadsheetRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifySpreadsheet, + vault_id: str, + entity_types: typing.Optional[ + typing.Sequence[DeidentifyFileRequestDeidentifySpreadsheetEntityTypesItem] + ] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyFileResponse: """ @@ -1375,37 +1487,40 @@ async def deidentify_spreadsheet( Parameters ---------- - vault_id : VaultId - - file : DeidentifySpreadsheetRequestFile - File to de-identify. Files are specified as Base64-encoded data. + file : FileDataDeidentifySpreadsheet - configuration_id : typing.Optional[ConfigurationId] + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - entity_types : typing.Optional[EntityTypes] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifySpreadsheetEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenTypeWithoutVault] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. 
Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyFileResponse - A successful response. + OK Examples -------- import asyncio - from skyflow import AsyncSkyflow - from skyflow.files import DeidentifySpreadsheetRequestFile + from skyflow import AsyncSkyflow, FileDataDeidentifySpreadsheet client = AsyncSkyflow( token="YOUR_TOKEN", @@ -1414,78 +1529,83 @@ async def deidentify_spreadsheet( async def main() -> None: await client.files.deidentify_spreadsheet( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=DeidentifySpreadsheetRequestFile( - base_64="SGkgaSBhbSBEZXZhbnNodSwgbGl2...aW5nIGluIGNhbGlmb3JuaWEuIA==", + file=FileDataDeidentifySpreadsheet( + base_64="base64", data_format="csv", ), + vault_id="vault_id", ) asyncio.run(main()) """ _response = await self._raw_client.deidentify_spreadsheet( - vault_id=vault_id, file=file, - configuration_id=configuration_id, + vault_id=vault_id, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data - async def deidentify_presentation( + async def deidentify_structured_text( self, *, - vault_id: VaultId, - file: DeidentifyPresentationRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyStructuredText, + vault_id: str, + entity_types: typing.Optional[ + typing.Sequence[DeidentifyFileRequestDeidentifyStructuredTextEntityTypesItem] + ] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyFileResponse: """ - De-identifies sensitive data from a presentation file. This operation includes options applicable to all supported presentation file types.

For broader file type support, see De-identify File. + De-identifies sensitive data from a structured text file. This operation includes options applicable to all supported structured text file types.

For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyStructuredText - file : DeidentifyPresentationRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyStructuredTextEntityTypesItem]] + Entities to detect and de-identify. - entity_types : typing.Optional[EntityTypes] + token_type : typing.Optional[TokenTypeMapping] - token_type : typing.Optional[TokenTypeWithoutVault] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - allow_regex : typing.Optional[AllowRegex] - - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyFileResponse - A successful response. 
+ OK Examples -------- import asyncio - from skyflow import AsyncSkyflow - from skyflow.files import DeidentifyPresentationRequestFile + from skyflow import AsyncSkyflow, FileDataDeidentifyStructuredText client = AsyncSkyflow( token="YOUR_TOKEN", @@ -1493,103 +1613,82 @@ async def deidentify_presentation( async def main() -> None: - await client.files.deidentify_presentation( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=DeidentifyPresentationRequestFile( - base_64="SGkgaSBhbSBEZXZhbnNodSwgbGl2...aW5nIGluIGNhbGlmb3JuaWEuIA==", - data_format="pptx", + await client.files.deidentify_structured_text( + file=FileDataDeidentifyStructuredText( + base_64="base64", + data_format="json", ), + vault_id="vault_id", ) asyncio.run(main()) """ - _response = await self._raw_client.deidentify_presentation( - vault_id=vault_id, + _response = await self._raw_client.deidentify_structured_text( file=file, - configuration_id=configuration_id, + vault_id=vault_id, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data - async def deidentify_audio( + async def deidentify_text( self, *, - vault_id: VaultId, - file: DeidentifyAudioRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - output_processed_audio: typing.Optional[bool] = OMIT, - output_transcription: typing.Optional[DeidentifyAudioRequestOutputTranscription] = OMIT, - bleep_gain: typing.Optional[float] = OMIT, - bleep_frequency: typing.Optional[float] = OMIT, - bleep_start_padding: typing.Optional[float] = OMIT, - bleep_stop_padding: typing.Optional[float] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyText, + vault_id: str, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyTextEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyFileResponse: """ - De-identifies sensitive data from an audio file. This operation includes options applicable to all supported audio file types.

For broader file type support, see De-identify File. + De-identifies sensitive data from a text file. This operation includes options applicable to all supported text file types.
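Since this is the async client, several files can be de-identified concurrently with asyncio.gather. A minimal sketch using the new deidentify_text signature follows; the file names, token, and vault ID are placeholders.

import asyncio
import base64

from skyflow import AsyncSkyflow, FileDataDeidentifyText

client = AsyncSkyflow(token="YOUR_TOKEN")  # placeholder bearer token


async def deidentify_one(path: str):
    # Base64-encode each local text file before sending it.
    with open(path, "rb") as fh:
        encoded = base64.b64encode(fh.read()).decode("utf-8")
    return await client.files.deidentify_text(
        file=FileDataDeidentifyText(base_64=encoded),
        vault_id="YOUR_VAULT_ID",  # placeholder vault ID
    )


async def main() -> None:
    paths = ["notes_1.txt", "notes_2.txt", "notes_3.txt"]  # hypothetical files
    # Fan out the requests concurrently with the async client.
    responses = await asyncio.gather(*(deidentify_one(p) for p in paths))
    for path, response in zip(paths, responses):
        print(path, response)


asyncio.run(main())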

For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyText - file : DeidentifyAudioRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyTextEntityTypesItem]] + Entities to detect and de-identify. - output_processed_audio : typing.Optional[bool] - If `true`, includes processed audio file in the response. + token_type : typing.Optional[TokenTypeMapping] - output_transcription : typing.Optional[DeidentifyAudioRequestOutputTranscription] - Type of transcription to output. + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - bleep_gain : typing.Optional[float] - Relative loudness of the bleep in dB. Positive values increase its loudness, and negative values decrease it. - - bleep_frequency : typing.Optional[float] - The pitch of the bleep sound, in Hz. The higher the number, the higher the pitch. - - bleep_start_padding : typing.Optional[float] - Padding added to the beginning of a bleep, in seconds. - - bleep_stop_padding : typing.Optional[float] - Padding added to the end of a bleep, in seconds. - - entity_types : typing.Optional[EntityTypes] - - token_type : typing.Optional[TokenTypeWithoutVault] - - allow_regex : typing.Optional[AllowRegex] - - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyFileResponse - A successful response. 
+ OK Examples -------- import asyncio - from skyflow import AsyncSkyflow - from skyflow.files import DeidentifyAudioRequestFile + from skyflow import AsyncSkyflow, FileDataDeidentifyText client = AsyncSkyflow( token="YOUR_TOKEN", @@ -1597,63 +1696,62 @@ async def deidentify_audio( async def main() -> None: - await client.files.deidentify_audio( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=DeidentifyAudioRequestFile( - base_64="SGkgaSBhbSBEZXZhbnNodSwgbGl2...aW5nIGluIGNhbGlmb3JuaWEuIA==", - data_format="mp3", + await client.files.deidentify_text( + file=FileDataDeidentifyText( + base_64="base64", ), + vault_id="vault_id", ) asyncio.run(main()) """ - _response = await self._raw_client.deidentify_audio( - vault_id=vault_id, + _response = await self._raw_client.deidentify_text( file=file, - configuration_id=configuration_id, - output_processed_audio=output_processed_audio, - output_transcription=output_transcription, - bleep_gain=bleep_gain, - bleep_frequency=bleep_frequency, - bleep_start_padding=bleep_start_padding, - bleep_stop_padding=bleep_stop_padding, + vault_id=vault_id, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data - async def get_run( - self, run_id: Uuid, *, vault_id: ResourceId, request_options: typing.Optional[RequestOptions] = None - ) -> DeidentifyStatusResponse: + async def reidentify_file( + self, + *, + file: FileDataReidentifyFile, + vault_id: str, + format: typing.Optional[Format] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> ReidentifyFileResponse: """ - Returns the status of the detect run. + Re-identifies tokens in a file. Parameters ---------- - run_id : Uuid - ID of the detect run. + file : FileDataReidentifyFile - vault_id : ResourceId - ID of the vault. + vault_id : str + ID of the vault where the entities are stored. + + format : typing.Optional[Format] request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - DeidentifyStatusResponse - A successful response. + ReidentifyFileResponse + OK Examples -------- import asyncio - from skyflow import AsyncSkyflow + from skyflow import AsyncSkyflow, FileDataReidentifyFile client = AsyncSkyflow( token="YOUR_TOKEN", @@ -1661,52 +1759,51 @@ async def get_run( async def main() -> None: - await client.files.get_run( - run_id="run_id", + await client.files.reidentify_file( + file=FileDataReidentifyFile( + base_64="base64", + data_format="txt", + ), vault_id="vault_id", ) asyncio.run(main()) """ - _response = await self._raw_client.get_run(run_id, vault_id=vault_id, request_options=request_options) + _response = await self._raw_client.reidentify_file( + file=file, vault_id=vault_id, format=format, request_options=request_options + ) return _response.data - async def reidentify_file( + async def get_run( self, + run_id: str, *, - vault_id: VaultId, - file: ReidentifyFileRequestFile, - format: typing.Optional[ReidentifyFileRequestFormat] = OMIT, + vault_id: typing.Optional[str] = None, request_options: typing.Optional[RequestOptions] = None, - ) -> ReidentifyFileResponse: + ) -> DetectRunsResponse: """ - Re-identifies tokens in a file. + Returns the status of a detect run. Parameters ---------- - vault_id : VaultId + run_id : str - file : ReidentifyFileRequestFile - File to re-identify. Files are specified as Base64-encoded data or an EFS path. 
- - format : typing.Optional[ReidentifyFileRequestFormat] - Mapping of preferred data formatting options to entity types. Returned values are dependent on the configuration of the vault storing the data and the permissions of the user or account making the request. + vault_id : typing.Optional[str] request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - ReidentifyFileResponse - A successful response. + DetectRunsResponse + OK Examples -------- import asyncio from skyflow import AsyncSkyflow - from skyflow.files import ReidentifyFileRequestFile client = AsyncSkyflow( token="YOUR_TOKEN", @@ -1714,18 +1811,13 @@ async def reidentify_file( async def main() -> None: - await client.files.reidentify_file( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - file=ReidentifyFileRequestFile( - base_64="Zm9vYmFy", - data_format="txt", - ), + await client.files.get_run( + run_id="run_id", + vault_id="vault_id", ) asyncio.run(main()) """ - _response = await self._raw_client.reidentify_file( - vault_id=vault_id, file=file, format=format, request_options=request_options - ) + _response = await self._raw_client.get_run(run_id, vault_id=vault_id, request_options=request_options) return _response.data diff --git a/skyflow/generated/rest/files/raw_client.py b/skyflow/generated/rest/files/raw_client.py index c0e535ea..863ae5e3 100644 --- a/skyflow/generated/rest/files/raw_client.py +++ b/skyflow/generated/rest/files/raw_client.py @@ -12,34 +12,55 @@ from ..core.serialization import convert_and_respect_annotation_metadata from ..errors.bad_request_error import BadRequestError from ..errors.internal_server_error import InternalServerError -from ..errors.not_found_error import NotFoundError from ..errors.unauthorized_error import UnauthorizedError -from ..types.allow_regex import AllowRegex -from ..types.configuration_id import ConfigurationId from ..types.deidentify_file_response import DeidentifyFileResponse -from ..types.deidentify_status_response import DeidentifyStatusResponse -from ..types.entity_types import EntityTypes +from ..types.detect_runs_response import DetectRunsResponse from ..types.error_response import ErrorResponse +from ..types.file_data import FileData +from ..types.file_data_deidentify_audio import FileDataDeidentifyAudio +from ..types.file_data_deidentify_document import FileDataDeidentifyDocument +from ..types.file_data_deidentify_image import FileDataDeidentifyImage +from ..types.file_data_deidentify_pdf import FileDataDeidentifyPdf +from ..types.file_data_deidentify_presentation import FileDataDeidentifyPresentation +from ..types.file_data_deidentify_spreadsheet import FileDataDeidentifySpreadsheet +from ..types.file_data_deidentify_structured_text import FileDataDeidentifyStructuredText +from ..types.file_data_deidentify_text import FileDataDeidentifyText +from ..types.file_data_reidentify_file import FileDataReidentifyFile +from ..types.format import Format from ..types.reidentify_file_response import ReidentifyFileResponse -from ..types.resource_id import ResourceId -from ..types.restrict_regex import RestrictRegex -from ..types.token_type_without_vault import TokenTypeWithoutVault +from ..types.token_type_mapping import TokenTypeMapping from ..types.transformations import Transformations -from ..types.uuid_ import Uuid -from ..types.vault_id import VaultId -from .types.deidentify_audio_request_file import DeidentifyAudioRequestFile -from .types.deidentify_audio_request_output_transcription import DeidentifyAudioRequestOutputTranscription 
-from .types.deidentify_document_request_file import DeidentifyDocumentRequestFile -from .types.deidentify_file_request_file import DeidentifyFileRequestFile -from .types.deidentify_image_request_file import DeidentifyImageRequestFile -from .types.deidentify_image_request_masking_method import DeidentifyImageRequestMaskingMethod -from .types.deidentify_pdf_request_file import DeidentifyPdfRequestFile -from .types.deidentify_presentation_request_file import DeidentifyPresentationRequestFile -from .types.deidentify_spreadsheet_request_file import DeidentifySpreadsheetRequestFile -from .types.deidentify_structured_text_request_file import DeidentifyStructuredTextRequestFile -from .types.deidentify_text_request_file import DeidentifyTextRequestFile -from .types.reidentify_file_request_file import ReidentifyFileRequestFile -from .types.reidentify_file_request_format import ReidentifyFileRequestFormat +from .types.deidentify_file_audio_request_deidentify_audio_entity_types_item import ( + DeidentifyFileAudioRequestDeidentifyAudioEntityTypesItem, +) +from .types.deidentify_file_audio_request_deidentify_audio_output_transcription import ( + DeidentifyFileAudioRequestDeidentifyAudioOutputTranscription, +) +from .types.deidentify_file_document_pdf_request_deidentify_pdf_entity_types_item import ( + DeidentifyFileDocumentPdfRequestDeidentifyPdfEntityTypesItem, +) +from .types.deidentify_file_image_request_deidentify_image_entity_types_item import ( + DeidentifyFileImageRequestDeidentifyImageEntityTypesItem, +) +from .types.deidentify_file_image_request_deidentify_image_masking_method import ( + DeidentifyFileImageRequestDeidentifyImageMaskingMethod, +) +from .types.deidentify_file_request_deidentify_document_entity_types_item import ( + DeidentifyFileRequestDeidentifyDocumentEntityTypesItem, +) +from .types.deidentify_file_request_deidentify_presentation_entity_types_item import ( + DeidentifyFileRequestDeidentifyPresentationEntityTypesItem, +) +from .types.deidentify_file_request_deidentify_spreadsheet_entity_types_item import ( + DeidentifyFileRequestDeidentifySpreadsheetEntityTypesItem, +) +from .types.deidentify_file_request_deidentify_structured_text_entity_types_item import ( + DeidentifyFileRequestDeidentifyStructuredTextEntityTypesItem, +) +from .types.deidentify_file_request_deidentify_text_entity_types_item import ( + DeidentifyFileRequestDeidentifyTextEntityTypesItem, +) +from .types.deidentify_file_request_entity_types_item import DeidentifyFileRequestEntityTypesItem # this is used as the default value for optional parameters OMIT = typing.cast(typing.Any, ...) 
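The OMIT sentinel defined above is how the generated raw client distinguishes "keyword argument not passed" from an explicit None, so that omitted parameters never reach the request body. Below is a minimal, standalone sketch of that pattern; the build_body helper is illustrative only and is not part of the generated code (in the real client the filtering happens inside the HTTP wrapper via the omit=OMIT argument passed to httpx_client.request).

import typing

# A unique sentinel: `...` cast to Any so it can serve as a default for any parameter type.
OMIT = typing.cast(typing.Any, ...)


def build_body(**params: typing.Any) -> dict:
    # Keep only the keys the caller actually supplied; an explicit None is preserved.
    return {key: value for key, value in params.items() if value is not OMIT}


build_body(vault_id="vault_id", entity_types=OMIT)  # -> {'vault_id': 'vault_id'}
build_body(vault_id="vault_id", entity_types=None)  # -> {'vault_id': 'vault_id', 'entity_types': None}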
@@ -52,14 +73,14 @@ def __init__(self, *, client_wrapper: SyncClientWrapper): def deidentify_file( self, *, - vault_id: VaultId, - file: DeidentifyFileRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileData, + vault_id: str, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileRequestEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> HttpResponse[DeidentifyFileResponse]: """ @@ -67,49 +88,51 @@ def deidentify_file( Parameters ---------- - vault_id : VaultId + file : FileData - file : DeidentifyFileRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestEntityTypesItem]] + Entities to detect and de-identify. - entity_types : typing.Optional[EntityTypes] + token_type : typing.Optional[TokenTypeMapping] - token_type : typing.Optional[TokenTypeWithoutVault] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - allow_regex : typing.Optional[AllowRegex] - - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- HttpResponse[DeidentifyFileResponse] - A successful response. 
+ OK """ _response = self._client_wrapper.httpx_client.request( "v1/detect/deidentify/file", method="POST", json={ + "file": convert_and_respect_annotation_metadata(object_=file, annotation=FileData, direction="write"), "vault_id": vault_id, - "file": convert_and_respect_annotation_metadata( - object_=file, annotation=DeidentifyFileRequestFile, direction="write" - ), - "configuration_id": configuration_id, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenTypeWithoutVault, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -165,67 +188,101 @@ def deidentify_file( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - def deidentify_document( + def deidentify_audio( self, *, - vault_id: VaultId, - file: DeidentifyDocumentRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyAudio, + vault_id: str, + output_transcription: typing.Optional[DeidentifyFileAudioRequestDeidentifyAudioOutputTranscription] = OMIT, + output_processed_audio: typing.Optional[bool] = OMIT, + bleep_start_padding: typing.Optional[float] = OMIT, + bleep_stop_padding: typing.Optional[float] = OMIT, + bleep_frequency: typing.Optional[int] = OMIT, + bleep_gain: typing.Optional[int] = OMIT, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileAudioRequestDeidentifyAudioEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> HttpResponse[DeidentifyFileResponse]: """ - De-identifies sensitive data from a document file. This operation includes options applicable to all supported document file types.
 For more specific options, see the file type-specific opertions (like De-identify PDF) where they're available. For broader file type support, see De-identify File. + De-identifies sensitive data from an audio file. This operation includes options applicable to all supported audio file types.
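For the audio operation introduced just above, the following is a hedged sketch of how the bleep-tuning parameters might be combined. It assumes the high-level files client exposes deidentify_audio with the same keyword arguments as this raw method, and that FileDataDeidentifyAudio is re-exported from the top-level package like the other file models in this diff.

import asyncio

from skyflow import AsyncSkyflow, FileDataDeidentifyAudio  # re-export assumed

client = AsyncSkyflow(
    token="YOUR_TOKEN",
)


async def main() -> None:
    await client.files.deidentify_audio(
        file=FileDataDeidentifyAudio(
            base_64="base64",  # placeholder; supply Base64-encoded audio data
        ),
        vault_id="vault_id",
        output_processed_audio=True,  # include the bleeped audio in the response
        bleep_frequency=1000,         # pitch of the bleep, in Hz
        bleep_gain=-3,                # negative values make the bleep quieter
        bleep_start_padding=0.1,      # padding at the start of each bleep, in seconds
        bleep_stop_padding=0.1,       # padding at the end of each bleep, in seconds
    )


asyncio.run(main())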
For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyAudio + + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - file : DeidentifyDocumentRequestFile - File to de-identify. Files are specified as Base64-encoded data. + output_transcription : typing.Optional[DeidentifyFileAudioRequestDeidentifyAudioOutputTranscription] + Type of transcription to output. + + output_processed_audio : typing.Optional[bool] + Whether to include the processed audio file in the response. + + bleep_start_padding : typing.Optional[float] + Padding added to the beginning of a bleep, in seconds. + + bleep_stop_padding : typing.Optional[float] + Padding added to the end of a bleep, in seconds. - configuration_id : typing.Optional[ConfigurationId] + bleep_frequency : typing.Optional[int] + The pitch of the bleep sound, in Hz. The higher the number, the higher the pitch. - entity_types : typing.Optional[EntityTypes] + bleep_gain : typing.Optional[int] + Relative loudness of the bleep in dB. Positive values increase its loudness, and negative values decrease it. - token_type : typing.Optional[TokenTypeWithoutVault] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileAudioRequestDeidentifyAudioEntityTypesItem]] + Entities to detect and de-identify. - allow_regex : typing.Optional[AllowRegex] + token_type : typing.Optional[TokenTypeMapping] - restrict_regex : typing.Optional[RestrictRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. + + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- HttpResponse[DeidentifyFileResponse] - A successful response. 
+ OK """ _response = self._client_wrapper.httpx_client.request( - "v1/detect/deidentify/file/document", + "v1/detect/deidentify/file/audio", method="POST", json={ - "vault_id": vault_id, "file": convert_and_respect_annotation_metadata( - object_=file, annotation=DeidentifyDocumentRequestFile, direction="write" + object_=file, annotation=FileDataDeidentifyAudio, direction="write" ), - "configuration_id": configuration_id, + "vault_id": vault_id, + "output_transcription": output_transcription, + "output_processed_audio": output_processed_audio, + "bleep_start_padding": bleep_start_padding, + "bleep_stop_padding": bleep_stop_padding, + "bleep_frequency": bleep_frequency, + "bleep_gain": bleep_gain, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenTypeWithoutVault, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -281,77 +338,71 @@ def deidentify_document( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - def deidentify_pdf( + def deidentify_document( self, *, - vault_id: VaultId, - file: DeidentifyPdfRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - density: typing.Optional[float] = OMIT, - max_resolution: typing.Optional[float] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyDocument, + vault_id: str, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyDocumentEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> HttpResponse[DeidentifyFileResponse]: """ - De-identifies sensitive data from a PDF file. This operation includes options specific to PDF files.
 For broader file type support, see De-identify Document and De-identify File. + De-identifies sensitive data from a document file. This operation includes options applicable to all supported document file types.
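A sketch of the configuration-driven variant of the document operation above: per the docstring, configuration_id cannot be combined with inline detection options, so only vault_id and the file accompany it. The call assumes FileDataDeidentifyDocument is re-exported from the top-level package and that the high-level files client mirrors this raw signature.

import asyncio

from skyflow import AsyncSkyflow, FileDataDeidentifyDocument  # re-export assumed

client = AsyncSkyflow(
    token="YOUR_TOKEN",
)


async def main() -> None:
    # configuration_id replaces inline options such as entity_types or allow_regex;
    # only vault_id and file may be specified alongside it.
    await client.files.deidentify_document(
        file=FileDataDeidentifyDocument(
            base_64="base64",  # placeholder; supply Base64-encoded document data
        ),
        vault_id="vault_id",
        configuration_id="configuration_id",
    )


asyncio.run(main())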
For more specific options, see the file type-specific opertions (like De-identify PDF) where they're available. For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId - - file : DeidentifyPdfRequestFile - File to de-identify. Files are specified as Base64-encoded data. + file : FileDataDeidentifyDocument - configuration_id : typing.Optional[ConfigurationId] + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - density : typing.Optional[float] - Pixel density at which to process the PDF file. + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyDocumentEntityTypesItem]] + Entities to detect and de-identify. - max_resolution : typing.Optional[float] - Max resolution at which to process the PDF file. + token_type : typing.Optional[TokenTypeMapping] - entity_types : typing.Optional[EntityTypes] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - token_type : typing.Optional[TokenTypeWithoutVault] - - allow_regex : typing.Optional[AllowRegex] - - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- HttpResponse[DeidentifyFileResponse] - A successful response. 
+ OK """ _response = self._client_wrapper.httpx_client.request( - "v1/detect/deidentify/file/document/pdf", + "v1/detect/deidentify/file/document", method="POST", json={ - "vault_id": vault_id, "file": convert_and_respect_annotation_metadata( - object_=file, annotation=DeidentifyPdfRequestFile, direction="write" + object_=file, annotation=FileDataDeidentifyDocument, direction="write" ), - "configuration_id": configuration_id, - "density": density, - "max_resolution": max_resolution, + "vault_id": vault_id, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenTypeWithoutVault, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -407,82 +458,83 @@ def deidentify_pdf( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - def deidentify_image( + def deidentify_pdf( self, *, - vault_id: VaultId, - file: DeidentifyImageRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - output_processed_image: typing.Optional[bool] = OMIT, - output_ocr_text: typing.Optional[bool] = OMIT, - masking_method: typing.Optional[DeidentifyImageRequestMaskingMethod] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyPdf, + vault_id: str, + density: typing.Optional[int] = OMIT, + max_resolution: typing.Optional[int] = OMIT, + entity_types: typing.Optional[ + typing.Sequence[DeidentifyFileDocumentPdfRequestDeidentifyPdfEntityTypesItem] + ] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> HttpResponse[DeidentifyFileResponse]: """ - De-identifies sensitive data from an image file. This operation includes options applicable to all supported image file types.
 For broader file type support, see De-identify File. + De-identifies sensitive data from a PDF file. This operation includes options specific to PDF files.
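For the PDF-specific options above, a sketch showing density and max_resolution, assuming FileDataDeidentifyPdf is re-exported from the top-level package and that the high-level files client mirrors this raw signature.

import asyncio

from skyflow import AsyncSkyflow, FileDataDeidentifyPdf  # re-export assumed

client = AsyncSkyflow(
    token="YOUR_TOKEN",
)


async def main() -> None:
    await client.files.deidentify_pdf(
        file=FileDataDeidentifyPdf(
            base_64="base64",  # placeholder; supply Base64-encoded PDF data
        ),
        vault_id="vault_id",
        density=150,          # pixel density used when processing the PDF
        max_resolution=2048,  # maximum resolution used when processing the PDF
    )


asyncio.run(main())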
For broader file type support, see De-identify Document and De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyPdf - file : DeidentifyImageRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] - - output_processed_image : typing.Optional[bool] - If `true`, includes processed image in the output. - - output_ocr_text : typing.Optional[bool] - If `true`, includes OCR text output in the response. + density : typing.Optional[int] + Pixel density at which to process the PDF file. - masking_method : typing.Optional[DeidentifyImageRequestMaskingMethod] - Method to mask the entities in the image. + max_resolution : typing.Optional[int] + Max resolution at which to process the PDF file. - entity_types : typing.Optional[EntityTypes] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileDocumentPdfRequestDeidentifyPdfEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenTypeWithoutVault] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- HttpResponse[DeidentifyFileResponse] - A successful response. 
+ OK """ _response = self._client_wrapper.httpx_client.request( - "v1/detect/deidentify/file/image", + "v1/detect/deidentify/file/document/pdf", method="POST", json={ - "vault_id": vault_id, "file": convert_and_respect_annotation_metadata( - object_=file, annotation=DeidentifyImageRequestFile, direction="write" + object_=file, annotation=FileDataDeidentifyPdf, direction="write" ), - "configuration_id": configuration_id, - "output_processed_image": output_processed_image, - "output_ocr_text": output_ocr_text, - "masking_method": masking_method, + "vault_id": vault_id, + "density": density, + "max_resolution": max_resolution, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenTypeWithoutVault, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -538,67 +590,86 @@ def deidentify_image( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - def deidentify_text( + def deidentify_image( self, *, - vault_id: VaultId, - file: DeidentifyTextRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyImage, + vault_id: str, + output_processed_image: typing.Optional[bool] = OMIT, + output_ocr_text: typing.Optional[bool] = OMIT, + masking_method: typing.Optional[DeidentifyFileImageRequestDeidentifyImageMaskingMethod] = OMIT, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileImageRequestDeidentifyImageEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> HttpResponse[DeidentifyFileResponse]: """ - De-identifies sensitive data from a text file. This operation includes options applicable to all supported image text types.
 For broader file type support, see De-identify File. + De-identifies sensitive data from an image file. This operation includes options applicable to all supported image file types.
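For the image operation above, a sketch that returns both the masked image and the OCR text, assuming FileDataDeidentifyImage is re-exported from the top-level package and that the high-level files client mirrors this raw signature.

import asyncio

from skyflow import AsyncSkyflow, FileDataDeidentifyImage  # re-export assumed

client = AsyncSkyflow(
    token="YOUR_TOKEN",
)


async def main() -> None:
    await client.files.deidentify_image(
        file=FileDataDeidentifyImage(
            base_64="base64",  # placeholder; supply Base64-encoded image data
        ),
        vault_id="vault_id",
        output_processed_image=True,  # include the masked image in the response
        output_ocr_text=True,         # also return the text detected by OCR
    )


asyncio.run(main())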
For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyImage + + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. + + output_processed_image : typing.Optional[bool] + If `true`, includes processed image in the output. - file : DeidentifyTextRequestFile - File to de-identify. Files are specified as Base64-encoded data. + output_ocr_text : typing.Optional[bool] + If `true`, includes text detected by OCR in the response. - configuration_id : typing.Optional[ConfigurationId] + masking_method : typing.Optional[DeidentifyFileImageRequestDeidentifyImageMaskingMethod] + Method to mask the entities in the image. - entity_types : typing.Optional[EntityTypes] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileImageRequestDeidentifyImageEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenTypeWithoutVault] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- HttpResponse[DeidentifyFileResponse] - A successful response. 
+ OK """ _response = self._client_wrapper.httpx_client.request( - "v1/detect/deidentify/file/text", + "v1/detect/deidentify/file/image", method="POST", json={ - "vault_id": vault_id, "file": convert_and_respect_annotation_metadata( - object_=file, annotation=DeidentifyTextRequestFile, direction="write" + object_=file, annotation=FileDataDeidentifyImage, direction="write" ), - "configuration_id": configuration_id, + "vault_id": vault_id, + "output_processed_image": output_processed_image, + "output_ocr_text": output_ocr_text, + "masking_method": masking_method, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenTypeWithoutVault, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -654,67 +725,73 @@ def deidentify_text( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - def deidentify_structured_text( + def deidentify_presentation( self, *, - vault_id: VaultId, - file: DeidentifyStructuredTextRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyPresentation, + vault_id: str, + entity_types: typing.Optional[ + typing.Sequence[DeidentifyFileRequestDeidentifyPresentationEntityTypesItem] + ] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> HttpResponse[DeidentifyFileResponse]: """ - De-identifies sensitive data from a structured text file. This operation includes options applicable to all supported structured text file types.
 For broader file type support, see De-identify File. + De-identifies sensitive data from a presentation file. This operation includes options applicable to all supported presentation file types.
For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyPresentation - file : DeidentifyStructuredTextRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyPresentationEntityTypesItem]] + Entities to detect and de-identify. - entity_types : typing.Optional[EntityTypes] + token_type : typing.Optional[TokenTypeMapping] - token_type : typing.Optional[TokenTypeWithoutVault] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - allow_regex : typing.Optional[AllowRegex] - - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- HttpResponse[DeidentifyFileResponse] - A successful response. 
+ OK """ _response = self._client_wrapper.httpx_client.request( - "v1/detect/deidentify/file/structured_text", + "v1/detect/deidentify/file/presentation", method="POST", json={ - "vault_id": vault_id, "file": convert_and_respect_annotation_metadata( - object_=file, annotation=DeidentifyStructuredTextRequestFile, direction="write" + object_=file, annotation=FileDataDeidentifyPresentation, direction="write" ), - "configuration_id": configuration_id, + "vault_id": vault_id, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenTypeWithoutVault, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -773,14 +850,16 @@ def deidentify_structured_text( def deidentify_spreadsheet( self, *, - vault_id: VaultId, - file: DeidentifySpreadsheetRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifySpreadsheet, + vault_id: str, + entity_types: typing.Optional[ + typing.Sequence[DeidentifyFileRequestDeidentifySpreadsheetEntityTypesItem] + ] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> HttpResponse[DeidentifyFileResponse]: """ @@ -788,49 +867,53 @@ def deidentify_spreadsheet( Parameters ---------- - vault_id : VaultId - - file : DeidentifySpreadsheetRequestFile - File to de-identify. Files are specified as Base64-encoded data. + file : FileDataDeidentifySpreadsheet - configuration_id : typing.Optional[ConfigurationId] + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - entity_types : typing.Optional[EntityTypes] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifySpreadsheetEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenTypeWithoutVault] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. 
If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- HttpResponse[DeidentifyFileResponse] - A successful response. + OK """ _response = self._client_wrapper.httpx_client.request( "v1/detect/deidentify/file/spreadsheet", method="POST", json={ - "vault_id": vault_id, "file": convert_and_respect_annotation_metadata( - object_=file, annotation=DeidentifySpreadsheetRequestFile, direction="write" + object_=file, annotation=FileDataDeidentifySpreadsheet, direction="write" ), - "configuration_id": configuration_id, + "vault_id": vault_id, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenTypeWithoutVault, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -886,67 +969,73 @@ def deidentify_spreadsheet( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - def deidentify_presentation( + def deidentify_structured_text( self, *, - vault_id: VaultId, - file: DeidentifyPresentationRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyStructuredText, + vault_id: str, + entity_types: typing.Optional[ + typing.Sequence[DeidentifyFileRequestDeidentifyStructuredTextEntityTypesItem] + ] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> HttpResponse[DeidentifyFileResponse]: """ - De-identifies sensitive data from a presentation file. This operation includes options applicable to all supported presentation file types.
 For broader file type support, see De-identify File. + De-identifies sensitive data from a structured text file. This operation includes options applicable to all supported structured text file types.
For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId - - file : DeidentifyPresentationRequestFile - File to de-identify. Files are specified as Base64-encoded data. + file : FileDataDeidentifyStructuredText - configuration_id : typing.Optional[ConfigurationId] + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - entity_types : typing.Optional[EntityTypes] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyStructuredTextEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenTypeWithoutVault] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- HttpResponse[DeidentifyFileResponse] - A successful response. 
+ OK """ _response = self._client_wrapper.httpx_client.request( - "v1/detect/deidentify/file/presentation", + "v1/detect/deidentify/file/structured_text", method="POST", json={ - "vault_id": vault_id, "file": convert_and_respect_annotation_metadata( - object_=file, annotation=DeidentifyPresentationRequestFile, direction="write" + object_=file, annotation=FileDataDeidentifyStructuredText, direction="write" ), - "configuration_id": configuration_id, + "vault_id": vault_id, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenTypeWithoutVault, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -1002,97 +1091,71 @@ def deidentify_presentation( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - def deidentify_audio( + def deidentify_text( self, *, - vault_id: VaultId, - file: DeidentifyAudioRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - output_processed_audio: typing.Optional[bool] = OMIT, - output_transcription: typing.Optional[DeidentifyAudioRequestOutputTranscription] = OMIT, - bleep_gain: typing.Optional[float] = OMIT, - bleep_frequency: typing.Optional[float] = OMIT, - bleep_start_padding: typing.Optional[float] = OMIT, - bleep_stop_padding: typing.Optional[float] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyText, + vault_id: str, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyTextEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> HttpResponse[DeidentifyFileResponse]: """ - De-identifies sensitive data from an audio file. This operation includes options applicable to all supported audio file types.
 For broader file type support, see De-identify File. + De-identifies sensitive data from a text file. This operation includes options applicable to all supported text file types.
For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId - - file : DeidentifyAudioRequestFile - File to de-identify. Files are specified as Base64-encoded data. - - configuration_id : typing.Optional[ConfigurationId] - - output_processed_audio : typing.Optional[bool] - If `true`, includes processed audio file in the response. - - output_transcription : typing.Optional[DeidentifyAudioRequestOutputTranscription] - Type of transcription to output. - - bleep_gain : typing.Optional[float] - Relative loudness of the bleep in dB. Positive values increase its loudness, and negative values decrease it. - - bleep_frequency : typing.Optional[float] - The pitch of the bleep sound, in Hz. The higher the number, the higher the pitch. - - bleep_start_padding : typing.Optional[float] - Padding added to the beginning of a bleep, in seconds. + file : FileDataDeidentifyText - bleep_stop_padding : typing.Optional[float] - Padding added to the end of a bleep, in seconds. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - entity_types : typing.Optional[EntityTypes] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyTextEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenTypeWithoutVault] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- HttpResponse[DeidentifyFileResponse] - A successful response. 
+ OK """ _response = self._client_wrapper.httpx_client.request( - "v1/detect/deidentify/file/audio", + "v1/detect/deidentify/file/text", method="POST", json={ - "vault_id": vault_id, "file": convert_and_respect_annotation_metadata( - object_=file, annotation=DeidentifyAudioRequestFile, direction="write" + object_=file, annotation=FileDataDeidentifyText, direction="write" ), - "configuration_id": configuration_id, - "output_processed_audio": output_processed_audio, - "output_transcription": output_transcription, - "bleep_gain": bleep_gain, - "bleep_frequency": bleep_frequency, - "bleep_start_padding": bleep_start_padding, - "bleep_stop_padding": bleep_stop_padding, + "vault_id": vault_id, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenTypeWithoutVault, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -1148,42 +1211,56 @@ def deidentify_audio( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - def get_run( - self, run_id: Uuid, *, vault_id: ResourceId, request_options: typing.Optional[RequestOptions] = None - ) -> HttpResponse[DeidentifyStatusResponse]: + def reidentify_file( + self, + *, + file: FileDataReidentifyFile, + vault_id: str, + format: typing.Optional[Format] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> HttpResponse[ReidentifyFileResponse]: """ - Returns the status of the detect run. + Re-identifies tokens in a file. Parameters ---------- - run_id : Uuid - ID of the detect run. + file : FileDataReidentifyFile - vault_id : ResourceId - ID of the vault. + vault_id : str + ID of the vault where the entities are stored. + + format : typing.Optional[Format] request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - HttpResponse[DeidentifyStatusResponse] - A successful response. 
+ HttpResponse[ReidentifyFileResponse] + OK """ _response = self._client_wrapper.httpx_client.request( - f"v1/detect/runs/{jsonable_encoder(run_id)}", - method="GET", - params={ + "v1/detect/reidentify/file", + method="POST", + json={ + "file": convert_and_respect_annotation_metadata( + object_=file, annotation=FileDataReidentifyFile, direction="write" + ), "vault_id": vault_id, + "format": convert_and_respect_annotation_metadata(object_=format, annotation=Format, direction="write"), + }, + headers={ + "content-type": "application/json", }, request_options=request_options, + omit=OMIT, ) try: if 200 <= _response.status_code < 300: _data = typing.cast( - DeidentifyStatusResponse, + ReidentifyFileResponse, parse_obj_as( - type_=DeidentifyStatusResponse, # type: ignore + type_=ReidentifyFileResponse, # type: ignore object_=_response.json(), ), ) @@ -1210,17 +1287,6 @@ def get_run( ), ), ) - if _response.status_code == 404: - raise NotFoundError( - headers=dict(_response.headers), - body=typing.cast( - typing.Optional[typing.Any], - parse_obj_as( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ), - ) if _response.status_code == 500: raise InternalServerError( headers=dict(_response.headers), @@ -1237,59 +1303,44 @@ def get_run( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - def reidentify_file( + def get_run( self, + run_id: str, *, - vault_id: VaultId, - file: ReidentifyFileRequestFile, - format: typing.Optional[ReidentifyFileRequestFormat] = OMIT, + vault_id: typing.Optional[str] = None, request_options: typing.Optional[RequestOptions] = None, - ) -> HttpResponse[ReidentifyFileResponse]: + ) -> HttpResponse[DetectRunsResponse]: """ - Re-identifies tokens in a file. + Returns the status of a detect run. Parameters ---------- - vault_id : VaultId - - file : ReidentifyFileRequestFile - File to re-identify. Files are specified as Base64-encoded data or an EFS path. + run_id : str - format : typing.Optional[ReidentifyFileRequestFormat] - Mapping of preferred data formatting options to entity types. Returned values are dependent on the configuration of the vault storing the data and the permissions of the user or account making the request. + vault_id : typing.Optional[str] request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - HttpResponse[ReidentifyFileResponse] - A successful response. 
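The reidentify_file method above posts to v1/detect/reidentify/file with only file, vault_id, and an optional format mapping. A minimal hedged sketch, assuming a placeholder base URL and bearer token and a hypothetical inner shape for FileDataReidentifyFile:

import httpx

payload = {
    "file": {"base64": "<base64-encoded de-identified file>"},  # hypothetical inner shape
    "vault_id": "<vault-id>",
    # "format": {...},  # optional; the Format structure is not expanded in this diff
}

response = httpx.post(
    "https://<your-detect-cluster>/v1/detect/reidentify/file",  # base URL assumed
    json=payload,
    headers={"content-type": "application/json", "Authorization": "Bearer <token>"},  # auth assumed
    timeout=60,
)
response.raise_for_status()
print(response.json())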
+ HttpResponse[DetectRunsResponse] + OK """ _response = self._client_wrapper.httpx_client.request( - "v1/detect/reidentify/file", - method="POST", - json={ + f"v1/detect/runs/{jsonable_encoder(run_id)}", + method="GET", + params={ "vault_id": vault_id, - "file": convert_and_respect_annotation_metadata( - object_=file, annotation=ReidentifyFileRequestFile, direction="write" - ), - "format": convert_and_respect_annotation_metadata( - object_=format, annotation=ReidentifyFileRequestFormat, direction="write" - ), - }, - headers={ - "content-type": "application/json", }, request_options=request_options, - omit=OMIT, ) try: if 200 <= _response.status_code < 300: _data = typing.cast( - ReidentifyFileResponse, + DetectRunsResponse, parse_obj_as( - type_=ReidentifyFileResponse, # type: ignore + type_=DetectRunsResponse, # type: ignore object_=_response.json(), ), ) @@ -1340,14 +1391,14 @@ def __init__(self, *, client_wrapper: AsyncClientWrapper): async def deidentify_file( self, *, - vault_id: VaultId, - file: DeidentifyFileRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileData, + vault_id: str, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileRequestEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[DeidentifyFileResponse]: """ @@ -1355,49 +1406,51 @@ async def deidentify_file( Parameters ---------- - vault_id : VaultId + file : FileData - file : DeidentifyFileRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestEntityTypesItem]] + Entities to detect and de-identify. - entity_types : typing.Optional[EntityTypes] + token_type : typing.Optional[TokenTypeMapping] - token_type : typing.Optional[TokenTypeWithoutVault] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - allow_regex : typing.Optional[AllowRegex] - - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. 
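The get_run method above issues a GET to v1/detect/runs/{run_id} with vault_id passed as a query parameter, which suits simple polling. A hedged sketch follows; the base URL, bearer token, and the status field and values checked below are assumptions, not values confirmed by this patch:

import time

import httpx

def wait_for_run(run_id: str, vault_id: str, token: str) -> dict:
    url = f"https://<your-detect-cluster>/v1/detect/runs/{run_id}"  # base URL assumed
    while True:
        resp = httpx.get(
            url,
            params={"vault_id": vault_id},
            headers={"Authorization": f"Bearer {token}"},  # auth scheme assumed
            timeout=30,
        )
        resp.raise_for_status()
        body = resp.json()
        # Status field name and values are assumed for illustration.
        if body.get("status") not in ("IN_PROGRESS", "PENDING"):
            return body
        time.sleep(2)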
If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- AsyncHttpResponse[DeidentifyFileResponse] - A successful response. + OK """ _response = await self._client_wrapper.httpx_client.request( "v1/detect/deidentify/file", method="POST", json={ + "file": convert_and_respect_annotation_metadata(object_=file, annotation=FileData, direction="write"), "vault_id": vault_id, - "file": convert_and_respect_annotation_metadata( - object_=file, annotation=DeidentifyFileRequestFile, direction="write" - ), - "configuration_id": configuration_id, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenTypeWithoutVault, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -1453,67 +1506,101 @@ async def deidentify_file( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - async def deidentify_document( + async def deidentify_audio( self, *, - vault_id: VaultId, - file: DeidentifyDocumentRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyAudio, + vault_id: str, + output_transcription: typing.Optional[DeidentifyFileAudioRequestDeidentifyAudioOutputTranscription] = OMIT, + output_processed_audio: typing.Optional[bool] = OMIT, + bleep_start_padding: typing.Optional[float] = OMIT, + bleep_stop_padding: typing.Optional[float] = OMIT, + bleep_frequency: typing.Optional[int] = OMIT, + bleep_gain: typing.Optional[int] = OMIT, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileAudioRequestDeidentifyAudioEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[DeidentifyFileResponse]: """ - De-identifies sensitive data from a document file. This operation includes options applicable to all supported document file types.

For more specific options, see the file type-specific opertions (like De-identify PDF) where they're available. For broader file type support, see De-identify File. + De-identifies sensitive data from an audio file. This operation includes options applicable to all supported audio file types.

For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyAudio + + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - file : DeidentifyDocumentRequestFile - File to de-identify. Files are specified as Base64-encoded data. + output_transcription : typing.Optional[DeidentifyFileAudioRequestDeidentifyAudioOutputTranscription] + Type of transcription to output. + + output_processed_audio : typing.Optional[bool] + Whether to include the processed audio file in the response. + + bleep_start_padding : typing.Optional[float] + Padding added to the beginning of a bleep, in seconds. + + bleep_stop_padding : typing.Optional[float] + Padding added to the end of a bleep, in seconds. - configuration_id : typing.Optional[ConfigurationId] + bleep_frequency : typing.Optional[int] + The pitch of the bleep sound, in Hz. The higher the number, the higher the pitch. - entity_types : typing.Optional[EntityTypes] + bleep_gain : typing.Optional[int] + Relative loudness of the bleep in dB. Positive values increase its loudness, and negative values decrease it. - token_type : typing.Optional[TokenTypeWithoutVault] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileAudioRequestDeidentifyAudioEntityTypesItem]] + Entities to detect and de-identify. - allow_regex : typing.Optional[AllowRegex] + token_type : typing.Optional[TokenTypeMapping] - restrict_regex : typing.Optional[RestrictRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. + + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- AsyncHttpResponse[DeidentifyFileResponse] - A successful response. 
+ OK """ _response = await self._client_wrapper.httpx_client.request( - "v1/detect/deidentify/file/document", + "v1/detect/deidentify/file/audio", method="POST", json={ - "vault_id": vault_id, "file": convert_and_respect_annotation_metadata( - object_=file, annotation=DeidentifyDocumentRequestFile, direction="write" + object_=file, annotation=FileDataDeidentifyAudio, direction="write" ), - "configuration_id": configuration_id, + "vault_id": vault_id, + "output_transcription": output_transcription, + "output_processed_audio": output_processed_audio, + "bleep_start_padding": bleep_start_padding, + "bleep_stop_padding": bleep_stop_padding, + "bleep_frequency": bleep_frequency, + "bleep_gain": bleep_gain, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenTypeWithoutVault, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -1569,77 +1656,71 @@ async def deidentify_document( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - async def deidentify_pdf( + async def deidentify_document( self, *, - vault_id: VaultId, - file: DeidentifyPdfRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - density: typing.Optional[float] = OMIT, - max_resolution: typing.Optional[float] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyDocument, + vault_id: str, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyDocumentEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[DeidentifyFileResponse]: """ - De-identifies sensitive data from a PDF file. This operation includes options specific to PDF files.

For broader file type support, see De-identify Document and De-identify File. + De-identifies sensitive data from a document file. This operation includes options applicable to all supported document file types.

For more specific options, see the file type-specific opertions (like De-identify PDF) where they're available. For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId - - file : DeidentifyPdfRequestFile - File to de-identify. Files are specified as Base64-encoded data. + file : FileDataDeidentifyDocument - configuration_id : typing.Optional[ConfigurationId] + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - density : typing.Optional[float] - Pixel density at which to process the PDF file. + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyDocumentEntityTypesItem]] + Entities to detect and de-identify. - max_resolution : typing.Optional[float] - Max resolution at which to process the PDF file. + token_type : typing.Optional[TokenTypeMapping] - entity_types : typing.Optional[EntityTypes] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - token_type : typing.Optional[TokenTypeWithoutVault] - - allow_regex : typing.Optional[AllowRegex] - - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- AsyncHttpResponse[DeidentifyFileResponse] - A successful response. 
+ OK """ _response = await self._client_wrapper.httpx_client.request( - "v1/detect/deidentify/file/document/pdf", + "v1/detect/deidentify/file/document", method="POST", json={ - "vault_id": vault_id, "file": convert_and_respect_annotation_metadata( - object_=file, annotation=DeidentifyPdfRequestFile, direction="write" + object_=file, annotation=FileDataDeidentifyDocument, direction="write" ), - "configuration_id": configuration_id, - "density": density, - "max_resolution": max_resolution, + "vault_id": vault_id, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenTypeWithoutVault, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -1695,82 +1776,83 @@ async def deidentify_pdf( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - async def deidentify_image( + async def deidentify_pdf( self, *, - vault_id: VaultId, - file: DeidentifyImageRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - output_processed_image: typing.Optional[bool] = OMIT, - output_ocr_text: typing.Optional[bool] = OMIT, - masking_method: typing.Optional[DeidentifyImageRequestMaskingMethod] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyPdf, + vault_id: str, + density: typing.Optional[int] = OMIT, + max_resolution: typing.Optional[int] = OMIT, + entity_types: typing.Optional[ + typing.Sequence[DeidentifyFileDocumentPdfRequestDeidentifyPdfEntityTypesItem] + ] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[DeidentifyFileResponse]: """ - De-identifies sensitive data from an image file. This operation includes options applicable to all supported image file types.

For broader file type support, see De-identify File. + De-identifies sensitive data from a PDF file. This operation includes options specific to PDF files.

For broader file type support, see De-identify Document and De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyPdf - file : DeidentifyImageRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] - - output_processed_image : typing.Optional[bool] - If `true`, includes processed image in the output. - - output_ocr_text : typing.Optional[bool] - If `true`, includes OCR text output in the response. + density : typing.Optional[int] + Pixel density at which to process the PDF file. - masking_method : typing.Optional[DeidentifyImageRequestMaskingMethod] - Method to mask the entities in the image. + max_resolution : typing.Optional[int] + Max resolution at which to process the PDF file. - entity_types : typing.Optional[EntityTypes] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileDocumentPdfRequestDeidentifyPdfEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenTypeWithoutVault] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- AsyncHttpResponse[DeidentifyFileResponse] - A successful response. 
+ OK """ _response = await self._client_wrapper.httpx_client.request( - "v1/detect/deidentify/file/image", + "v1/detect/deidentify/file/document/pdf", method="POST", json={ - "vault_id": vault_id, "file": convert_and_respect_annotation_metadata( - object_=file, annotation=DeidentifyImageRequestFile, direction="write" + object_=file, annotation=FileDataDeidentifyPdf, direction="write" ), - "configuration_id": configuration_id, - "output_processed_image": output_processed_image, - "output_ocr_text": output_ocr_text, - "masking_method": masking_method, + "vault_id": vault_id, + "density": density, + "max_resolution": max_resolution, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenTypeWithoutVault, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -1826,67 +1908,86 @@ async def deidentify_image( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - async def deidentify_text( + async def deidentify_image( self, *, - vault_id: VaultId, - file: DeidentifyTextRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyImage, + vault_id: str, + output_processed_image: typing.Optional[bool] = OMIT, + output_ocr_text: typing.Optional[bool] = OMIT, + masking_method: typing.Optional[DeidentifyFileImageRequestDeidentifyImageMaskingMethod] = OMIT, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileImageRequestDeidentifyImageEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[DeidentifyFileResponse]: """ - De-identifies sensitive data from a text file. This operation includes options applicable to all supported image text types.

For broader file type support, see De-identify File. + De-identifies sensitive data from an image file. This operation includes options applicable to all supported image file types.

For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyImage + + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. + + output_processed_image : typing.Optional[bool] + If `true`, includes processed image in the output. - file : DeidentifyTextRequestFile - File to de-identify. Files are specified as Base64-encoded data. + output_ocr_text : typing.Optional[bool] + If `true`, includes text detected by OCR in the response. - configuration_id : typing.Optional[ConfigurationId] + masking_method : typing.Optional[DeidentifyFileImageRequestDeidentifyImageMaskingMethod] + Method to mask the entities in the image. - entity_types : typing.Optional[EntityTypes] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileImageRequestDeidentifyImageEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenTypeWithoutVault] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- AsyncHttpResponse[DeidentifyFileResponse] - A successful response. 
+ OK """ _response = await self._client_wrapper.httpx_client.request( - "v1/detect/deidentify/file/text", + "v1/detect/deidentify/file/image", method="POST", json={ - "vault_id": vault_id, "file": convert_and_respect_annotation_metadata( - object_=file, annotation=DeidentifyTextRequestFile, direction="write" + object_=file, annotation=FileDataDeidentifyImage, direction="write" ), - "configuration_id": configuration_id, + "vault_id": vault_id, + "output_processed_image": output_processed_image, + "output_ocr_text": output_ocr_text, + "masking_method": masking_method, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenTypeWithoutVault, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -1942,67 +2043,73 @@ async def deidentify_text( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - async def deidentify_structured_text( + async def deidentify_presentation( self, *, - vault_id: VaultId, - file: DeidentifyStructuredTextRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyPresentation, + vault_id: str, + entity_types: typing.Optional[ + typing.Sequence[DeidentifyFileRequestDeidentifyPresentationEntityTypesItem] + ] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[DeidentifyFileResponse]: """ - De-identifies sensitive data from a structured text file. This operation includes options applicable to all supported structured text file types.

For broader file type support, see De-identify File. + De-identifies sensitive data from a presentation file. This operation includes options applicable to all supported presentation file types.

For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId + file : FileDataDeidentifyPresentation - file : DeidentifyStructuredTextRequestFile - File to de-identify. Files are specified as Base64-encoded data. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - configuration_id : typing.Optional[ConfigurationId] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyPresentationEntityTypesItem]] + Entities to detect and de-identify. - entity_types : typing.Optional[EntityTypes] + token_type : typing.Optional[TokenTypeMapping] - token_type : typing.Optional[TokenTypeWithoutVault] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - allow_regex : typing.Optional[AllowRegex] - - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- AsyncHttpResponse[DeidentifyFileResponse] - A successful response. 
+ OK """ _response = await self._client_wrapper.httpx_client.request( - "v1/detect/deidentify/file/structured_text", + "v1/detect/deidentify/file/presentation", method="POST", json={ - "vault_id": vault_id, "file": convert_and_respect_annotation_metadata( - object_=file, annotation=DeidentifyStructuredTextRequestFile, direction="write" + object_=file, annotation=FileDataDeidentifyPresentation, direction="write" ), - "configuration_id": configuration_id, + "vault_id": vault_id, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenTypeWithoutVault, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -2061,14 +2168,16 @@ async def deidentify_structured_text( async def deidentify_spreadsheet( self, *, - vault_id: VaultId, - file: DeidentifySpreadsheetRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifySpreadsheet, + vault_id: str, + entity_types: typing.Optional[ + typing.Sequence[DeidentifyFileRequestDeidentifySpreadsheetEntityTypesItem] + ] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[DeidentifyFileResponse]: """ @@ -2076,49 +2185,53 @@ async def deidentify_spreadsheet( Parameters ---------- - vault_id : VaultId - - file : DeidentifySpreadsheetRequestFile - File to de-identify. Files are specified as Base64-encoded data. + file : FileDataDeidentifySpreadsheet - configuration_id : typing.Optional[ConfigurationId] + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - entity_types : typing.Optional[EntityTypes] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifySpreadsheetEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenTypeWithoutVault] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. 
If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- AsyncHttpResponse[DeidentifyFileResponse] - A successful response. + OK """ _response = await self._client_wrapper.httpx_client.request( "v1/detect/deidentify/file/spreadsheet", method="POST", json={ - "vault_id": vault_id, "file": convert_and_respect_annotation_metadata( - object_=file, annotation=DeidentifySpreadsheetRequestFile, direction="write" + object_=file, annotation=FileDataDeidentifySpreadsheet, direction="write" ), - "configuration_id": configuration_id, + "vault_id": vault_id, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenTypeWithoutVault, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -2174,67 +2287,73 @@ async def deidentify_spreadsheet( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - async def deidentify_presentation( + async def deidentify_structured_text( self, *, - vault_id: VaultId, - file: DeidentifyPresentationRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyStructuredText, + vault_id: str, + entity_types: typing.Optional[ + typing.Sequence[DeidentifyFileRequestDeidentifyStructuredTextEntityTypesItem] + ] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[DeidentifyFileResponse]: """ - De-identifies sensitive data from a presentation file. This operation includes options applicable to all supported presentation file types.
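The presentation, spreadsheet, and structured-text endpoints in this range take the same request body; only the final path segment differs. A hedged helper illustrating that shared shape, with the base URL, token, and inner file shape as assumptions:

import httpx

DEIDENTIFY_PATHS = {
    "presentation": "v1/detect/deidentify/file/presentation",
    "spreadsheet": "v1/detect/deidentify/file/spreadsheet",
    "structured_text": "v1/detect/deidentify/file/structured_text",
}

def deidentify(kind: str, encoded_file: str, vault_id: str, token: str) -> dict:
    resp = httpx.post(
        f"https://<your-detect-cluster>/{DEIDENTIFY_PATHS[kind]}",  # base URL assumed
        json={
            "file": {"base64": encoded_file},  # hypothetical inner shape
            "vault_id": vault_id,
            "entity_types": ["name", "email_address", "phone_number"],
        },
        headers={"content-type": "application/json", "Authorization": f"Bearer {token}"},  # auth assumed
        timeout=120,
    )
    resp.raise_for_status()
    return resp.json()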

For broader file type support, see De-identify File. + De-identifies sensitive data from a structured text file. This operation includes options applicable to all supported structured text file types.

For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId - - file : DeidentifyPresentationRequestFile - File to de-identify. Files are specified as Base64-encoded data. + file : FileDataDeidentifyStructuredText - configuration_id : typing.Optional[ConfigurationId] + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - entity_types : typing.Optional[EntityTypes] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyStructuredTextEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenTypeWithoutVault] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- AsyncHttpResponse[DeidentifyFileResponse] - A successful response. 
+ OK """ _response = await self._client_wrapper.httpx_client.request( - "v1/detect/deidentify/file/presentation", + "v1/detect/deidentify/file/structured_text", method="POST", json={ - "vault_id": vault_id, "file": convert_and_respect_annotation_metadata( - object_=file, annotation=DeidentifyPresentationRequestFile, direction="write" + object_=file, annotation=FileDataDeidentifyStructuredText, direction="write" ), - "configuration_id": configuration_id, + "vault_id": vault_id, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenTypeWithoutVault, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -2290,97 +2409,71 @@ async def deidentify_presentation( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - async def deidentify_audio( + async def deidentify_text( self, *, - vault_id: VaultId, - file: DeidentifyAudioRequestFile, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - output_processed_audio: typing.Optional[bool] = OMIT, - output_transcription: typing.Optional[DeidentifyAudioRequestOutputTranscription] = OMIT, - bleep_gain: typing.Optional[float] = OMIT, - bleep_frequency: typing.Optional[float] = OMIT, - bleep_start_padding: typing.Optional[float] = OMIT, - bleep_stop_padding: typing.Optional[float] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenTypeWithoutVault] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + file: FileDataDeidentifyText, + vault_id: str, + entity_types: typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyTextEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[DeidentifyFileResponse]: """ - De-identifies sensitive data from an audio file. This operation includes options applicable to all supported audio file types.

For broader file type support, see De-identify File. + De-identifies sensitive data from a text file. This operation includes options applicable to all supported text file types.

For broader file type support, see De-identify File. Parameters ---------- - vault_id : VaultId - - file : DeidentifyAudioRequestFile - File to de-identify. Files are specified as Base64-encoded data. - - configuration_id : typing.Optional[ConfigurationId] - - output_processed_audio : typing.Optional[bool] - If `true`, includes processed audio file in the response. - - output_transcription : typing.Optional[DeidentifyAudioRequestOutputTranscription] - Type of transcription to output. - - bleep_gain : typing.Optional[float] - Relative loudness of the bleep in dB. Positive values increase its loudness, and negative values decrease it. - - bleep_frequency : typing.Optional[float] - The pitch of the bleep sound, in Hz. The higher the number, the higher the pitch. - - bleep_start_padding : typing.Optional[float] - Padding added to the beginning of a bleep, in seconds. + file : FileDataDeidentifyText - bleep_stop_padding : typing.Optional[float] - Padding added to the end of a bleep, in seconds. + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - entity_types : typing.Optional[EntityTypes] + entity_types : typing.Optional[typing.Sequence[DeidentifyFileRequestDeidentifyTextEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenTypeWithoutVault] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- AsyncHttpResponse[DeidentifyFileResponse] - A successful response. 
+ OK """ _response = await self._client_wrapper.httpx_client.request( - "v1/detect/deidentify/file/audio", + "v1/detect/deidentify/file/text", method="POST", json={ - "vault_id": vault_id, "file": convert_and_respect_annotation_metadata( - object_=file, annotation=DeidentifyAudioRequestFile, direction="write" + object_=file, annotation=FileDataDeidentifyText, direction="write" ), - "configuration_id": configuration_id, - "output_processed_audio": output_processed_audio, - "output_transcription": output_transcription, - "bleep_gain": bleep_gain, - "bleep_frequency": bleep_frequency, - "bleep_start_padding": bleep_start_padding, - "bleep_stop_padding": bleep_stop_padding, + "vault_id": vault_id, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenTypeWithoutVault, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -2436,42 +2529,56 @@ async def deidentify_audio( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - async def get_run( - self, run_id: Uuid, *, vault_id: ResourceId, request_options: typing.Optional[RequestOptions] = None - ) -> AsyncHttpResponse[DeidentifyStatusResponse]: + async def reidentify_file( + self, + *, + file: FileDataReidentifyFile, + vault_id: str, + format: typing.Optional[Format] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> AsyncHttpResponse[ReidentifyFileResponse]: """ - Returns the status of the detect run. + Re-identifies tokens in a file. Parameters ---------- - run_id : Uuid - ID of the detect run. + file : FileDataReidentifyFile - vault_id : ResourceId - ID of the vault. + vault_id : str + ID of the vault where the entities are stored. + + format : typing.Optional[Format] request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - AsyncHttpResponse[DeidentifyStatusResponse] - A successful response. 
+ AsyncHttpResponse[ReidentifyFileResponse] + OK """ _response = await self._client_wrapper.httpx_client.request( - f"v1/detect/runs/{jsonable_encoder(run_id)}", - method="GET", - params={ + "v1/detect/reidentify/file", + method="POST", + json={ + "file": convert_and_respect_annotation_metadata( + object_=file, annotation=FileDataReidentifyFile, direction="write" + ), "vault_id": vault_id, + "format": convert_and_respect_annotation_metadata(object_=format, annotation=Format, direction="write"), + }, + headers={ + "content-type": "application/json", }, request_options=request_options, + omit=OMIT, ) try: if 200 <= _response.status_code < 300: _data = typing.cast( - DeidentifyStatusResponse, + ReidentifyFileResponse, parse_obj_as( - type_=DeidentifyStatusResponse, # type: ignore + type_=ReidentifyFileResponse, # type: ignore object_=_response.json(), ), ) @@ -2498,17 +2605,6 @@ async def get_run( ), ), ) - if _response.status_code == 404: - raise NotFoundError( - headers=dict(_response.headers), - body=typing.cast( - typing.Optional[typing.Any], - parse_obj_as( - type_=typing.Optional[typing.Any], # type: ignore - object_=_response.json(), - ), - ), - ) if _response.status_code == 500: raise InternalServerError( headers=dict(_response.headers), @@ -2525,59 +2621,44 @@ async def get_run( raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text) raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json) - async def reidentify_file( + async def get_run( self, + run_id: str, *, - vault_id: VaultId, - file: ReidentifyFileRequestFile, - format: typing.Optional[ReidentifyFileRequestFormat] = OMIT, + vault_id: typing.Optional[str] = None, request_options: typing.Optional[RequestOptions] = None, - ) -> AsyncHttpResponse[ReidentifyFileResponse]: + ) -> AsyncHttpResponse[DetectRunsResponse]: """ - Re-identifies tokens in a file. + Returns the status of a detect run. Parameters ---------- - vault_id : VaultId - - file : ReidentifyFileRequestFile - File to re-identify. Files are specified as Base64-encoded data or an EFS path. + run_id : str - format : typing.Optional[ReidentifyFileRequestFormat] - Mapping of preferred data formatting options to entity types. Returned values are dependent on the configuration of the vault storing the data and the permissions of the user or account making the request. + vault_id : typing.Optional[str] request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - AsyncHttpResponse[ReidentifyFileResponse] - A successful response. 
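The async variants above change only how the request is awaited; with plain httpx the same call can be made through AsyncClient. A hedged sketch for the re-identify endpoint, with the base URL, token, and inner file shape as assumptions:

import asyncio

import httpx

async def reidentify_file_async(encoded_file: str, vault_id: str, token: str) -> dict:
    async with httpx.AsyncClient(
        base_url="https://<your-detect-cluster>",      # base URL assumed
        headers={"Authorization": f"Bearer {token}"},  # auth scheme assumed
        timeout=60,
    ) as client:
        resp = await client.post(
            "/v1/detect/reidentify/file",
            json={
                "file": {"base64": encoded_file},  # hypothetical inner shape
                "vault_id": vault_id,
            },
            headers={"content-type": "application/json"},
        )
        resp.raise_for_status()
        return resp.json()

# asyncio.run(reidentify_file_async("<base64>", "<vault-id>", "<token>"))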
+ AsyncHttpResponse[DetectRunsResponse] + OK """ _response = await self._client_wrapper.httpx_client.request( - "v1/detect/reidentify/file", - method="POST", - json={ + f"v1/detect/runs/{jsonable_encoder(run_id)}", + method="GET", + params={ "vault_id": vault_id, - "file": convert_and_respect_annotation_metadata( - object_=file, annotation=ReidentifyFileRequestFile, direction="write" - ), - "format": convert_and_respect_annotation_metadata( - object_=format, annotation=ReidentifyFileRequestFormat, direction="write" - ), - }, - headers={ - "content-type": "application/json", }, request_options=request_options, - omit=OMIT, ) try: if 200 <= _response.status_code < 300: _data = typing.cast( - ReidentifyFileResponse, + DetectRunsResponse, parse_obj_as( - type_=ReidentifyFileResponse, # type: ignore + type_=DetectRunsResponse, # type: ignore object_=_response.json(), ), ) diff --git a/skyflow/generated/rest/files/types/__init__.py b/skyflow/generated/rest/files/types/__init__.py index 78943a33..5ff54d96 100644 --- a/skyflow/generated/rest/files/types/__init__.py +++ b/skyflow/generated/rest/files/types/__init__.py @@ -2,48 +2,48 @@ # isort: skip_file -from .deidentify_audio_request_file import DeidentifyAudioRequestFile -from .deidentify_audio_request_file_data_format import DeidentifyAudioRequestFileDataFormat -from .deidentify_audio_request_output_transcription import DeidentifyAudioRequestOutputTranscription -from .deidentify_document_request_file import DeidentifyDocumentRequestFile -from .deidentify_document_request_file_data_format import DeidentifyDocumentRequestFileDataFormat -from .deidentify_file_request_file import DeidentifyFileRequestFile -from .deidentify_file_request_file_data_format import DeidentifyFileRequestFileDataFormat -from .deidentify_image_request_file import DeidentifyImageRequestFile -from .deidentify_image_request_file_data_format import DeidentifyImageRequestFileDataFormat -from .deidentify_image_request_masking_method import DeidentifyImageRequestMaskingMethod -from .deidentify_pdf_request_file import DeidentifyPdfRequestFile -from .deidentify_presentation_request_file import DeidentifyPresentationRequestFile -from .deidentify_presentation_request_file_data_format import DeidentifyPresentationRequestFileDataFormat -from .deidentify_spreadsheet_request_file import DeidentifySpreadsheetRequestFile -from .deidentify_spreadsheet_request_file_data_format import DeidentifySpreadsheetRequestFileDataFormat -from .deidentify_structured_text_request_file import DeidentifyStructuredTextRequestFile -from .deidentify_structured_text_request_file_data_format import DeidentifyStructuredTextRequestFileDataFormat -from .deidentify_text_request_file import DeidentifyTextRequestFile -from .reidentify_file_request_file import ReidentifyFileRequestFile -from .reidentify_file_request_file_data_format import ReidentifyFileRequestFileDataFormat -from .reidentify_file_request_format import ReidentifyFileRequestFormat +from .deidentify_file_audio_request_deidentify_audio_entity_types_item import ( + DeidentifyFileAudioRequestDeidentifyAudioEntityTypesItem, +) +from .deidentify_file_audio_request_deidentify_audio_output_transcription import ( + DeidentifyFileAudioRequestDeidentifyAudioOutputTranscription, +) +from .deidentify_file_document_pdf_request_deidentify_pdf_entity_types_item import ( + DeidentifyFileDocumentPdfRequestDeidentifyPdfEntityTypesItem, +) +from .deidentify_file_image_request_deidentify_image_entity_types_item import ( + 
DeidentifyFileImageRequestDeidentifyImageEntityTypesItem, +) +from .deidentify_file_image_request_deidentify_image_masking_method import ( + DeidentifyFileImageRequestDeidentifyImageMaskingMethod, +) +from .deidentify_file_request_deidentify_document_entity_types_item import ( + DeidentifyFileRequestDeidentifyDocumentEntityTypesItem, +) +from .deidentify_file_request_deidentify_presentation_entity_types_item import ( + DeidentifyFileRequestDeidentifyPresentationEntityTypesItem, +) +from .deidentify_file_request_deidentify_spreadsheet_entity_types_item import ( + DeidentifyFileRequestDeidentifySpreadsheetEntityTypesItem, +) +from .deidentify_file_request_deidentify_structured_text_entity_types_item import ( + DeidentifyFileRequestDeidentifyStructuredTextEntityTypesItem, +) +from .deidentify_file_request_deidentify_text_entity_types_item import ( + DeidentifyFileRequestDeidentifyTextEntityTypesItem, +) +from .deidentify_file_request_entity_types_item import DeidentifyFileRequestEntityTypesItem __all__ = [ - "DeidentifyAudioRequestFile", - "DeidentifyAudioRequestFileDataFormat", - "DeidentifyAudioRequestOutputTranscription", - "DeidentifyDocumentRequestFile", - "DeidentifyDocumentRequestFileDataFormat", - "DeidentifyFileRequestFile", - "DeidentifyFileRequestFileDataFormat", - "DeidentifyImageRequestFile", - "DeidentifyImageRequestFileDataFormat", - "DeidentifyImageRequestMaskingMethod", - "DeidentifyPdfRequestFile", - "DeidentifyPresentationRequestFile", - "DeidentifyPresentationRequestFileDataFormat", - "DeidentifySpreadsheetRequestFile", - "DeidentifySpreadsheetRequestFileDataFormat", - "DeidentifyStructuredTextRequestFile", - "DeidentifyStructuredTextRequestFileDataFormat", - "DeidentifyTextRequestFile", - "ReidentifyFileRequestFile", - "ReidentifyFileRequestFileDataFormat", - "ReidentifyFileRequestFormat", + "DeidentifyFileAudioRequestDeidentifyAudioEntityTypesItem", + "DeidentifyFileAudioRequestDeidentifyAudioOutputTranscription", + "DeidentifyFileDocumentPdfRequestDeidentifyPdfEntityTypesItem", + "DeidentifyFileImageRequestDeidentifyImageEntityTypesItem", + "DeidentifyFileImageRequestDeidentifyImageMaskingMethod", + "DeidentifyFileRequestDeidentifyDocumentEntityTypesItem", + "DeidentifyFileRequestDeidentifyPresentationEntityTypesItem", + "DeidentifyFileRequestDeidentifySpreadsheetEntityTypesItem", + "DeidentifyFileRequestDeidentifyStructuredTextEntityTypesItem", + "DeidentifyFileRequestDeidentifyTextEntityTypesItem", + "DeidentifyFileRequestEntityTypesItem", ] diff --git a/skyflow/generated/rest/files/types/deidentify_audio_request_file_data_format.py b/skyflow/generated/rest/files/types/deidentify_audio_request_file_data_format.py deleted file mode 100644 index 85f60bbb..00000000 --- a/skyflow/generated/rest/files/types/deidentify_audio_request_file_data_format.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -DeidentifyAudioRequestFileDataFormat = typing.Union[typing.Literal["mp3", "wav"], typing.Any] diff --git a/skyflow/generated/rest/files/types/deidentify_audio_request_output_transcription.py b/skyflow/generated/rest/files/types/deidentify_audio_request_output_transcription.py deleted file mode 100644 index 4588b1d1..00000000 --- a/skyflow/generated/rest/files/types/deidentify_audio_request_output_transcription.py +++ /dev/null @@ -1,14 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
- -import typing - -DeidentifyAudioRequestOutputTranscription = typing.Union[ - typing.Literal[ - "diarized_transcription", - "medical_diarized_transcription", - "medical_transcription", - "plaintext_transcription", - "transcription", - ], - typing.Any, -] diff --git a/skyflow/generated/rest/files/types/deidentify_document_request_file_data_format.py b/skyflow/generated/rest/files/types/deidentify_document_request_file_data_format.py deleted file mode 100644 index a20f4fd8..00000000 --- a/skyflow/generated/rest/files/types/deidentify_document_request_file_data_format.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -DeidentifyDocumentRequestFileDataFormat = typing.Union[typing.Literal["doc", "docx", "pdf"], typing.Any] diff --git a/skyflow/generated/rest/files/types/deidentify_file_audio_request_deidentify_audio_entity_types_item.py b/skyflow/generated/rest/files/types/deidentify_file_audio_request_deidentify_audio_entity_types_item.py new file mode 100644 index 00000000..b22a177a --- /dev/null +++ b/skyflow/generated/rest/files/types/deidentify_file_audio_request_deidentify_audio_entity_types_item.py @@ -0,0 +1,79 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +DeidentifyFileAudioRequestDeidentifyAudioEntityTypesItem = typing.Union[ + typing.Literal[ + "age", + "bank_account", + "credit_card", + "credit_card_expiration", + "cvv", + "date", + "date_interval", + "dob", + "driver_license", + "email_address", + "healthcare_number", + "ip_address", + "location", + "name", + "numerical_pii", + "phone_number", + "ssn", + "url", + "vehicle_id", + "medical_code", + "name_family", + "name_given", + "account_number", + "event", + "filename", + "gender", + "language", + "location_address", + "location_city", + "location_coordinate", + "location_country", + "location_state", + "location_zip", + "marital_status", + "money", + "name_medical_professional", + "occupation", + "organization", + "organization_medical_facility", + "origin", + "passport_number", + "password", + "physical_attribute", + "political_affiliation", + "religion", + "time", + "username", + "zodiac_sign", + "blood_type", + "condition", + "dose", + "drug", + "injury", + "medical_process", + "statistics", + "routing_number", + "corporate_action", + "financial_metric", + "product", + "trend", + "duration", + "location_address_street", + "all", + "sexuality", + "effect", + "project", + "organization_id", + "day", + "month", + "year", + ], + typing.Any, +] diff --git a/skyflow/generated/rest/files/types/deidentify_file_audio_request_deidentify_audio_output_transcription.py b/skyflow/generated/rest/files/types/deidentify_file_audio_request_deidentify_audio_output_transcription.py new file mode 100644 index 00000000..957cee86 --- /dev/null +++ b/skyflow/generated/rest/files/types/deidentify_file_audio_request_deidentify_audio_output_transcription.py @@ -0,0 +1,10 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +DeidentifyFileAudioRequestDeidentifyAudioOutputTranscription = typing.Union[ + typing.Literal[ + "transcription", "medical_transcription", "diarized_transcription", "medical_diarized_transcription" + ], + typing.Any, +] diff --git a/skyflow/generated/rest/files/types/deidentify_file_document_pdf_request_deidentify_pdf_entity_types_item.py b/skyflow/generated/rest/files/types/deidentify_file_document_pdf_request_deidentify_pdf_entity_types_item.py new file mode 100644 index 00000000..d8469e74 --- /dev/null +++ b/skyflow/generated/rest/files/types/deidentify_file_document_pdf_request_deidentify_pdf_entity_types_item.py @@ -0,0 +1,79 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +DeidentifyFileDocumentPdfRequestDeidentifyPdfEntityTypesItem = typing.Union[ + typing.Literal[ + "age", + "bank_account", + "credit_card", + "credit_card_expiration", + "cvv", + "date", + "date_interval", + "dob", + "driver_license", + "email_address", + "healthcare_number", + "ip_address", + "location", + "name", + "numerical_pii", + "phone_number", + "ssn", + "url", + "vehicle_id", + "medical_code", + "name_family", + "name_given", + "account_number", + "event", + "filename", + "gender", + "language", + "location_address", + "location_city", + "location_coordinate", + "location_country", + "location_state", + "location_zip", + "marital_status", + "money", + "name_medical_professional", + "occupation", + "organization", + "organization_medical_facility", + "origin", + "passport_number", + "password", + "physical_attribute", + "political_affiliation", + "religion", + "time", + "username", + "zodiac_sign", + "blood_type", + "condition", + "dose", + "drug", + "injury", + "medical_process", + "statistics", + "routing_number", + "corporate_action", + "financial_metric", + "product", + "trend", + "duration", + "location_address_street", + "all", + "sexuality", + "effect", + "project", + "organization_id", + "day", + "month", + "year", + ], + typing.Any, +] diff --git a/skyflow/generated/rest/files/types/deidentify_file_image_request_deidentify_image_entity_types_item.py b/skyflow/generated/rest/files/types/deidentify_file_image_request_deidentify_image_entity_types_item.py new file mode 100644 index 00000000..4597dc0f --- /dev/null +++ b/skyflow/generated/rest/files/types/deidentify_file_image_request_deidentify_image_entity_types_item.py @@ -0,0 +1,79 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +DeidentifyFileImageRequestDeidentifyImageEntityTypesItem = typing.Union[ + typing.Literal[ + "age", + "bank_account", + "credit_card", + "credit_card_expiration", + "cvv", + "date", + "date_interval", + "dob", + "driver_license", + "email_address", + "healthcare_number", + "ip_address", + "location", + "name", + "numerical_pii", + "phone_number", + "ssn", + "url", + "vehicle_id", + "medical_code", + "name_family", + "name_given", + "account_number", + "event", + "filename", + "gender", + "language", + "location_address", + "location_city", + "location_coordinate", + "location_country", + "location_state", + "location_zip", + "marital_status", + "money", + "name_medical_professional", + "occupation", + "organization", + "organization_medical_facility", + "origin", + "passport_number", + "password", + "physical_attribute", + "political_affiliation", + "religion", + "time", + "username", + "zodiac_sign", + "blood_type", + "condition", + "dose", + "drug", + "injury", + "medical_process", + "statistics", + "routing_number", + "corporate_action", + "financial_metric", + "product", + "trend", + "duration", + "location_address_street", + "all", + "sexuality", + "effect", + "project", + "organization_id", + "day", + "month", + "year", + ], + typing.Any, +] diff --git a/skyflow/generated/rest/files/types/deidentify_file_image_request_deidentify_image_masking_method.py b/skyflow/generated/rest/files/types/deidentify_file_image_request_deidentify_image_masking_method.py new file mode 100644 index 00000000..c357821e --- /dev/null +++ b/skyflow/generated/rest/files/types/deidentify_file_image_request_deidentify_image_masking_method.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +DeidentifyFileImageRequestDeidentifyImageMaskingMethod = typing.Union[typing.Literal["blur", "blackbox"], typing.Any] diff --git a/skyflow/generated/rest/files/types/deidentify_file_request_deidentify_document_entity_types_item.py b/skyflow/generated/rest/files/types/deidentify_file_request_deidentify_document_entity_types_item.py new file mode 100644 index 00000000..afcbe2ee --- /dev/null +++ b/skyflow/generated/rest/files/types/deidentify_file_request_deidentify_document_entity_types_item.py @@ -0,0 +1,79 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +DeidentifyFileRequestDeidentifyDocumentEntityTypesItem = typing.Union[ + typing.Literal[ + "age", + "bank_account", + "credit_card", + "credit_card_expiration", + "cvv", + "date", + "date_interval", + "dob", + "driver_license", + "email_address", + "healthcare_number", + "ip_address", + "location", + "name", + "numerical_pii", + "phone_number", + "ssn", + "url", + "vehicle_id", + "medical_code", + "name_family", + "name_given", + "account_number", + "event", + "filename", + "gender", + "language", + "location_address", + "location_city", + "location_coordinate", + "location_country", + "location_state", + "location_zip", + "marital_status", + "money", + "name_medical_professional", + "occupation", + "organization", + "organization_medical_facility", + "origin", + "passport_number", + "password", + "physical_attribute", + "political_affiliation", + "religion", + "time", + "username", + "zodiac_sign", + "blood_type", + "condition", + "dose", + "drug", + "injury", + "medical_process", + "statistics", + "routing_number", + "corporate_action", + "financial_metric", + "product", + "trend", + "duration", + "location_address_street", + "all", + "sexuality", + "effect", + "project", + "organization_id", + "day", + "month", + "year", + ], + typing.Any, +] diff --git a/skyflow/generated/rest/files/types/deidentify_file_request_deidentify_presentation_entity_types_item.py b/skyflow/generated/rest/files/types/deidentify_file_request_deidentify_presentation_entity_types_item.py new file mode 100644 index 00000000..69d40627 --- /dev/null +++ b/skyflow/generated/rest/files/types/deidentify_file_request_deidentify_presentation_entity_types_item.py @@ -0,0 +1,79 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +DeidentifyFileRequestDeidentifyPresentationEntityTypesItem = typing.Union[ + typing.Literal[ + "age", + "bank_account", + "credit_card", + "credit_card_expiration", + "cvv", + "date", + "date_interval", + "dob", + "driver_license", + "email_address", + "healthcare_number", + "ip_address", + "location", + "name", + "numerical_pii", + "phone_number", + "ssn", + "url", + "vehicle_id", + "medical_code", + "name_family", + "name_given", + "account_number", + "event", + "filename", + "gender", + "language", + "location_address", + "location_city", + "location_coordinate", + "location_country", + "location_state", + "location_zip", + "marital_status", + "money", + "name_medical_professional", + "occupation", + "organization", + "organization_medical_facility", + "origin", + "passport_number", + "password", + "physical_attribute", + "political_affiliation", + "religion", + "time", + "username", + "zodiac_sign", + "blood_type", + "condition", + "dose", + "drug", + "injury", + "medical_process", + "statistics", + "routing_number", + "corporate_action", + "financial_metric", + "product", + "trend", + "duration", + "location_address_street", + "all", + "sexuality", + "effect", + "project", + "organization_id", + "day", + "month", + "year", + ], + typing.Any, +] diff --git a/skyflow/generated/rest/files/types/deidentify_file_request_deidentify_spreadsheet_entity_types_item.py b/skyflow/generated/rest/files/types/deidentify_file_request_deidentify_spreadsheet_entity_types_item.py new file mode 100644 index 00000000..0bb37078 --- /dev/null +++ b/skyflow/generated/rest/files/types/deidentify_file_request_deidentify_spreadsheet_entity_types_item.py @@ -0,0 +1,79 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +DeidentifyFileRequestDeidentifySpreadsheetEntityTypesItem = typing.Union[ + typing.Literal[ + "age", + "bank_account", + "credit_card", + "credit_card_expiration", + "cvv", + "date", + "date_interval", + "dob", + "driver_license", + "email_address", + "healthcare_number", + "ip_address", + "location", + "name", + "numerical_pii", + "phone_number", + "ssn", + "url", + "vehicle_id", + "medical_code", + "name_family", + "name_given", + "account_number", + "event", + "filename", + "gender", + "language", + "location_address", + "location_city", + "location_coordinate", + "location_country", + "location_state", + "location_zip", + "marital_status", + "money", + "name_medical_professional", + "occupation", + "organization", + "organization_medical_facility", + "origin", + "passport_number", + "password", + "physical_attribute", + "political_affiliation", + "religion", + "time", + "username", + "zodiac_sign", + "blood_type", + "condition", + "dose", + "drug", + "injury", + "medical_process", + "statistics", + "routing_number", + "corporate_action", + "financial_metric", + "product", + "trend", + "duration", + "location_address_street", + "all", + "sexuality", + "effect", + "project", + "organization_id", + "day", + "month", + "year", + ], + typing.Any, +] diff --git a/skyflow/generated/rest/files/types/deidentify_file_request_deidentify_structured_text_entity_types_item.py b/skyflow/generated/rest/files/types/deidentify_file_request_deidentify_structured_text_entity_types_item.py new file mode 100644 index 00000000..a487d969 --- /dev/null +++ b/skyflow/generated/rest/files/types/deidentify_file_request_deidentify_structured_text_entity_types_item.py @@ -0,0 +1,79 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +DeidentifyFileRequestDeidentifyStructuredTextEntityTypesItem = typing.Union[ + typing.Literal[ + "age", + "bank_account", + "credit_card", + "credit_card_expiration", + "cvv", + "date", + "date_interval", + "dob", + "driver_license", + "email_address", + "healthcare_number", + "ip_address", + "location", + "name", + "numerical_pii", + "phone_number", + "ssn", + "url", + "vehicle_id", + "medical_code", + "name_family", + "name_given", + "account_number", + "event", + "filename", + "gender", + "language", + "location_address", + "location_city", + "location_coordinate", + "location_country", + "location_state", + "location_zip", + "marital_status", + "money", + "name_medical_professional", + "occupation", + "organization", + "organization_medical_facility", + "origin", + "passport_number", + "password", + "physical_attribute", + "political_affiliation", + "religion", + "time", + "username", + "zodiac_sign", + "blood_type", + "condition", + "dose", + "drug", + "injury", + "medical_process", + "statistics", + "routing_number", + "corporate_action", + "financial_metric", + "product", + "trend", + "duration", + "location_address_street", + "all", + "sexuality", + "effect", + "project", + "organization_id", + "day", + "month", + "year", + ], + typing.Any, +] diff --git a/skyflow/generated/rest/files/types/deidentify_file_request_deidentify_text_entity_types_item.py b/skyflow/generated/rest/files/types/deidentify_file_request_deidentify_text_entity_types_item.py new file mode 100644 index 00000000..2e6118cd --- /dev/null +++ b/skyflow/generated/rest/files/types/deidentify_file_request_deidentify_text_entity_types_item.py @@ -0,0 +1,79 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +DeidentifyFileRequestDeidentifyTextEntityTypesItem = typing.Union[ + typing.Literal[ + "age", + "bank_account", + "credit_card", + "credit_card_expiration", + "cvv", + "date", + "date_interval", + "dob", + "driver_license", + "email_address", + "healthcare_number", + "ip_address", + "location", + "name", + "numerical_pii", + "phone_number", + "ssn", + "url", + "vehicle_id", + "medical_code", + "name_family", + "name_given", + "account_number", + "event", + "filename", + "gender", + "language", + "location_address", + "location_city", + "location_coordinate", + "location_country", + "location_state", + "location_zip", + "marital_status", + "money", + "name_medical_professional", + "occupation", + "organization", + "organization_medical_facility", + "origin", + "passport_number", + "password", + "physical_attribute", + "political_affiliation", + "religion", + "time", + "username", + "zodiac_sign", + "blood_type", + "condition", + "dose", + "drug", + "injury", + "medical_process", + "statistics", + "routing_number", + "corporate_action", + "financial_metric", + "product", + "trend", + "duration", + "location_address_street", + "all", + "sexuality", + "effect", + "project", + "organization_id", + "day", + "month", + "year", + ], + typing.Any, +] diff --git a/skyflow/generated/rest/files/types/deidentify_file_request_entity_types_item.py b/skyflow/generated/rest/files/types/deidentify_file_request_entity_types_item.py new file mode 100644 index 00000000..6d7e8d38 --- /dev/null +++ b/skyflow/generated/rest/files/types/deidentify_file_request_entity_types_item.py @@ -0,0 +1,79 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +DeidentifyFileRequestEntityTypesItem = typing.Union[ + typing.Literal[ + "age", + "bank_account", + "credit_card", + "credit_card_expiration", + "cvv", + "date", + "date_interval", + "dob", + "driver_license", + "email_address", + "healthcare_number", + "ip_address", + "location", + "name", + "numerical_pii", + "phone_number", + "ssn", + "url", + "vehicle_id", + "medical_code", + "name_family", + "name_given", + "account_number", + "event", + "filename", + "gender", + "language", + "location_address", + "location_city", + "location_coordinate", + "location_country", + "location_state", + "location_zip", + "marital_status", + "money", + "name_medical_professional", + "occupation", + "organization", + "organization_medical_facility", + "origin", + "passport_number", + "password", + "physical_attribute", + "political_affiliation", + "religion", + "time", + "username", + "zodiac_sign", + "blood_type", + "condition", + "dose", + "drug", + "injury", + "medical_process", + "statistics", + "routing_number", + "corporate_action", + "financial_metric", + "product", + "trend", + "duration", + "location_address_street", + "all", + "sexuality", + "effect", + "project", + "organization_id", + "day", + "month", + "year", + ], + typing.Any, +] diff --git a/skyflow/generated/rest/files/types/deidentify_image_request_file_data_format.py b/skyflow/generated/rest/files/types/deidentify_image_request_file_data_format.py deleted file mode 100644 index a2ca8f2a..00000000 --- a/skyflow/generated/rest/files/types/deidentify_image_request_file_data_format.py +++ /dev/null @@ -1,7 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
- -import typing - -DeidentifyImageRequestFileDataFormat = typing.Union[ - typing.Literal["bmp", "jpeg", "jpg", "png", "tif", "tiff"], typing.Any -] diff --git a/skyflow/generated/rest/files/types/deidentify_image_request_masking_method.py b/skyflow/generated/rest/files/types/deidentify_image_request_masking_method.py deleted file mode 100644 index bc0c338c..00000000 --- a/skyflow/generated/rest/files/types/deidentify_image_request_masking_method.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -DeidentifyImageRequestMaskingMethod = typing.Union[typing.Literal["blackbox", "blur"], typing.Any] diff --git a/skyflow/generated/rest/files/types/deidentify_presentation_request_file.py b/skyflow/generated/rest/files/types/deidentify_presentation_request_file.py deleted file mode 100644 index c618ccc1..00000000 --- a/skyflow/generated/rest/files/types/deidentify_presentation_request_file.py +++ /dev/null @@ -1,34 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -import pydantic -import typing_extensions -from ...core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from ...core.serialization import FieldMetadata -from .deidentify_presentation_request_file_data_format import DeidentifyPresentationRequestFileDataFormat - - -class DeidentifyPresentationRequestFile(UniversalBaseModel): - """ - File to de-identify. Files are specified as Base64-encoded data. - """ - - base_64: typing_extensions.Annotated[str, FieldMetadata(alias="base64")] = pydantic.Field() - """ - Base64-encoded data of the file to de-identify. - """ - - data_format: DeidentifyPresentationRequestFileDataFormat = pydantic.Field() - """ - Data format of the file. - """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/skyflow/generated/rest/files/types/deidentify_presentation_request_file_data_format.py b/skyflow/generated/rest/files/types/deidentify_presentation_request_file_data_format.py deleted file mode 100644 index d09f42f8..00000000 --- a/skyflow/generated/rest/files/types/deidentify_presentation_request_file_data_format.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -DeidentifyPresentationRequestFileDataFormat = typing.Union[typing.Literal["ppt", "pptx"], typing.Any] diff --git a/skyflow/generated/rest/files/types/deidentify_spreadsheet_request_file_data_format.py b/skyflow/generated/rest/files/types/deidentify_spreadsheet_request_file_data_format.py deleted file mode 100644 index 20db2856..00000000 --- a/skyflow/generated/rest/files/types/deidentify_spreadsheet_request_file_data_format.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -DeidentifySpreadsheetRequestFileDataFormat = typing.Union[typing.Literal["csv", "xls", "xlsx"], typing.Any] diff --git a/skyflow/generated/rest/files/types/deidentify_structured_text_request_file.py b/skyflow/generated/rest/files/types/deidentify_structured_text_request_file.py deleted file mode 100644 index aa2d0834..00000000 --- a/skyflow/generated/rest/files/types/deidentify_structured_text_request_file.py +++ /dev/null @@ -1,34 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
- -import typing - -import pydantic -import typing_extensions -from ...core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from ...core.serialization import FieldMetadata -from .deidentify_structured_text_request_file_data_format import DeidentifyStructuredTextRequestFileDataFormat - - -class DeidentifyStructuredTextRequestFile(UniversalBaseModel): - """ - File to de-identify. Files are specified as Base64-encoded data. - """ - - base_64: typing_extensions.Annotated[str, FieldMetadata(alias="base64")] = pydantic.Field() - """ - Base64-encoded data of the file to de-identify. - """ - - data_format: DeidentifyStructuredTextRequestFileDataFormat = pydantic.Field() - """ - Data format of the file. - """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/skyflow/generated/rest/files/types/deidentify_structured_text_request_file_data_format.py b/skyflow/generated/rest/files/types/deidentify_structured_text_request_file_data_format.py deleted file mode 100644 index f956fe48..00000000 --- a/skyflow/generated/rest/files/types/deidentify_structured_text_request_file_data_format.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -DeidentifyStructuredTextRequestFileDataFormat = typing.Union[typing.Literal["json", "xml"], typing.Any] diff --git a/skyflow/generated/rest/files/types/reidentify_file_request_file.py b/skyflow/generated/rest/files/types/reidentify_file_request_file.py deleted file mode 100644 index 429f22ee..00000000 --- a/skyflow/generated/rest/files/types/reidentify_file_request_file.py +++ /dev/null @@ -1,34 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -import pydantic -import typing_extensions -from ...core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from ...core.serialization import FieldMetadata -from .reidentify_file_request_file_data_format import ReidentifyFileRequestFileDataFormat - - -class ReidentifyFileRequestFile(UniversalBaseModel): - """ - File to re-identify. Files are specified as Base64-encoded data or an EFS path. - """ - - base_64: typing_extensions.Annotated[str, FieldMetadata(alias="base64")] = pydantic.Field() - """ - Base64-encoded data of the file to re-identify. - """ - - data_format: ReidentifyFileRequestFileDataFormat = pydantic.Field() - """ - Data format of the file. - """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/skyflow/generated/rest/files/types/reidentify_file_request_file_data_format.py b/skyflow/generated/rest/files/types/reidentify_file_request_file_data_format.py deleted file mode 100644 index 5aca9bb6..00000000 --- a/skyflow/generated/rest/files/types/reidentify_file_request_file_data_format.py +++ /dev/null @@ -1,7 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
- -import typing - -ReidentifyFileRequestFileDataFormat = typing.Union[ - typing.Literal["csv", "doc", "docx", "json", "txt", "xls", "xlsx", "xml"], typing.Any -] diff --git a/skyflow/generated/rest/guardrails/client.py b/skyflow/generated/rest/guardrails/client.py index e7fe1e05..88cab59d 100644 --- a/skyflow/generated/rest/guardrails/client.py +++ b/skyflow/generated/rest/guardrails/client.py @@ -4,8 +4,7 @@ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper from ..core.request_options import RequestOptions -from ..types.check_guardrails_response import CheckGuardrailsResponse -from ..types.vault_id import VaultId +from ..types.detect_guardrails_response import DetectGuardrailsResponse from .raw_client import AsyncRawGuardrailsClient, RawGuardrailsClient # this is used as the default value for optional parameters @@ -30,24 +29,25 @@ def with_raw_response(self) -> RawGuardrailsClient: def check_guardrails( self, *, - vault_id: VaultId, text: str, + vault_id: str, check_toxicity: typing.Optional[bool] = OMIT, deny_topics: typing.Optional[typing.Sequence[str]] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> CheckGuardrailsResponse: + ) -> DetectGuardrailsResponse: """ Preserve safety and compliance with usage policies. Parameters ---------- - vault_id : VaultId - text : str Text to check against guardrails. + vault_id : str + ID of the vault. + check_toxicity : typing.Optional[bool] - Check for toxicity in the text. + If `true`, checks for toxicity in the text. deny_topics : typing.Optional[typing.Sequence[str]] List of topics to deny. @@ -57,8 +57,8 @@ def check_guardrails( Returns ------- - CheckGuardrailsResponse - A successful response. + DetectGuardrailsResponse + OK Examples -------- @@ -68,13 +68,15 @@ def check_guardrails( token="YOUR_TOKEN", ) client.guardrails.check_guardrails( - vault_id="vault_id", - text="text", + text="I love to play cricket.", + check_toxicity=True, + deny_topics=["sports"], + vault_id="$VAULT_ID", ) """ _response = self._raw_client.check_guardrails( - vault_id=vault_id, text=text, + vault_id=vault_id, check_toxicity=check_toxicity, deny_topics=deny_topics, request_options=request_options, @@ -100,24 +102,25 @@ def with_raw_response(self) -> AsyncRawGuardrailsClient: async def check_guardrails( self, *, - vault_id: VaultId, text: str, + vault_id: str, check_toxicity: typing.Optional[bool] = OMIT, deny_topics: typing.Optional[typing.Sequence[str]] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> CheckGuardrailsResponse: + ) -> DetectGuardrailsResponse: """ Preserve safety and compliance with usage policies. Parameters ---------- - vault_id : VaultId - text : str Text to check against guardrails. + vault_id : str + ID of the vault. + check_toxicity : typing.Optional[bool] - Check for toxicity in the text. + If `true`, checks for toxicity in the text. deny_topics : typing.Optional[typing.Sequence[str]] List of topics to deny. @@ -127,8 +130,8 @@ async def check_guardrails( Returns ------- - CheckGuardrailsResponse - A successful response. 
+ DetectGuardrailsResponse + OK Examples -------- @@ -143,16 +146,18 @@ async def check_guardrails( async def main() -> None: await client.guardrails.check_guardrails( - vault_id="vault_id", - text="text", + text="I love to play cricket.", + check_toxicity=True, + deny_topics=["sports"], + vault_id="$VAULT_ID", ) asyncio.run(main()) """ _response = await self._raw_client.check_guardrails( - vault_id=vault_id, text=text, + vault_id=vault_id, check_toxicity=check_toxicity, deny_topics=deny_topics, request_options=request_options, diff --git a/skyflow/generated/rest/guardrails/raw_client.py b/skyflow/generated/rest/guardrails/raw_client.py index 11030fd3..1328b1b8 100644 --- a/skyflow/generated/rest/guardrails/raw_client.py +++ b/skyflow/generated/rest/guardrails/raw_client.py @@ -11,9 +11,8 @@ from ..errors.bad_request_error import BadRequestError from ..errors.internal_server_error import InternalServerError from ..errors.unauthorized_error import UnauthorizedError -from ..types.check_guardrails_response import CheckGuardrailsResponse +from ..types.detect_guardrails_response import DetectGuardrailsResponse from ..types.error_response import ErrorResponse -from ..types.vault_id import VaultId # this is used as the default value for optional parameters OMIT = typing.cast(typing.Any, ...) @@ -26,24 +25,25 @@ def __init__(self, *, client_wrapper: SyncClientWrapper): def check_guardrails( self, *, - vault_id: VaultId, text: str, + vault_id: str, check_toxicity: typing.Optional[bool] = OMIT, deny_topics: typing.Optional[typing.Sequence[str]] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> HttpResponse[CheckGuardrailsResponse]: + ) -> HttpResponse[DetectGuardrailsResponse]: """ Preserve safety and compliance with usage policies. Parameters ---------- - vault_id : VaultId - text : str Text to check against guardrails. + vault_id : str + ID of the vault. + check_toxicity : typing.Optional[bool] - Check for toxicity in the text. + If `true`, checks for toxicity in the text. deny_topics : typing.Optional[typing.Sequence[str]] List of topics to deny. @@ -53,17 +53,17 @@ def check_guardrails( Returns ------- - HttpResponse[CheckGuardrailsResponse] - A successful response. + HttpResponse[DetectGuardrailsResponse] + OK """ _response = self._client_wrapper.httpx_client.request( "v1/detect/guardrails", method="POST", json={ - "vault_id": vault_id, "text": text, "check_toxicity": check_toxicity, "deny_topics": deny_topics, + "vault_id": vault_id, }, headers={ "content-type": "application/json", @@ -74,9 +74,9 @@ def check_guardrails( try: if 200 <= _response.status_code < 300: _data = typing.cast( - CheckGuardrailsResponse, + DetectGuardrailsResponse, parse_obj_as( - type_=CheckGuardrailsResponse, # type: ignore + type_=DetectGuardrailsResponse, # type: ignore object_=_response.json(), ), ) @@ -127,24 +127,25 @@ def __init__(self, *, client_wrapper: AsyncClientWrapper): async def check_guardrails( self, *, - vault_id: VaultId, text: str, + vault_id: str, check_toxicity: typing.Optional[bool] = OMIT, deny_topics: typing.Optional[typing.Sequence[str]] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> AsyncHttpResponse[CheckGuardrailsResponse]: + ) -> AsyncHttpResponse[DetectGuardrailsResponse]: """ Preserve safety and compliance with usage policies. Parameters ---------- - vault_id : VaultId - text : str Text to check against guardrails. + vault_id : str + ID of the vault. + check_toxicity : typing.Optional[bool] - Check for toxicity in the text. 
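The reshaped guardrails call above (plain-string vault_id, DetectGuardrailsResponse return type) can be exercised roughly as in the generated example; the vault ID below is a placeholder and the exact response fields depend on the generated DetectGuardrailsResponse model.

from skyflow import Skyflow

client = Skyflow(token="YOUR_TOKEN")

# Check a prompt for toxicity and denied topics before passing it downstream.
result = client.guardrails.check_guardrails(
    text="I love to play cricket.",
    vault_id="YOUR_VAULT_ID",  # placeholder
    check_toxicity=True,
    deny_topics=["sports"],
)
print(result)  # DetectGuardrailsResponse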
+ If `true`, checks for toxicity in the text. deny_topics : typing.Optional[typing.Sequence[str]] List of topics to deny. @@ -154,17 +155,17 @@ async def check_guardrails( Returns ------- - AsyncHttpResponse[CheckGuardrailsResponse] - A successful response. + AsyncHttpResponse[DetectGuardrailsResponse] + OK """ _response = await self._client_wrapper.httpx_client.request( "v1/detect/guardrails", method="POST", json={ - "vault_id": vault_id, "text": text, "check_toxicity": check_toxicity, "deny_topics": deny_topics, + "vault_id": vault_id, }, headers={ "content-type": "application/json", @@ -175,9 +176,9 @@ async def check_guardrails( try: if 200 <= _response.status_code < 300: _data = typing.cast( - CheckGuardrailsResponse, + DetectGuardrailsResponse, parse_obj_as( - type_=CheckGuardrailsResponse, # type: ignore + type_=DetectGuardrailsResponse, # type: ignore object_=_response.json(), ), ) diff --git a/skyflow/generated/rest/records/client.py b/skyflow/generated/rest/records/client.py index cfe15a1c..1b5ddeb7 100644 --- a/skyflow/generated/rest/records/client.py +++ b/skyflow/generated/rest/records/client.py @@ -192,6 +192,13 @@ def record_service_bulk_get_record( client.records.record_service_bulk_get_record( vault_id="vaultID", object_name="objectName", + redaction="DEFAULT", + tokenization=True, + offset="offset", + limit="limit", + download_url=True, + column_name="column_name", + order_by="ASCENDING", ) """ _response = self._raw_client.record_service_bulk_get_record( @@ -409,6 +416,9 @@ def record_service_get_record( vault_id="vaultID", object_name="objectName", id="ID", + redaction="DEFAULT", + tokenization=True, + download_url=True, ) """ _response = self._raw_client.record_service_get_record( @@ -947,6 +957,13 @@ async def main() -> None: await client.records.record_service_bulk_get_record( vault_id="vaultID", object_name="objectName", + redaction="DEFAULT", + tokenization=True, + offset="offset", + limit="limit", + download_url=True, + column_name="column_name", + order_by="ASCENDING", ) @@ -1188,6 +1205,9 @@ async def main() -> None: vault_id="vaultID", object_name="objectName", id="ID", + redaction="DEFAULT", + tokenization=True, + download_url=True, ) diff --git a/skyflow/generated/rest/strings/__init__.py b/skyflow/generated/rest/strings/__init__.py index 4cabb7fb..50bdd77e 100644 --- a/skyflow/generated/rest/strings/__init__.py +++ b/skyflow/generated/rest/strings/__init__.py @@ -2,6 +2,6 @@ # isort: skip_file -from .types import ReidentifyStringRequestFormat +from .types import DeidentifyStringRequestEntityTypesItem -__all__ = ["ReidentifyStringRequestFormat"] +__all__ = ["DeidentifyStringRequestEntityTypesItem"] diff --git a/skyflow/generated/rest/strings/client.py b/skyflow/generated/rest/strings/client.py index 14b2266d..88d8c2c2 100644 --- a/skyflow/generated/rest/strings/client.py +++ b/skyflow/generated/rest/strings/client.py @@ -4,17 +4,13 @@ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper from ..core.request_options import RequestOptions -from ..types.allow_regex import AllowRegex -from ..types.configuration_id import ConfigurationId from ..types.deidentify_string_response import DeidentifyStringResponse -from ..types.entity_types import EntityTypes -from ..types.reidentify_string_response import ReidentifyStringResponse -from ..types.restrict_regex import RestrictRegex -from ..types.token_type import TokenType +from ..types.format import Format +from ..types.identify_response import IdentifyResponse +from ..types.token_type_mapping import 
TokenTypeMapping from ..types.transformations import Transformations -from ..types.vault_id import VaultId from .raw_client import AsyncRawStringsClient, RawStringsClient -from .types.reidentify_string_request_format import ReidentifyStringRequestFormat +from .types.deidentify_string_request_entity_types_item import DeidentifyStringRequestEntityTypesItem # this is used as the default value for optional parameters OMIT = typing.cast(typing.Any, ...) @@ -38,14 +34,14 @@ def with_raw_response(self) -> RawStringsClient: def deidentify_string( self, *, - vault_id: VaultId, text: str, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenType] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + vault_id: str, + entity_types: typing.Optional[typing.Sequence[DeidentifyStringRequestEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyStringResponse: """ @@ -53,30 +49,35 @@ def deidentify_string( Parameters ---------- - vault_id : VaultId - text : str - String to de-identify. + Text to de-identify. - configuration_id : typing.Optional[ConfigurationId] + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - entity_types : typing.Optional[EntityTypes] + entity_types : typing.Optional[typing.Sequence[DeidentifyStringRequestEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenType] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyStringResponse - A successful response. 
+ OK Examples -------- @@ -86,19 +87,19 @@ def deidentify_string( token="YOUR_TOKEN", ) client.strings.deidentify_string( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - text="My name is John Doe, and my email is johndoe@acme.com.", + text="text", + vault_id="f4b3b3b3-3b3b-3b3b-3b3b-3b3b3b3b3b3b", ) """ _response = self._raw_client.deidentify_string( - vault_id=vault_id, text=text, - configuration_id=configuration_id, + vault_id=vault_id, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data @@ -106,32 +107,31 @@ def deidentify_string( def reidentify_string( self, *, - text: str, - vault_id: str, - format: typing.Optional[ReidentifyStringRequestFormat] = OMIT, + text: typing.Optional[str] = OMIT, + vault_id: typing.Optional[str] = OMIT, + format: typing.Optional[Format] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> ReidentifyStringResponse: + ) -> IdentifyResponse: """ Re-identifies tokens in a string. Parameters ---------- - text : str - String to re-identify. + text : typing.Optional[str] + Text to reidentify. - vault_id : str + vault_id : typing.Optional[str] ID of the vault where the entities are stored. - format : typing.Optional[ReidentifyStringRequestFormat] - Mapping of perferred data formatting options to entity types. Returned values are dependent on the configuration of the vault storing the data and the permissions of the user or account making the request. + format : typing.Optional[Format] request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - ReidentifyStringResponse - A successful response. + IdentifyResponse + OK Examples -------- @@ -140,10 +140,7 @@ def reidentify_string( client = Skyflow( token="YOUR_TOKEN", ) - client.strings.reidentify_string( - text="My name is [NAME_1], and my email is [EMAIL_1].", - vault_id="1ad6db07-8405-46cf-9a1e-db148ff9f4c5", - ) + client.strings.reidentify_string() """ _response = self._raw_client.reidentify_string( text=text, vault_id=vault_id, format=format, request_options=request_options @@ -169,14 +166,14 @@ def with_raw_response(self) -> AsyncRawStringsClient: async def deidentify_string( self, *, - vault_id: VaultId, text: str, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenType] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + vault_id: str, + entity_types: typing.Optional[typing.Sequence[DeidentifyStringRequestEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> DeidentifyStringResponse: """ @@ -184,30 +181,35 @@ async def deidentify_string( Parameters ---------- - vault_id : VaultId - text : str - String to de-identify. + Text to de-identify. - configuration_id : typing.Optional[ConfigurationId] + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. 
- entity_types : typing.Optional[EntityTypes] + entity_types : typing.Optional[typing.Sequence[DeidentifyStringRequestEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenType] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- DeidentifyStringResponse - A successful response. + OK Examples -------- @@ -222,22 +224,22 @@ async def deidentify_string( async def main() -> None: await client.strings.deidentify_string( - vault_id="f4b3b3b33b3b3b3b3b3b3b3b3b3b3b3b", - text="My name is John Doe, and my email is johndoe@acme.com.", + text="text", + vault_id="f4b3b3b3-3b3b-3b3b-3b3b-3b3b3b3b3b3b", ) asyncio.run(main()) """ _response = await self._raw_client.deidentify_string( - vault_id=vault_id, text=text, - configuration_id=configuration_id, + vault_id=vault_id, entity_types=entity_types, token_type=token_type, allow_regex=allow_regex, restrict_regex=restrict_regex, transformations=transformations, + configuration_id=configuration_id, request_options=request_options, ) return _response.data @@ -245,32 +247,31 @@ async def main() -> None: async def reidentify_string( self, *, - text: str, - vault_id: str, - format: typing.Optional[ReidentifyStringRequestFormat] = OMIT, + text: typing.Optional[str] = OMIT, + vault_id: typing.Optional[str] = OMIT, + format: typing.Optional[Format] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> ReidentifyStringResponse: + ) -> IdentifyResponse: """ Re-identifies tokens in a string. Parameters ---------- - text : str - String to re-identify. + text : typing.Optional[str] + Text to reidentify. - vault_id : str + vault_id : typing.Optional[str] ID of the vault where the entities are stored. - format : typing.Optional[ReidentifyStringRequestFormat] - Mapping of perferred data formatting options to entity types. Returned values are dependent on the configuration of the vault storing the data and the permissions of the user or account making the request. 
+ format : typing.Optional[Format] request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - ReidentifyStringResponse - A successful response. + IdentifyResponse + OK Examples -------- @@ -284,10 +285,7 @@ async def reidentify_string( async def main() -> None: - await client.strings.reidentify_string( - text="My name is [NAME_1], and my email is [EMAIL_1].", - vault_id="1ad6db07-8405-46cf-9a1e-db148ff9f4c5", - ) + await client.strings.reidentify_string() asyncio.run(main()) diff --git a/skyflow/generated/rest/strings/raw_client.py b/skyflow/generated/rest/strings/raw_client.py index 3ae9bf41..313c10ce 100644 --- a/skyflow/generated/rest/strings/raw_client.py +++ b/skyflow/generated/rest/strings/raw_client.py @@ -12,17 +12,13 @@ from ..errors.bad_request_error import BadRequestError from ..errors.internal_server_error import InternalServerError from ..errors.unauthorized_error import UnauthorizedError -from ..types.allow_regex import AllowRegex -from ..types.configuration_id import ConfigurationId from ..types.deidentify_string_response import DeidentifyStringResponse -from ..types.entity_types import EntityTypes from ..types.error_response import ErrorResponse -from ..types.reidentify_string_response import ReidentifyStringResponse -from ..types.restrict_regex import RestrictRegex -from ..types.token_type import TokenType +from ..types.format import Format +from ..types.identify_response import IdentifyResponse +from ..types.token_type_mapping import TokenTypeMapping from ..types.transformations import Transformations -from ..types.vault_id import VaultId -from .types.reidentify_string_request_format import ReidentifyStringRequestFormat +from .types.deidentify_string_request_entity_types_item import DeidentifyStringRequestEntityTypesItem # this is used as the default value for optional parameters OMIT = typing.cast(typing.Any, ...) @@ -35,14 +31,14 @@ def __init__(self, *, client_wrapper: SyncClientWrapper): def deidentify_string( self, *, - vault_id: VaultId, text: str, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenType] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + vault_id: str, + entity_types: typing.Optional[typing.Sequence[DeidentifyStringRequestEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> HttpResponse[DeidentifyStringResponse]: """ @@ -50,47 +46,52 @@ def deidentify_string( Parameters ---------- - vault_id : VaultId - text : str - String to de-identify. + Text to de-identify. - configuration_id : typing.Optional[ConfigurationId] + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - entity_types : typing.Optional[EntityTypes] + entity_types : typing.Optional[typing.Sequence[DeidentifyStringRequestEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenType] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. 
Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- HttpResponse[DeidentifyStringResponse] - A successful response. + OK """ _response = self._client_wrapper.httpx_client.request( "v1/detect/deidentify/string", method="POST", json={ - "vault_id": vault_id, "text": text, - "configuration_id": configuration_id, + "vault_id": vault_id, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenType, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -149,32 +150,31 @@ def deidentify_string( def reidentify_string( self, *, - text: str, - vault_id: str, - format: typing.Optional[ReidentifyStringRequestFormat] = OMIT, + text: typing.Optional[str] = OMIT, + vault_id: typing.Optional[str] = OMIT, + format: typing.Optional[Format] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> HttpResponse[ReidentifyStringResponse]: + ) -> HttpResponse[IdentifyResponse]: """ Re-identifies tokens in a string. Parameters ---------- - text : str - String to re-identify. + text : typing.Optional[str] + Text to reidentify. - vault_id : str + vault_id : typing.Optional[str] ID of the vault where the entities are stored. - format : typing.Optional[ReidentifyStringRequestFormat] - Mapping of perferred data formatting options to entity types. Returned values are dependent on the configuration of the vault storing the data and the permissions of the user or account making the request. + format : typing.Optional[Format] request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - HttpResponse[ReidentifyStringResponse] - A successful response. 
+ HttpResponse[IdentifyResponse] + OK """ _response = self._client_wrapper.httpx_client.request( "v1/detect/reidentify/string", @@ -182,9 +182,7 @@ def reidentify_string( json={ "text": text, "vault_id": vault_id, - "format": convert_and_respect_annotation_metadata( - object_=format, annotation=ReidentifyStringRequestFormat, direction="write" - ), + "format": convert_and_respect_annotation_metadata(object_=format, annotation=Format, direction="write"), }, headers={ "content-type": "application/json", @@ -195,9 +193,9 @@ def reidentify_string( try: if 200 <= _response.status_code < 300: _data = typing.cast( - ReidentifyStringResponse, + IdentifyResponse, parse_obj_as( - type_=ReidentifyStringResponse, # type: ignore + type_=IdentifyResponse, # type: ignore object_=_response.json(), ), ) @@ -248,14 +246,14 @@ def __init__(self, *, client_wrapper: AsyncClientWrapper): async def deidentify_string( self, *, - vault_id: VaultId, text: str, - configuration_id: typing.Optional[ConfigurationId] = OMIT, - entity_types: typing.Optional[EntityTypes] = OMIT, - token_type: typing.Optional[TokenType] = OMIT, - allow_regex: typing.Optional[AllowRegex] = OMIT, - restrict_regex: typing.Optional[RestrictRegex] = OMIT, + vault_id: str, + entity_types: typing.Optional[typing.Sequence[DeidentifyStringRequestEntityTypesItem]] = OMIT, + token_type: typing.Optional[TokenTypeMapping] = OMIT, + allow_regex: typing.Optional[typing.Sequence[str]] = OMIT, + restrict_regex: typing.Optional[typing.Sequence[str]] = OMIT, transformations: typing.Optional[Transformations] = OMIT, + configuration_id: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> AsyncHttpResponse[DeidentifyStringResponse]: """ @@ -263,47 +261,52 @@ async def deidentify_string( Parameters ---------- - vault_id : VaultId - text : str - String to de-identify. + Text to de-identify. - configuration_id : typing.Optional[ConfigurationId] + vault_id : str + ID of a vault that you have Detect Invoker or Vault Owner permissions for. - entity_types : typing.Optional[EntityTypes] + entity_types : typing.Optional[typing.Sequence[DeidentifyStringRequestEntityTypesItem]] + Entities to detect and de-identify. - token_type : typing.Optional[TokenType] + token_type : typing.Optional[TokenTypeMapping] - allow_regex : typing.Optional[AllowRegex] + allow_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to display in plaintext. Entities appear in plaintext if an expression matches either the entirety of a detected entity or a substring of it. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. - restrict_regex : typing.Optional[RestrictRegex] + restrict_regex : typing.Optional[typing.Sequence[str]] + Regular expressions to replace with '[RESTRICTED]'. Expressions must match the entirety of a detected entity, not just a substring, for the entity to be restricted. Expressions don't match across entity boundaries. If a string or entity matches both `allow_regex` and `restrict_regex`, the entity is displayed in plaintext. If a string is detected as an entity and a `restrict_regex` pattern matches the entire detected entity, the entity is replaced with '[RESTRICTED]'. If a string is detected as an entity but a `restrict_regex` pattern only matches a substring of it, the `restrict_regex` pattern is ignored, and the entity is processed according to the specified tokenization and transformation settings. 
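A short sketch of the flattened deidentify_string parameters documented above: entity_types as a plain sequence of entity-type literals, and allow_regex / restrict_regex as lists of strings. The sample text, vault ID, and regexes are illustrative only; the entity-type values come from DeidentifyStringRequestEntityTypesItem introduced in this patch.

from skyflow import Skyflow

client = Skyflow(token="YOUR_TOKEN")

# allow_regex keeps matching entities in plaintext (a substring match is enough);
# restrict_regex replaces entities it matches in full with '[RESTRICTED]'.
response = client.strings.deidentify_string(
    text="My name is John Doe, and my email is johndoe@acme.com.",
    vault_id="YOUR_VAULT_ID",  # placeholder
    entity_types=["name", "email_address"],
    allow_regex=[r".*@acme\.com"],  # email stays in plaintext
    restrict_regex=[r"John Doe"],   # name becomes [RESTRICTED]
)
print(response)  # DeidentifyStringResponse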
transformations : typing.Optional[Transformations] + configuration_id : typing.Optional[str] + ID of the Detect configuration to use for de-identification. Can't be specified with fields other than `vault_id`, `text`, and `file`. + request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- AsyncHttpResponse[DeidentifyStringResponse] - A successful response. + OK """ _response = await self._client_wrapper.httpx_client.request( "v1/detect/deidentify/string", method="POST", json={ - "vault_id": vault_id, "text": text, - "configuration_id": configuration_id, + "vault_id": vault_id, "entity_types": entity_types, "token_type": convert_and_respect_annotation_metadata( - object_=token_type, annotation=TokenType, direction="write" + object_=token_type, annotation=TokenTypeMapping, direction="write" ), "allow_regex": allow_regex, "restrict_regex": restrict_regex, "transformations": convert_and_respect_annotation_metadata( object_=transformations, annotation=Transformations, direction="write" ), + "configuration_id": configuration_id, }, headers={ "content-type": "application/json", @@ -362,32 +365,31 @@ async def deidentify_string( async def reidentify_string( self, *, - text: str, - vault_id: str, - format: typing.Optional[ReidentifyStringRequestFormat] = OMIT, + text: typing.Optional[str] = OMIT, + vault_id: typing.Optional[str] = OMIT, + format: typing.Optional[Format] = OMIT, request_options: typing.Optional[RequestOptions] = None, - ) -> AsyncHttpResponse[ReidentifyStringResponse]: + ) -> AsyncHttpResponse[IdentifyResponse]: """ Re-identifies tokens in a string. Parameters ---------- - text : str - String to re-identify. + text : typing.Optional[str] + Text to reidentify. - vault_id : str + vault_id : typing.Optional[str] ID of the vault where the entities are stored. - format : typing.Optional[ReidentifyStringRequestFormat] - Mapping of perferred data formatting options to entity types. Returned values are dependent on the configuration of the vault storing the data and the permissions of the user or account making the request. + format : typing.Optional[Format] request_options : typing.Optional[RequestOptions] Request-specific configuration. Returns ------- - AsyncHttpResponse[ReidentifyStringResponse] - A successful response. 
+ AsyncHttpResponse[IdentifyResponse] + OK """ _response = await self._client_wrapper.httpx_client.request( "v1/detect/reidentify/string", @@ -395,9 +397,7 @@ async def reidentify_string( json={ "text": text, "vault_id": vault_id, - "format": convert_and_respect_annotation_metadata( - object_=format, annotation=ReidentifyStringRequestFormat, direction="write" - ), + "format": convert_and_respect_annotation_metadata(object_=format, annotation=Format, direction="write"), }, headers={ "content-type": "application/json", @@ -408,9 +408,9 @@ async def reidentify_string( try: if 200 <= _response.status_code < 300: _data = typing.cast( - ReidentifyStringResponse, + IdentifyResponse, parse_obj_as( - type_=ReidentifyStringResponse, # type: ignore + type_=IdentifyResponse, # type: ignore object_=_response.json(), ), ) diff --git a/skyflow/generated/rest/strings/types/__init__.py b/skyflow/generated/rest/strings/types/__init__.py index 97d06583..592fae37 100644 --- a/skyflow/generated/rest/strings/types/__init__.py +++ b/skyflow/generated/rest/strings/types/__init__.py @@ -2,6 +2,6 @@ # isort: skip_file -from .reidentify_string_request_format import ReidentifyStringRequestFormat +from .deidentify_string_request_entity_types_item import DeidentifyStringRequestEntityTypesItem -__all__ = ["ReidentifyStringRequestFormat"] +__all__ = ["DeidentifyStringRequestEntityTypesItem"] diff --git a/skyflow/generated/rest/strings/types/deidentify_string_request_entity_types_item.py b/skyflow/generated/rest/strings/types/deidentify_string_request_entity_types_item.py new file mode 100644 index 00000000..bde5d084 --- /dev/null +++ b/skyflow/generated/rest/strings/types/deidentify_string_request_entity_types_item.py @@ -0,0 +1,79 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +DeidentifyStringRequestEntityTypesItem = typing.Union[ + typing.Literal[ + "age", + "bank_account", + "credit_card", + "credit_card_expiration", + "cvv", + "date", + "date_interval", + "dob", + "driver_license", + "email_address", + "healthcare_number", + "ip_address", + "location", + "name", + "numerical_pii", + "phone_number", + "ssn", + "url", + "vehicle_id", + "medical_code", + "name_family", + "name_given", + "account_number", + "event", + "filename", + "gender", + "language", + "location_address", + "location_city", + "location_coordinate", + "location_country", + "location_state", + "location_zip", + "marital_status", + "money", + "name_medical_professional", + "occupation", + "organization", + "organization_medical_facility", + "origin", + "passport_number", + "password", + "physical_attribute", + "political_affiliation", + "religion", + "time", + "username", + "zodiac_sign", + "blood_type", + "condition", + "dose", + "drug", + "injury", + "medical_process", + "statistics", + "routing_number", + "corporate_action", + "financial_metric", + "product", + "trend", + "duration", + "location_address_street", + "all", + "sexuality", + "effect", + "project", + "organization_id", + "day", + "month", + "year", + ], + typing.Any, +] diff --git a/skyflow/generated/rest/strings/types/reidentify_string_request_format.py b/skyflow/generated/rest/strings/types/reidentify_string_request_format.py deleted file mode 100644 index bfda392c..00000000 --- a/skyflow/generated/rest/strings/types/reidentify_string_request_format.py +++ /dev/null @@ -1,37 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
- -import typing - -import pydantic -from ...core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from ...types.entity_type import EntityType - - -class ReidentifyStringRequestFormat(UniversalBaseModel): - """ - Mapping of perferred data formatting options to entity types. Returned values are dependent on the configuration of the vault storing the data and the permissions of the user or account making the request. - """ - - redacted: typing.Optional[typing.List[EntityType]] = pydantic.Field(default=None) - """ - Entity types to fully redact. - """ - - masked: typing.Optional[typing.List[EntityType]] = pydantic.Field(default=None) - """ - Entity types to mask. - """ - - plaintext: typing.Optional[typing.List[EntityType]] = pydantic.Field(default=None) - """ - Entity types to return in plaintext. - """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/skyflow/generated/rest/types/__init__.py b/skyflow/generated/rest/types/__init__.py index aa9b4a35..75979081 100644 --- a/skyflow/generated/rest/types/__init__.py +++ b/skyflow/generated/rest/types/__init__.py @@ -2,49 +2,69 @@ # isort: skip_file -from .allow_regex import AllowRegex from .audit_event_audit_resource_type import AuditEventAuditResourceType from .audit_event_context import AuditEventContext from .audit_event_data import AuditEventData from .audit_event_http_info import AuditEventHttpInfo from .batch_record_method import BatchRecordMethod -from .check_guardrails_response import CheckGuardrailsResponse -from .check_guardrails_response_validation import CheckGuardrailsResponseValidation -from .configuration_id import ConfigurationId from .context_access_type import ContextAccessType from .context_auth_mode import ContextAuthMode -from .deidentify_file_output import DeidentifyFileOutput -from .deidentify_file_output_processed_file_type import DeidentifyFileOutputProcessedFileType +from .deidentified_file_output import DeidentifiedFileOutput +from .deidentified_file_output_processed_file_extension import DeidentifiedFileOutputProcessedFileExtension +from .deidentified_file_output_processed_file_type import DeidentifiedFileOutputProcessedFileType from .deidentify_file_response import DeidentifyFileResponse -from .deidentify_status_response import DeidentifyStatusResponse -from .deidentify_status_response_output_type import DeidentifyStatusResponseOutputType -from .deidentify_status_response_status import DeidentifyStatusResponseStatus from .deidentify_string_response import DeidentifyStringResponse -from .detected_entity import DetectedEntity +from .detect_guardrails_response import DetectGuardrailsResponse +from .detect_guardrails_response_validation import DetectGuardrailsResponseValidation +from .detect_runs_response import DetectRunsResponse +from .detect_runs_response_output_type import DetectRunsResponseOutputType +from .detect_runs_response_status import DetectRunsResponseStatus from .detokenize_record_response_value_type import DetokenizeRecordResponseValueType -from .entity_location import EntityLocation -from .entity_type import EntityType -from .entity_types import EntityTypes from .error_response import ErrorResponse from .error_response_error import ErrorResponseError -from .error_string import ErrorString +from .file_data import FileData +from .file_data_data_format import FileDataDataFormat +from 
.file_data_deidentify_audio import FileDataDeidentifyAudio +from .file_data_deidentify_audio_data_format import FileDataDeidentifyAudioDataFormat +from .file_data_deidentify_document import FileDataDeidentifyDocument +from .file_data_deidentify_document_data_format import FileDataDeidentifyDocumentDataFormat +from .file_data_deidentify_image import FileDataDeidentifyImage +from .file_data_deidentify_image_data_format import FileDataDeidentifyImageDataFormat +from .file_data_deidentify_pdf import FileDataDeidentifyPdf +from .file_data_deidentify_presentation import FileDataDeidentifyPresentation +from .file_data_deidentify_presentation_data_format import FileDataDeidentifyPresentationDataFormat +from .file_data_deidentify_spreadsheet import FileDataDeidentifySpreadsheet +from .file_data_deidentify_spreadsheet_data_format import FileDataDeidentifySpreadsheetDataFormat +from .file_data_deidentify_structured_text import FileDataDeidentifyStructuredText +from .file_data_deidentify_structured_text_data_format import FileDataDeidentifyStructuredTextDataFormat +from .file_data_deidentify_text import FileDataDeidentifyText +from .file_data_reidentify_file import FileDataReidentifyFile +from .file_data_reidentify_file_data_format import FileDataReidentifyFileDataFormat +from .format import Format +from .format_masked_item import FormatMaskedItem +from .format_plaintext_item import FormatPlaintextItem +from .format_redacted_item import FormatRedactedItem from .googlerpc_status import GooglerpcStatus +from .http_code import HttpCode +from .identify_response import IdentifyResponse from .protobuf_any import ProtobufAny from .redaction_enum_redaction import RedactionEnumRedaction +from .reidentified_file_output import ReidentifiedFileOutput +from .reidentified_file_output_processed_file_extension import ReidentifiedFileOutputProcessedFileExtension from .reidentify_file_response import ReidentifyFileResponse -from .reidentify_file_response_output import ReidentifyFileResponseOutput +from .reidentify_file_response_output_type import ReidentifyFileResponseOutputType from .reidentify_file_response_status import ReidentifyFileResponseStatus -from .reidentify_string_response import ReidentifyStringResponse from .request_action_type import RequestActionType from .resource_id import ResourceId -from .restrict_regex import RestrictRegex -from .token_type import TokenType -from .token_type_default import TokenTypeDefault -from .token_type_without_vault import TokenTypeWithoutVault -from .token_type_without_vault_default import TokenTypeWithoutVaultDefault +from .shift_dates import ShiftDates +from .shift_dates_entity_types_item import ShiftDatesEntityTypesItem +from .string_response_entities import StringResponseEntities +from .token_type_mapping import TokenTypeMapping +from .token_type_mapping_default import TokenTypeMappingDefault +from .token_type_mapping_entity_only_item import TokenTypeMappingEntityOnlyItem +from .token_type_mapping_entity_unq_counter_item import TokenTypeMappingEntityUnqCounterItem +from .token_type_mapping_vault_token_item import TokenTypeMappingVaultTokenItem from .transformations import Transformations -from .transformations_shift_dates import TransformationsShiftDates -from .transformations_shift_dates_entity_types_item import TransformationsShiftDatesEntityTypesItem from .upload_file_v_2_response import UploadFileV2Response from .uuid_ import Uuid from .v_1_audit_after_options import V1AuditAfterOptions @@ -78,52 +98,72 @@ from .v_1_update_record_response import V1UpdateRecordResponse 
from .v_1_vault_field_mapping import V1VaultFieldMapping from .v_1_vault_schema_config import V1VaultSchemaConfig -from .vault_id import VaultId +from .word_character_count import WordCharacterCount __all__ = [ - "AllowRegex", "AuditEventAuditResourceType", "AuditEventContext", "AuditEventData", "AuditEventHttpInfo", "BatchRecordMethod", - "CheckGuardrailsResponse", - "CheckGuardrailsResponseValidation", - "ConfigurationId", "ContextAccessType", "ContextAuthMode", - "DeidentifyFileOutput", - "DeidentifyFileOutputProcessedFileType", + "DeidentifiedFileOutput", + "DeidentifiedFileOutputProcessedFileExtension", + "DeidentifiedFileOutputProcessedFileType", "DeidentifyFileResponse", - "DeidentifyStatusResponse", - "DeidentifyStatusResponseOutputType", - "DeidentifyStatusResponseStatus", "DeidentifyStringResponse", - "DetectedEntity", + "DetectGuardrailsResponse", + "DetectGuardrailsResponseValidation", + "DetectRunsResponse", + "DetectRunsResponseOutputType", + "DetectRunsResponseStatus", "DetokenizeRecordResponseValueType", - "EntityLocation", - "EntityType", - "EntityTypes", "ErrorResponse", "ErrorResponseError", - "ErrorString", + "FileData", + "FileDataDataFormat", + "FileDataDeidentifyAudio", + "FileDataDeidentifyAudioDataFormat", + "FileDataDeidentifyDocument", + "FileDataDeidentifyDocumentDataFormat", + "FileDataDeidentifyImage", + "FileDataDeidentifyImageDataFormat", + "FileDataDeidentifyPdf", + "FileDataDeidentifyPresentation", + "FileDataDeidentifyPresentationDataFormat", + "FileDataDeidentifySpreadsheet", + "FileDataDeidentifySpreadsheetDataFormat", + "FileDataDeidentifyStructuredText", + "FileDataDeidentifyStructuredTextDataFormat", + "FileDataDeidentifyText", + "FileDataReidentifyFile", + "FileDataReidentifyFileDataFormat", + "Format", + "FormatMaskedItem", + "FormatPlaintextItem", + "FormatRedactedItem", "GooglerpcStatus", + "HttpCode", + "IdentifyResponse", "ProtobufAny", "RedactionEnumRedaction", + "ReidentifiedFileOutput", + "ReidentifiedFileOutputProcessedFileExtension", "ReidentifyFileResponse", - "ReidentifyFileResponseOutput", + "ReidentifyFileResponseOutputType", "ReidentifyFileResponseStatus", - "ReidentifyStringResponse", "RequestActionType", "ResourceId", - "RestrictRegex", - "TokenType", - "TokenTypeDefault", - "TokenTypeWithoutVault", - "TokenTypeWithoutVaultDefault", + "ShiftDates", + "ShiftDatesEntityTypesItem", + "StringResponseEntities", + "TokenTypeMapping", + "TokenTypeMappingDefault", + "TokenTypeMappingEntityOnlyItem", + "TokenTypeMappingEntityUnqCounterItem", + "TokenTypeMappingVaultTokenItem", "Transformations", - "TransformationsShiftDates", - "TransformationsShiftDatesEntityTypesItem", "UploadFileV2Response", "Uuid", "V1AuditAfterOptions", @@ -157,5 +197,5 @@ "V1UpdateRecordResponse", "V1VaultFieldMapping", "V1VaultSchemaConfig", - "VaultId", + "WordCharacterCount", ] diff --git a/skyflow/generated/rest/types/allow_regex.py b/skyflow/generated/rest/types/allow_regex.py deleted file mode 100644 index f4164375..00000000 --- a/skyflow/generated/rest/types/allow_regex.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
- -import typing - -AllowRegex = typing.List[str] diff --git a/skyflow/generated/rest/types/check_guardrails_response_validation.py b/skyflow/generated/rest/types/check_guardrails_response_validation.py deleted file mode 100644 index dcb0b789..00000000 --- a/skyflow/generated/rest/types/check_guardrails_response_validation.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -CheckGuardrailsResponseValidation = typing.Union[typing.Literal["failed", "passed"], typing.Any] diff --git a/skyflow/generated/rest/types/configuration_id.py b/skyflow/generated/rest/types/configuration_id.py deleted file mode 100644 index 763ae161..00000000 --- a/skyflow/generated/rest/types/configuration_id.py +++ /dev/null @@ -1,3 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -ConfigurationId = str diff --git a/skyflow/generated/rest/types/deidentified_file_output.py b/skyflow/generated/rest/types/deidentified_file_output.py new file mode 100644 index 00000000..387f57f2 --- /dev/null +++ b/skyflow/generated/rest/types/deidentified_file_output.py @@ -0,0 +1,46 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +import typing_extensions +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from ..core.serialization import FieldMetadata +from .deidentified_file_output_processed_file_extension import DeidentifiedFileOutputProcessedFileExtension +from .deidentified_file_output_processed_file_type import DeidentifiedFileOutputProcessedFileType + + +class DeidentifiedFileOutput(UniversalBaseModel): + """ + Details of output files. Files are specified as Base64-encoded data. + """ + + processed_file: typing_extensions.Annotated[typing.Optional[str], FieldMetadata(alias="processedFile")] = ( + pydantic.Field(default=None) + ) + """ + File content in Base64 format. + """ + + processed_file_type: typing_extensions.Annotated[ + typing.Optional[DeidentifiedFileOutputProcessedFileType], FieldMetadata(alias="processedFileType") + ] = pydantic.Field(default=None) + """ + Type of the processed file. + """ + + processed_file_extension: typing_extensions.Annotated[ + typing.Optional[DeidentifiedFileOutputProcessedFileExtension], FieldMetadata(alias="processedFileExtension") + ] = pydantic.Field(default=None) + """ + Extension of the processed file. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/skyflow/generated/rest/types/deidentified_file_output_processed_file_extension.py b/skyflow/generated/rest/types/deidentified_file_output_processed_file_extension.py new file mode 100644 index 00000000..dc3a8519 --- /dev/null +++ b/skyflow/generated/rest/types/deidentified_file_output_processed_file_extension.py @@ -0,0 +1,29 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +DeidentifiedFileOutputProcessedFileExtension = typing.Union[ + typing.Literal[ + "mp3", + "wav", + "pdf", + "txt", + "csv", + "json", + "jpg", + "jpeg", + "tif", + "tiff", + "png", + "bmp", + "xls", + "xlsx", + "doc", + "docx", + "ppt", + "pptx", + "xml", + "dcm", + ], + typing.Any, +] diff --git a/skyflow/generated/rest/types/deidentify_file_output_processed_file_type.py b/skyflow/generated/rest/types/deidentified_file_output_processed_file_type.py similarity index 55% rename from skyflow/generated/rest/types/deidentify_file_output_processed_file_type.py rename to skyflow/generated/rest/types/deidentified_file_output_processed_file_type.py index 332ce445..ca5aadfc 100644 --- a/skyflow/generated/rest/types/deidentify_file_output_processed_file_type.py +++ b/skyflow/generated/rest/types/deidentified_file_output_processed_file_type.py @@ -2,18 +2,15 @@ import typing -DeidentifyFileOutputProcessedFileType = typing.Union[ +DeidentifiedFileOutputProcessedFileType = typing.Union[ typing.Literal[ - "entities", - "plaintext_transcription", "redacted_audio", - "redacted_diarized_transcription", - "redacted_file", "redacted_image", - "redacted_medical_diarized_transcription", - "redacted_medical_transcription", - "redacted_text", "redacted_transcription", + "redacted_file", + "redacted_text", + "entities", + "redacted_transcription_diarize_json", ], typing.Any, ] diff --git a/skyflow/generated/rest/types/deidentify_file_response.py b/skyflow/generated/rest/types/deidentify_file_response.py index e4e6bf35..4c4503f4 100644 --- a/skyflow/generated/rest/types/deidentify_file_response.py +++ b/skyflow/generated/rest/types/deidentify_file_response.py @@ -8,12 +8,12 @@ class DeidentifyFileResponse(UniversalBaseModel): """ - Response to de-identify a file. + Response to deidentify a file. """ - run_id: str = pydantic.Field() + run_id: typing.Optional[str] = pydantic.Field(default=None) """ - Status URL for the detect run. + Status URL for the Detect run. """ if IS_PYDANTIC_V2: diff --git a/skyflow/generated/rest/types/deidentify_status_response.py b/skyflow/generated/rest/types/deidentify_status_response.py deleted file mode 100644 index 68a6cd3f..00000000 --- a/skyflow/generated/rest/types/deidentify_status_response.py +++ /dev/null @@ -1,74 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from .deidentify_file_output import DeidentifyFileOutput -from .deidentify_status_response_output_type import DeidentifyStatusResponseOutputType -from .deidentify_status_response_status import DeidentifyStatusResponseStatus - - -class DeidentifyStatusResponse(UniversalBaseModel): - """ - Response to get the status of a detect run. - """ - - status: DeidentifyStatusResponseStatus = pydantic.Field() - """ - Status of the detect run. - """ - - output: typing.List[DeidentifyFileOutput] = pydantic.Field() - """ - How the input file was specified. - """ - - output_type: typing.Optional[DeidentifyStatusResponseOutputType] = pydantic.Field(default=None) - """ - How the output file is specified. - """ - - message: str = pydantic.Field() - """ - Status details about the detect run. - """ - - word_count: typing.Optional[int] = pydantic.Field(default=None) - """ - Number of words in the processed text. - """ - - character_count: typing.Optional[int] = pydantic.Field(default=None) - """ - Number of characters in the processed text. 
- """ - - size: typing.Optional[float] = pydantic.Field(default=None) - """ - Size of the processed text in kilobytes (KB). - """ - - duration: typing.Optional[float] = pydantic.Field(default=None) - """ - Duration of the processed audio in seconds. - """ - - pages: typing.Optional[int] = pydantic.Field(default=None) - """ - Number of pages in the processed PDF. - """ - - slides: typing.Optional[int] = pydantic.Field(default=None) - """ - Number of slides in the processed presentation. - """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/skyflow/generated/rest/types/deidentify_status_response_output_type.py b/skyflow/generated/rest/types/deidentify_status_response_output_type.py deleted file mode 100644 index 051cc31a..00000000 --- a/skyflow/generated/rest/types/deidentify_status_response_output_type.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -DeidentifyStatusResponseOutputType = typing.Union[typing.Literal["BASE64", "UNKNOWN"], typing.Any] diff --git a/skyflow/generated/rest/types/deidentify_status_response_status.py b/skyflow/generated/rest/types/deidentify_status_response_status.py deleted file mode 100644 index 9ec2931b..00000000 --- a/skyflow/generated/rest/types/deidentify_status_response_status.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -DeidentifyStatusResponseStatus = typing.Union[typing.Literal["FAILED", "IN_PROGRESS", "SUCCESS", "UNKNOWN"], typing.Any] diff --git a/skyflow/generated/rest/types/deidentify_string_response.py b/skyflow/generated/rest/types/deidentify_string_response.py index c141f841..4655f80b 100644 --- a/skyflow/generated/rest/types/deidentify_string_response.py +++ b/skyflow/generated/rest/types/deidentify_string_response.py @@ -4,7 +4,7 @@ import pydantic from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from .detected_entity import DetectedEntity +from .string_response_entities import StringResponseEntities class DeidentifyStringResponse(UniversalBaseModel): @@ -12,22 +12,22 @@ class DeidentifyStringResponse(UniversalBaseModel): Response to deidentify a string. """ - processed_text: str = pydantic.Field() + processed_text: typing.Optional[str] = pydantic.Field(default=None) """ De-identified text. """ - entities: typing.List[DetectedEntity] = pydantic.Field() + entities: typing.Optional[typing.List[StringResponseEntities]] = pydantic.Field(default=None) """ Detected entities. """ - word_count: int = pydantic.Field() + word_count: typing.Optional[int] = pydantic.Field(default=None) """ Number of words from the input text. """ - character_count: int = pydantic.Field() + character_count: typing.Optional[int] = pydantic.Field(default=None) """ Number of characters from the input text. 
""" diff --git a/skyflow/generated/rest/types/check_guardrails_response.py b/skyflow/generated/rest/types/detect_guardrails_response.py similarity index 52% rename from skyflow/generated/rest/types/check_guardrails_response.py rename to skyflow/generated/rest/types/detect_guardrails_response.py index ad8e2dbf..2290ac52 100644 --- a/skyflow/generated/rest/types/check_guardrails_response.py +++ b/skyflow/generated/rest/types/detect_guardrails_response.py @@ -4,32 +4,28 @@ import pydantic from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from .check_guardrails_response_validation import CheckGuardrailsResponseValidation +from .detect_guardrails_response_validation import DetectGuardrailsResponseValidation -class CheckGuardrailsResponse(UniversalBaseModel): - """ - Response to check guardrails. - """ - - text: typing.Optional[str] = pydantic.Field(default=None) +class DetectGuardrailsResponse(UniversalBaseModel): + text: str = pydantic.Field() """ Text that was checked against guardrails. """ - toxicity: typing.Optional[bool] = pydantic.Field(default=None) + toxic: typing.Optional[bool] = pydantic.Field(default=None) """ Whether the text is toxic. """ - denied_topics: typing.Optional[bool] = pydantic.Field(default=None) + denied_topic: typing.Optional[bool] = pydantic.Field(default=None) """ - Whether any denied topics were found. + Whether the text included a denied topic. """ - validation: typing.Optional[CheckGuardrailsResponseValidation] = pydantic.Field(default=None) + validation: DetectGuardrailsResponseValidation = pydantic.Field() """ - Validation result. + Whether the text passed validation. """ if IS_PYDANTIC_V2: diff --git a/skyflow/generated/rest/types/detect_guardrails_response_validation.py b/skyflow/generated/rest/types/detect_guardrails_response_validation.py new file mode 100644 index 00000000..5a59ddb0 --- /dev/null +++ b/skyflow/generated/rest/types/detect_guardrails_response_validation.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +DetectGuardrailsResponseValidation = typing.Union[typing.Literal["failed", "passed"], typing.Any] diff --git a/skyflow/generated/rest/types/detect_runs_response.py b/skyflow/generated/rest/types/detect_runs_response.py new file mode 100644 index 00000000..e49cca0a --- /dev/null +++ b/skyflow/generated/rest/types/detect_runs_response.py @@ -0,0 +1,72 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +import typing_extensions +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from ..core.serialization import FieldMetadata +from .deidentified_file_output import DeidentifiedFileOutput +from .detect_runs_response_output_type import DetectRunsResponseOutputType +from .detect_runs_response_status import DetectRunsResponseStatus +from .word_character_count import WordCharacterCount + + +class DetectRunsResponse(UniversalBaseModel): + """ + Response to get the status of a file deidentification request. + """ + + status: typing.Optional[DetectRunsResponseStatus] = pydantic.Field(default=None) + """ + Status of the operation. + """ + + output_type: typing_extensions.Annotated[ + typing.Optional[DetectRunsResponseOutputType], FieldMetadata(alias="outputType") + ] = pydantic.Field(default=None) + """ + Format of the output file. + """ + + output: typing.Optional[typing.List[DeidentifiedFileOutput]] = pydantic.Field(default=None) + """ + Details of output files. 
Files are specified as Base64-encoded data. + """ + + message: typing.Optional[str] = pydantic.Field(default=None) + """ + Status details about the Detect run. + """ + + size: typing.Optional[float] = pydantic.Field(default=None) + """ + Size of the processed file in kilobytes (KB). + """ + + word_character_count: typing_extensions.Annotated[ + typing.Optional[WordCharacterCount], FieldMetadata(alias="wordCharacterCount") + ] = None + duration: typing.Optional[float] = pydantic.Field(default=None) + """ + Duration of the processed audio in seconds. + """ + + pages: typing.Optional[int] = pydantic.Field(default=None) + """ + Number of pages in the processed PDF. + """ + + slides: typing.Optional[int] = pydantic.Field(default=None) + """ + Number of slides in the processed presentation. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/skyflow/generated/rest/types/detect_runs_response_output_type.py b/skyflow/generated/rest/types/detect_runs_response_output_type.py new file mode 100644 index 00000000..a24870c1 --- /dev/null +++ b/skyflow/generated/rest/types/detect_runs_response_output_type.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +DetectRunsResponseOutputType = typing.Union[typing.Literal["UNKNOWN", "BASE64"], typing.Any] diff --git a/skyflow/generated/rest/types/detect_runs_response_status.py b/skyflow/generated/rest/types/detect_runs_response_status.py new file mode 100644 index 00000000..979454c6 --- /dev/null +++ b/skyflow/generated/rest/types/detect_runs_response_status.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +DetectRunsResponseStatus = typing.Union[typing.Literal["UNKNOWN", "FAILED", "SUCCESS", "IN_PROGRESS"], typing.Any] diff --git a/skyflow/generated/rest/types/entity_location.py b/skyflow/generated/rest/types/entity_location.py deleted file mode 100644 index 487f9c72..00000000 --- a/skyflow/generated/rest/types/entity_location.py +++ /dev/null @@ -1,41 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel - - -class EntityLocation(UniversalBaseModel): - """ - Locations of an entity in the text. - """ - - start_index: typing.Optional[int] = pydantic.Field(default=None) - """ - Index of the first character of the string in the original text. - """ - - end_index: typing.Optional[int] = pydantic.Field(default=None) - """ - Index of the last character of the string in the original text. - """ - - start_index_processed: typing.Optional[int] = pydantic.Field(default=None) - """ - Index of the first character of the string in the processed text. - """ - - end_index_processed: typing.Optional[int] = pydantic.Field(default=None) - """ - Index of the last character of the string in the processed text. 
- """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/skyflow/generated/rest/types/entity_types.py b/skyflow/generated/rest/types/entity_types.py deleted file mode 100644 index 3adb0438..00000000 --- a/skyflow/generated/rest/types/entity_types.py +++ /dev/null @@ -1,7 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -from .entity_type import EntityType - -EntityTypes = typing.List[EntityType] diff --git a/skyflow/generated/rest/types/error_response_error.py b/skyflow/generated/rest/types/error_response_error.py index 722b69cc..efe080d3 100644 --- a/skyflow/generated/rest/types/error_response_error.py +++ b/skyflow/generated/rest/types/error_response_error.py @@ -4,6 +4,7 @@ import pydantic from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from .http_code import HttpCode class ErrorResponseError(UniversalBaseModel): @@ -12,16 +13,8 @@ class ErrorResponseError(UniversalBaseModel): gRPC status codes. See https://grpc.io/docs/guides/status-codes. """ - http_code: int = pydantic.Field() - """ - HTTP status codes. See https://developer.mozilla.org/en-US/docs/Web/HTTP/Status. - """ - - http_status: str = pydantic.Field() - """ - HTTP status message. - """ - + http_code: HttpCode + http_status: str message: str details: typing.Optional[typing.List[typing.Dict[str, typing.Optional[typing.Any]]]] = None diff --git a/skyflow/generated/rest/types/error_string.py b/skyflow/generated/rest/types/error_string.py deleted file mode 100644 index 4ebbdff4..00000000 --- a/skyflow/generated/rest/types/error_string.py +++ /dev/null @@ -1,3 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -ErrorString = str \ No newline at end of file diff --git a/skyflow/generated/rest/types/reidentify_file_response_output.py b/skyflow/generated/rest/types/file_data.py similarity index 53% rename from skyflow/generated/rest/types/reidentify_file_response_output.py rename to skyflow/generated/rest/types/file_data.py index bda44777..846837e0 100644 --- a/skyflow/generated/rest/types/reidentify_file_response_output.py +++ b/skyflow/generated/rest/types/file_data.py @@ -3,23 +3,25 @@ import typing import pydantic +import typing_extensions from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from ..core.serialization import FieldMetadata +from .file_data_data_format import FileDataDataFormat -class ReidentifyFileResponseOutput(UniversalBaseModel): - processed_file: str = pydantic.Field() +class FileData(UniversalBaseModel): """ - Re-identified file content in base64 format. + File to process. Files are specified as Base64-encoded data. """ - processed_file_type: typing.Literal["reidentified_file"] = pydantic.Field(default="reidentified_file") + base_64: typing_extensions.Annotated[str, FieldMetadata(alias="base64")] = pydantic.Field() """ - Type of the processed file. + Base64-encoded data of the file. """ - processed_file_extension: str = pydantic.Field() + data_format: FileDataDataFormat = pydantic.Field() """ - Extension of the processed file. + Format of the file. 
""" if IS_PYDANTIC_V2: diff --git a/skyflow/generated/rest/files/types/deidentify_file_request_file_data_format.py b/skyflow/generated/rest/types/file_data_data_format.py similarity index 89% rename from skyflow/generated/rest/files/types/deidentify_file_request_file_data_format.py rename to skyflow/generated/rest/types/file_data_data_format.py index f3294014..48205005 100644 --- a/skyflow/generated/rest/files/types/deidentify_file_request_file_data_format.py +++ b/skyflow/generated/rest/types/file_data_data_format.py @@ -2,28 +2,29 @@ import typing -DeidentifyFileRequestFileDataFormat = typing.Union[ +FileDataDataFormat = typing.Union[ typing.Literal[ - "bmp", - "csv", - "dcm", - "doc", - "docx", - "jpeg", - "jpg", - "json", "mp3", + "wav", "pdf", - "png", - "ppt", - "pptx", + "txt", + "csv", + "json", + "jpg", + "jpeg", "tif", "tiff", - "txt", - "wav", + "png", + "bmp", "xls", "xlsx", + "doc", + "docx", + "ppt", + "pptx", "xml", + "dcm", + "jsonl", ], typing.Any, ] diff --git a/skyflow/generated/rest/files/types/deidentify_file_request_file.py b/skyflow/generated/rest/types/file_data_deidentify_audio.py similarity index 53% rename from skyflow/generated/rest/files/types/deidentify_file_request_file.py rename to skyflow/generated/rest/types/file_data_deidentify_audio.py index 3e062bb2..8973b22a 100644 --- a/skyflow/generated/rest/files/types/deidentify_file_request_file.py +++ b/skyflow/generated/rest/types/file_data_deidentify_audio.py @@ -4,24 +4,24 @@ import pydantic import typing_extensions -from ...core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from ...core.serialization import FieldMetadata -from .deidentify_file_request_file_data_format import DeidentifyFileRequestFileDataFormat +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from ..core.serialization import FieldMetadata +from .file_data_deidentify_audio_data_format import FileDataDeidentifyAudioDataFormat -class DeidentifyFileRequestFile(UniversalBaseModel): +class FileDataDeidentifyAudio(UniversalBaseModel): """ - File to de-identify. Files are specified as Base64-encoded data. + File to process. Files are specified as Base64-encoded data. """ base_64: typing_extensions.Annotated[str, FieldMetadata(alias="base64")] = pydantic.Field() """ - Base64-encoded data of the file to de-identify. + Base64-encoded data of the file. """ - data_format: DeidentifyFileRequestFileDataFormat = pydantic.Field() + data_format: FileDataDeidentifyAudioDataFormat = pydantic.Field() """ - Data format of the file. + Format of the file. """ if IS_PYDANTIC_V2: diff --git a/skyflow/generated/rest/types/file_data_deidentify_audio_data_format.py b/skyflow/generated/rest/types/file_data_deidentify_audio_data_format.py new file mode 100644 index 00000000..85f163c2 --- /dev/null +++ b/skyflow/generated/rest/types/file_data_deidentify_audio_data_format.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +FileDataDeidentifyAudioDataFormat = typing.Union[typing.Literal["mp3", "wav"], typing.Any] diff --git a/skyflow/generated/rest/files/types/deidentify_audio_request_file.py b/skyflow/generated/rest/types/file_data_deidentify_document.py similarity index 54% rename from skyflow/generated/rest/files/types/deidentify_audio_request_file.py rename to skyflow/generated/rest/types/file_data_deidentify_document.py index 3ea4c16f..ef9b61f2 100644 --- a/skyflow/generated/rest/files/types/deidentify_audio_request_file.py +++ b/skyflow/generated/rest/types/file_data_deidentify_document.py @@ -4,24 +4,24 @@ import pydantic import typing_extensions -from ...core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from ...core.serialization import FieldMetadata -from .deidentify_audio_request_file_data_format import DeidentifyAudioRequestFileDataFormat +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from ..core.serialization import FieldMetadata +from .file_data_deidentify_document_data_format import FileDataDeidentifyDocumentDataFormat -class DeidentifyAudioRequestFile(UniversalBaseModel): +class FileDataDeidentifyDocument(UniversalBaseModel): """ - File to de-identify. Files are specified as Base64-encoded data. + File to process. Files are specified as Base64-encoded data. """ base_64: typing_extensions.Annotated[str, FieldMetadata(alias="base64")] = pydantic.Field() """ - Base64-encoded data of the file to de-identify. + Base64-encoded data of the file. """ - data_format: DeidentifyAudioRequestFileDataFormat = pydantic.Field() + data_format: FileDataDeidentifyDocumentDataFormat = pydantic.Field() """ - Data format of the file. + Format of the file. """ if IS_PYDANTIC_V2: diff --git a/skyflow/generated/rest/types/file_data_deidentify_document_data_format.py b/skyflow/generated/rest/types/file_data_deidentify_document_data_format.py new file mode 100644 index 00000000..beaf1ffc --- /dev/null +++ b/skyflow/generated/rest/types/file_data_deidentify_document_data_format.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +FileDataDeidentifyDocumentDataFormat = typing.Union[typing.Literal["pdf", "doc", "docx"], typing.Any] diff --git a/skyflow/generated/rest/files/types/deidentify_image_request_file.py b/skyflow/generated/rest/types/file_data_deidentify_image.py similarity index 53% rename from skyflow/generated/rest/files/types/deidentify_image_request_file.py rename to skyflow/generated/rest/types/file_data_deidentify_image.py index 36677964..c1f76298 100644 --- a/skyflow/generated/rest/files/types/deidentify_image_request_file.py +++ b/skyflow/generated/rest/types/file_data_deidentify_image.py @@ -4,24 +4,24 @@ import pydantic import typing_extensions -from ...core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from ...core.serialization import FieldMetadata -from .deidentify_image_request_file_data_format import DeidentifyImageRequestFileDataFormat +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from ..core.serialization import FieldMetadata +from .file_data_deidentify_image_data_format import FileDataDeidentifyImageDataFormat -class DeidentifyImageRequestFile(UniversalBaseModel): +class FileDataDeidentifyImage(UniversalBaseModel): """ - File to de-identify. Files are specified as Base64-encoded data. + File to process. Files are specified as Base64-encoded data. 
""" base_64: typing_extensions.Annotated[str, FieldMetadata(alias="base64")] = pydantic.Field() """ - Base64-encoded data of the file to de-identify. + Base64-encoded data of the file. """ - data_format: DeidentifyImageRequestFileDataFormat = pydantic.Field() + data_format: FileDataDeidentifyImageDataFormat = pydantic.Field() """ - Data format of the file. + Format of the file. """ if IS_PYDANTIC_V2: diff --git a/skyflow/generated/rest/types/file_data_deidentify_image_data_format.py b/skyflow/generated/rest/types/file_data_deidentify_image_data_format.py new file mode 100644 index 00000000..97dab89e --- /dev/null +++ b/skyflow/generated/rest/types/file_data_deidentify_image_data_format.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +FileDataDeidentifyImageDataFormat = typing.Union[typing.Literal["jpg", "jpeg", "tif", "tiff", "png", "bmp"], typing.Any] diff --git a/skyflow/generated/rest/files/types/deidentify_pdf_request_file.py b/skyflow/generated/rest/types/file_data_deidentify_pdf.py similarity index 66% rename from skyflow/generated/rest/files/types/deidentify_pdf_request_file.py rename to skyflow/generated/rest/types/file_data_deidentify_pdf.py index da461fd1..0dc2c1a5 100644 --- a/skyflow/generated/rest/files/types/deidentify_pdf_request_file.py +++ b/skyflow/generated/rest/types/file_data_deidentify_pdf.py @@ -4,23 +4,23 @@ import pydantic import typing_extensions -from ...core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from ...core.serialization import FieldMetadata +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from ..core.serialization import FieldMetadata -class DeidentifyPdfRequestFile(UniversalBaseModel): +class FileDataDeidentifyPdf(UniversalBaseModel): """ - File to de-identify. Files are specified as Base64-encoded data. + File to process. Files are specified as Base64-encoded data. """ base_64: typing_extensions.Annotated[str, FieldMetadata(alias="base64")] = pydantic.Field() """ - Base64-encoded data of the file to de-identify. + Base64-encoded data of the file. """ data_format: typing.Literal["pdf"] = pydantic.Field(default="pdf") """ - Data format of the file. + Format of the file. """ if IS_PYDANTIC_V2: diff --git a/skyflow/generated/rest/types/file_data_deidentify_presentation.py b/skyflow/generated/rest/types/file_data_deidentify_presentation.py new file mode 100644 index 00000000..17fa004e --- /dev/null +++ b/skyflow/generated/rest/types/file_data_deidentify_presentation.py @@ -0,0 +1,34 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +import typing_extensions +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from ..core.serialization import FieldMetadata +from .file_data_deidentify_presentation_data_format import FileDataDeidentifyPresentationDataFormat + + +class FileDataDeidentifyPresentation(UniversalBaseModel): + """ + File to process. Files are specified as Base64-encoded data. + """ + + base_64: typing_extensions.Annotated[str, FieldMetadata(alias="base64")] = pydantic.Field() + """ + Base64-encoded data of the file. + """ + + data_format: FileDataDeidentifyPresentationDataFormat = pydantic.Field() + """ + Format of the file. 
+ """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/skyflow/generated/rest/types/file_data_deidentify_presentation_data_format.py b/skyflow/generated/rest/types/file_data_deidentify_presentation_data_format.py new file mode 100644 index 00000000..d811746d --- /dev/null +++ b/skyflow/generated/rest/types/file_data_deidentify_presentation_data_format.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +FileDataDeidentifyPresentationDataFormat = typing.Union[typing.Literal["ppt", "pptx"], typing.Any] diff --git a/skyflow/generated/rest/files/types/deidentify_document_request_file.py b/skyflow/generated/rest/types/file_data_deidentify_spreadsheet.py similarity index 53% rename from skyflow/generated/rest/files/types/deidentify_document_request_file.py rename to skyflow/generated/rest/types/file_data_deidentify_spreadsheet.py index cbf36c59..17ead2f1 100644 --- a/skyflow/generated/rest/files/types/deidentify_document_request_file.py +++ b/skyflow/generated/rest/types/file_data_deidentify_spreadsheet.py @@ -4,24 +4,24 @@ import pydantic import typing_extensions -from ...core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from ...core.serialization import FieldMetadata -from .deidentify_document_request_file_data_format import DeidentifyDocumentRequestFileDataFormat +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from ..core.serialization import FieldMetadata +from .file_data_deidentify_spreadsheet_data_format import FileDataDeidentifySpreadsheetDataFormat -class DeidentifyDocumentRequestFile(UniversalBaseModel): +class FileDataDeidentifySpreadsheet(UniversalBaseModel): """ - File to de-identify. Files are specified as Base64-encoded data. + File to process. Files are specified as Base64-encoded data. """ base_64: typing_extensions.Annotated[str, FieldMetadata(alias="base64")] = pydantic.Field() """ - Base64-encoded data of the file to de-identify. + Base64-encoded data of the file. """ - data_format: DeidentifyDocumentRequestFileDataFormat = pydantic.Field() + data_format: FileDataDeidentifySpreadsheetDataFormat = pydantic.Field() """ - Data format of the file. + Format of the file. """ if IS_PYDANTIC_V2: diff --git a/skyflow/generated/rest/types/file_data_deidentify_spreadsheet_data_format.py b/skyflow/generated/rest/types/file_data_deidentify_spreadsheet_data_format.py new file mode 100644 index 00000000..f48a11aa --- /dev/null +++ b/skyflow/generated/rest/types/file_data_deidentify_spreadsheet_data_format.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +FileDataDeidentifySpreadsheetDataFormat = typing.Union[typing.Literal["csv", "xls", "xlsx"], typing.Any] diff --git a/skyflow/generated/rest/files/types/deidentify_spreadsheet_request_file.py b/skyflow/generated/rest/types/file_data_deidentify_structured_text.py similarity index 53% rename from skyflow/generated/rest/files/types/deidentify_spreadsheet_request_file.py rename to skyflow/generated/rest/types/file_data_deidentify_structured_text.py index f97e1c03..a78a11ab 100644 --- a/skyflow/generated/rest/files/types/deidentify_spreadsheet_request_file.py +++ b/skyflow/generated/rest/types/file_data_deidentify_structured_text.py @@ -4,24 +4,24 @@ import pydantic import typing_extensions -from ...core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from ...core.serialization import FieldMetadata -from .deidentify_spreadsheet_request_file_data_format import DeidentifySpreadsheetRequestFileDataFormat +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from ..core.serialization import FieldMetadata +from .file_data_deidentify_structured_text_data_format import FileDataDeidentifyStructuredTextDataFormat -class DeidentifySpreadsheetRequestFile(UniversalBaseModel): +class FileDataDeidentifyStructuredText(UniversalBaseModel): """ - File to de-identify. Files are specified as Base64-encoded data. + File to process. Files are specified as Base64-encoded data. """ base_64: typing_extensions.Annotated[str, FieldMetadata(alias="base64")] = pydantic.Field() """ - Base64-encoded data of the file to de-identify. + Base64-encoded data of the file. """ - data_format: DeidentifySpreadsheetRequestFileDataFormat = pydantic.Field() + data_format: FileDataDeidentifyStructuredTextDataFormat = pydantic.Field() """ - Data format of the file. + Format of the file. """ if IS_PYDANTIC_V2: diff --git a/skyflow/generated/rest/types/file_data_deidentify_structured_text_data_format.py b/skyflow/generated/rest/types/file_data_deidentify_structured_text_data_format.py new file mode 100644 index 00000000..267b17a6 --- /dev/null +++ b/skyflow/generated/rest/types/file_data_deidentify_structured_text_data_format.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +FileDataDeidentifyStructuredTextDataFormat = typing.Union[typing.Literal["json", "xml"], typing.Any] diff --git a/skyflow/generated/rest/files/types/deidentify_text_request_file.py b/skyflow/generated/rest/types/file_data_deidentify_text.py similarity index 66% rename from skyflow/generated/rest/files/types/deidentify_text_request_file.py rename to skyflow/generated/rest/types/file_data_deidentify_text.py index 193aa7bd..c8637361 100644 --- a/skyflow/generated/rest/files/types/deidentify_text_request_file.py +++ b/skyflow/generated/rest/types/file_data_deidentify_text.py @@ -4,23 +4,23 @@ import pydantic import typing_extensions -from ...core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from ...core.serialization import FieldMetadata +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from ..core.serialization import FieldMetadata -class DeidentifyTextRequestFile(UniversalBaseModel): +class FileDataDeidentifyText(UniversalBaseModel): """ - File to de-identify. Files are specified as Base64-encoded data. + File to process. Files are specified as Base64-encoded data. """ base_64: typing_extensions.Annotated[str, FieldMetadata(alias="base64")] = pydantic.Field() """ - Base64-encoded data of the file to de-identify. 
+ Base64-encoded data of the file. """ data_format: typing.Literal["txt"] = pydantic.Field(default="txt") """ - Data format of the file. + Format of the file. """ if IS_PYDANTIC_V2: diff --git a/skyflow/generated/rest/types/file_data_reidentify_file.py b/skyflow/generated/rest/types/file_data_reidentify_file.py new file mode 100644 index 00000000..d691bcc0 --- /dev/null +++ b/skyflow/generated/rest/types/file_data_reidentify_file.py @@ -0,0 +1,34 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +import typing_extensions +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from ..core.serialization import FieldMetadata +from .file_data_reidentify_file_data_format import FileDataReidentifyFileDataFormat + + +class FileDataReidentifyFile(UniversalBaseModel): + """ + File to process. Files are specified as Base64-encoded data. + """ + + base_64: typing_extensions.Annotated[str, FieldMetadata(alias="base64")] = pydantic.Field() + """ + Base64-encoded data of the file. + """ + + data_format: FileDataReidentifyFileDataFormat = pydantic.Field() + """ + Format of the file. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/skyflow/generated/rest/types/file_data_reidentify_file_data_format.py b/skyflow/generated/rest/types/file_data_reidentify_file_data_format.py new file mode 100644 index 00000000..d0cc44ba --- /dev/null +++ b/skyflow/generated/rest/types/file_data_reidentify_file_data_format.py @@ -0,0 +1,7 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +FileDataReidentifyFileDataFormat = typing.Union[ + typing.Literal["txt", "csv", "json", "xls", "xlsx", "doc", "docx", "xml"], typing.Any +] diff --git a/skyflow/generated/rest/files/types/reidentify_file_request_format.py b/skyflow/generated/rest/types/format.py similarity index 58% rename from skyflow/generated/rest/files/types/reidentify_file_request_format.py rename to skyflow/generated/rest/types/format.py index ec7ca5f1..a12992cd 100644 --- a/skyflow/generated/rest/files/types/reidentify_file_request_format.py +++ b/skyflow/generated/rest/types/format.py @@ -3,26 +3,28 @@ import typing import pydantic -from ...core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from ...types.entity_type import EntityType +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from .format_masked_item import FormatMaskedItem +from .format_plaintext_item import FormatPlaintextItem +from .format_redacted_item import FormatRedactedItem -class ReidentifyFileRequestFormat(UniversalBaseModel): +class Format(UniversalBaseModel): """ Mapping of preferred data formatting options to entity types. Returned values are dependent on the configuration of the vault storing the data and the permissions of the user or account making the request. """ - redacted: typing.Optional[typing.List[EntityType]] = pydantic.Field(default=None) + redacted: typing.Optional[typing.List[FormatRedactedItem]] = pydantic.Field(default=None) """ Entity types to fully redact. """ - masked: typing.Optional[typing.List[EntityType]] = pydantic.Field(default=None) + masked: typing.Optional[typing.List[FormatMaskedItem]] = pydantic.Field(default=None) """ Entity types to mask. 
""" - plaintext: typing.Optional[typing.List[EntityType]] = pydantic.Field(default=None) + plaintext: typing.Optional[typing.List[FormatPlaintextItem]] = pydantic.Field(default=None) """ Entity types to return in plaintext. """ diff --git a/skyflow/generated/rest/types/entity_type.py b/skyflow/generated/rest/types/format_masked_item.py similarity index 98% rename from skyflow/generated/rest/types/entity_type.py rename to skyflow/generated/rest/types/format_masked_item.py index 1a343410..eeae9770 100644 --- a/skyflow/generated/rest/types/entity_type.py +++ b/skyflow/generated/rest/types/format_masked_item.py @@ -2,78 +2,78 @@ import typing -EntityType = typing.Union[ +FormatMaskedItem = typing.Union[ typing.Literal[ - "account_number", "age", - "all", "bank_account", - "blood_type", - "condition", - "corporate_action", "credit_card", "credit_card_expiration", "cvv", "date", "date_interval", - "day", "dob", - "dose", "driver_license", - "drug", - "duration", - "effect", "email_address", + "healthcare_number", + "ip_address", + "location", + "name", + "numerical_pii", + "phone_number", + "ssn", + "url", + "vehicle_id", + "medical_code", + "name_family", + "name_given", + "account_number", "event", "filename", - "financial_metric", "gender", - "healthcare_number", - "injury", - "ip_address", "language", - "location", "location_address", - "location_address_street", "location_city", "location_coordinate", "location_country", "location_state", "location_zip", "marital_status", - "medical_code", - "medical_process", "money", - "month", - "name", - "name_family", - "name_given", "name_medical_professional", - "numerical_pii", "occupation", "organization", - "organization_id", "organization_medical_facility", "origin", "passport_number", "password", - "phone_number", "physical_attribute", "political_affiliation", - "product", - "project", "religion", - "routing_number", - "sexuality", - "ssn", - "statistics", "time", - "trend", - "url", "username", - "vehicle_id", - "year", "zodiac_sign", + "blood_type", + "condition", + "dose", + "drug", + "injury", + "medical_process", + "statistics", + "routing_number", + "corporate_action", + "financial_metric", + "product", + "trend", + "duration", + "location_address_street", + "all", + "sexuality", + "effect", + "project", + "organization_id", + "day", + "month", + "year", ], typing.Any, ] diff --git a/skyflow/generated/rest/types/format_plaintext_item.py b/skyflow/generated/rest/types/format_plaintext_item.py new file mode 100644 index 00000000..62dfc4bc --- /dev/null +++ b/skyflow/generated/rest/types/format_plaintext_item.py @@ -0,0 +1,79 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +FormatPlaintextItem = typing.Union[ + typing.Literal[ + "age", + "bank_account", + "credit_card", + "credit_card_expiration", + "cvv", + "date", + "date_interval", + "dob", + "driver_license", + "email_address", + "healthcare_number", + "ip_address", + "location", + "name", + "numerical_pii", + "phone_number", + "ssn", + "url", + "vehicle_id", + "medical_code", + "name_family", + "name_given", + "account_number", + "event", + "filename", + "gender", + "language", + "location_address", + "location_city", + "location_coordinate", + "location_country", + "location_state", + "location_zip", + "marital_status", + "money", + "name_medical_professional", + "occupation", + "organization", + "organization_medical_facility", + "origin", + "passport_number", + "password", + "physical_attribute", + "political_affiliation", + "religion", + "time", + "username", + "zodiac_sign", + "blood_type", + "condition", + "dose", + "drug", + "injury", + "medical_process", + "statistics", + "routing_number", + "corporate_action", + "financial_metric", + "product", + "trend", + "duration", + "location_address_street", + "all", + "sexuality", + "effect", + "project", + "organization_id", + "day", + "month", + "year", + ], + typing.Any, +] diff --git a/skyflow/generated/rest/types/format_redacted_item.py b/skyflow/generated/rest/types/format_redacted_item.py new file mode 100644 index 00000000..d2aee020 --- /dev/null +++ b/skyflow/generated/rest/types/format_redacted_item.py @@ -0,0 +1,79 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +FormatRedactedItem = typing.Union[ + typing.Literal[ + "age", + "bank_account", + "credit_card", + "credit_card_expiration", + "cvv", + "date", + "date_interval", + "dob", + "driver_license", + "email_address", + "healthcare_number", + "ip_address", + "location", + "name", + "numerical_pii", + "phone_number", + "ssn", + "url", + "vehicle_id", + "medical_code", + "name_family", + "name_given", + "account_number", + "event", + "filename", + "gender", + "language", + "location_address", + "location_city", + "location_coordinate", + "location_country", + "location_state", + "location_zip", + "marital_status", + "money", + "name_medical_professional", + "occupation", + "organization", + "organization_medical_facility", + "origin", + "passport_number", + "password", + "physical_attribute", + "political_affiliation", + "religion", + "time", + "username", + "zodiac_sign", + "blood_type", + "condition", + "dose", + "drug", + "injury", + "medical_process", + "statistics", + "routing_number", + "corporate_action", + "financial_metric", + "product", + "trend", + "duration", + "location_address_street", + "all", + "sexuality", + "effect", + "project", + "organization_id", + "day", + "month", + "year", + ], + typing.Any, +] diff --git a/skyflow/generated/rest/types/vault_id.py b/skyflow/generated/rest/types/http_code.py similarity index 81% rename from skyflow/generated/rest/types/vault_id.py rename to skyflow/generated/rest/types/http_code.py index 02ae7d21..5fc9a3fb 100644 --- a/skyflow/generated/rest/types/vault_id.py +++ b/skyflow/generated/rest/types/http_code.py @@ -1,3 +1,3 @@ # This file was auto-generated by Fern from our API Definition. 
-VaultId = str +HttpCode = int diff --git a/skyflow/generated/rest/types/reidentify_string_response.py b/skyflow/generated/rest/types/identify_response.py similarity index 78% rename from skyflow/generated/rest/types/reidentify_string_response.py rename to skyflow/generated/rest/types/identify_response.py index cbb1b836..67786621 100644 --- a/skyflow/generated/rest/types/reidentify_string_response.py +++ b/skyflow/generated/rest/types/identify_response.py @@ -6,12 +6,12 @@ from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -class ReidentifyStringResponse(UniversalBaseModel): +class IdentifyResponse(UniversalBaseModel): """ - Re-identify string response. + Response after identifying text. """ - text: typing.Optional[str] = pydantic.Field(default=None) + text: str = pydantic.Field() """ Re-identified text. """ diff --git a/skyflow/generated/rest/types/deidentify_file_output.py b/skyflow/generated/rest/types/reidentified_file_output.py similarity index 56% rename from skyflow/generated/rest/types/deidentify_file_output.py rename to skyflow/generated/rest/types/reidentified_file_output.py index 7e17e168..c7f8544e 100644 --- a/skyflow/generated/rest/types/deidentify_file_output.py +++ b/skyflow/generated/rest/types/reidentified_file_output.py @@ -4,25 +4,27 @@ import pydantic from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from .deidentify_file_output_processed_file_type import DeidentifyFileOutputProcessedFileType +from .reidentified_file_output_processed_file_extension import ReidentifiedFileOutputProcessedFileExtension -class DeidentifyFileOutput(UniversalBaseModel): +class ReidentifiedFileOutput(UniversalBaseModel): """ - Details and contents of the processed file. + Details of output files. Files are specified as Base64-encoded data. """ processed_file: typing.Optional[str] = pydantic.Field(default=None) """ - URL or base64-encoded data of the output. + File content in Base64 format. """ - processed_file_type: typing.Optional[DeidentifyFileOutputProcessedFileType] = pydantic.Field(default=None) + processed_file_type: typing.Optional[typing.Literal["reidentified_file"]] = pydantic.Field(default=None) """ Type of the processed file. """ - processed_file_extension: typing.Optional[str] = pydantic.Field(default=None) + processed_file_extension: typing.Optional[ReidentifiedFileOutputProcessedFileExtension] = pydantic.Field( + default=None + ) """ Extension of the processed file. """ diff --git a/skyflow/generated/rest/types/reidentified_file_output_processed_file_extension.py b/skyflow/generated/rest/types/reidentified_file_output_processed_file_extension.py new file mode 100644 index 00000000..c6cbfd02 --- /dev/null +++ b/skyflow/generated/rest/types/reidentified_file_output_processed_file_extension.py @@ -0,0 +1,29 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +ReidentifiedFileOutputProcessedFileExtension = typing.Union[ + typing.Literal[ + "mp3", + "wav", + "pdf", + "txt", + "csv", + "json", + "jpg", + "jpeg", + "tif", + "tiff", + "png", + "bmp", + "xls", + "xlsx", + "doc", + "docx", + "ppt", + "pptx", + "xml", + "dcm", + ], + typing.Any, +] diff --git a/skyflow/generated/rest/types/reidentify_file_response.py b/skyflow/generated/rest/types/reidentify_file_response.py index c67b41ac..ef076c72 100644 --- a/skyflow/generated/rest/types/reidentify_file_response.py +++ b/skyflow/generated/rest/types/reidentify_file_response.py @@ -4,26 +4,27 @@ import pydantic from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from .reidentify_file_response_output import ReidentifyFileResponseOutput +from .reidentified_file_output import ReidentifiedFileOutput +from .reidentify_file_response_output_type import ReidentifyFileResponseOutputType from .reidentify_file_response_status import ReidentifyFileResponseStatus class ReidentifyFileResponse(UniversalBaseModel): """ - Response to re-identify a file. + Response to get the status & response of a file re-identification request. """ - status: ReidentifyFileResponseStatus = pydantic.Field() + status: typing.Optional[ReidentifyFileResponseStatus] = pydantic.Field(default=None) """ - Status of the re-identify operation. + Status of the operation. """ - output_type: typing.Literal["BASE64"] = pydantic.Field(default="BASE64") + output_type: typing.Optional[ReidentifyFileResponseOutputType] = pydantic.Field(default=None) """ Format of the output file. """ - output: ReidentifyFileResponseOutput + output: typing.Optional[ReidentifiedFileOutput] = None if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 diff --git a/skyflow/generated/rest/types/reidentify_file_response_output_type.py b/skyflow/generated/rest/types/reidentify_file_response_output_type.py index 03048c85..b7b9e8c4 100644 --- a/skyflow/generated/rest/types/reidentify_file_response_output_type.py +++ b/skyflow/generated/rest/types/reidentify_file_response_output_type.py @@ -2,4 +2,4 @@ import typing -ReidentifyFileResponseOutputType = typing.Union[typing.Literal["BASE64", "UNKNOWN"], typing.Any] +ReidentifyFileResponseOutputType = typing.Union[typing.Literal["UNKNOWN", "BASE64"], typing.Any] diff --git a/skyflow/generated/rest/types/reidentify_file_response_status.py b/skyflow/generated/rest/types/reidentify_file_response_status.py index 8bdfa1e0..ffa5dfc3 100644 --- a/skyflow/generated/rest/types/reidentify_file_response_status.py +++ b/skyflow/generated/rest/types/reidentify_file_response_status.py @@ -2,4 +2,4 @@ import typing -ReidentifyFileResponseStatus = typing.Union[typing.Literal["FAILED", "IN_PROGRESS", "SUCCESS", "UNKNOWN"], typing.Any] +ReidentifyFileResponseStatus = typing.Union[typing.Literal["UNKNOWN", "FAILED", "SUCCESS", "IN_PROGRESS"], typing.Any] diff --git a/skyflow/generated/rest/types/restrict_regex.py b/skyflow/generated/rest/types/restrict_regex.py deleted file mode 100644 index 06dd46b7..00000000 --- a/skyflow/generated/rest/types/restrict_regex.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
- -import typing - -RestrictRegex = typing.List[str] diff --git a/skyflow/generated/rest/types/transformations_shift_dates.py b/skyflow/generated/rest/types/shift_dates.py similarity index 75% rename from skyflow/generated/rest/types/transformations_shift_dates.py rename to skyflow/generated/rest/types/shift_dates.py index 21b21af8..7a1af257 100644 --- a/skyflow/generated/rest/types/transformations_shift_dates.py +++ b/skyflow/generated/rest/types/shift_dates.py @@ -4,25 +4,25 @@ import pydantic from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from .transformations_shift_dates_entity_types_item import TransformationsShiftDatesEntityTypesItem +from .shift_dates_entity_types_item import ShiftDatesEntityTypesItem -class TransformationsShiftDates(UniversalBaseModel): +class ShiftDates(UniversalBaseModel): """ Shift dates by a specified number of days. """ - max_days: typing.Optional[int] = pydantic.Field(default=None) + min_days: typing.Optional[int] = pydantic.Field(default=None) """ - Maximum number of days to shift the date by. + Minimum number of days to shift the date by. """ - min_days: typing.Optional[int] = pydantic.Field(default=None) + max_days: typing.Optional[int] = pydantic.Field(default=None) """ - Minimum number of days to shift the date by. + Maximum number of days to shift the date by. """ - entity_types: typing.Optional[typing.List[TransformationsShiftDatesEntityTypesItem]] = pydantic.Field(default=None) + entity_types: typing.Optional[typing.List[ShiftDatesEntityTypesItem]] = pydantic.Field(default=None) """ Entity types to shift dates for. """ diff --git a/skyflow/generated/rest/types/shift_dates_entity_types_item.py b/skyflow/generated/rest/types/shift_dates_entity_types_item.py new file mode 100644 index 00000000..b0f10d19 --- /dev/null +++ b/skyflow/generated/rest/types/shift_dates_entity_types_item.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +ShiftDatesEntityTypesItem = typing.Union[typing.Literal["date", "date_interval", "dob"], typing.Any] diff --git a/skyflow/generated/rest/types/detected_entity.py b/skyflow/generated/rest/types/string_response_entities.py similarity index 82% rename from skyflow/generated/rest/types/detected_entity.py rename to skyflow/generated/rest/types/string_response_entities.py index c34ba2ca..0d72524a 100644 --- a/skyflow/generated/rest/types/detected_entity.py +++ b/skyflow/generated/rest/types/string_response_entities.py @@ -4,12 +4,11 @@ import pydantic from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from .entity_location import EntityLocation -class DetectedEntity(UniversalBaseModel): +class StringResponseEntities(UniversalBaseModel): """ - Detected entities. + Detected entities for String """ token: typing.Optional[str] = pydantic.Field(default=None) @@ -22,7 +21,6 @@ class DetectedEntity(UniversalBaseModel): Original text of the entity. """ - location: typing.Optional[EntityLocation] = None entity_type: typing.Optional[str] = pydantic.Field(default=None) """ Highest-rated label. @@ -30,7 +28,7 @@ class DetectedEntity(UniversalBaseModel): entity_scores: typing.Optional[typing.Dict[str, float]] = pydantic.Field(default=None) """ - entity_scores and their scores. + Labels and their scores. 
""" if IS_PYDANTIC_V2: diff --git a/skyflow/generated/rest/types/token_type.py b/skyflow/generated/rest/types/token_type.py deleted file mode 100644 index 200b9630..00000000 --- a/skyflow/generated/rest/types/token_type.py +++ /dev/null @@ -1,39 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from .entity_type import EntityType -from .token_type_default import TokenTypeDefault - - -class TokenType(UniversalBaseModel): - """ - Mapping of tokens to generation for detected entities. Can't be specified together with `token_type`. - """ - - default: typing.Optional[TokenTypeDefault] = None - vault_token: typing.Optional[typing.List[EntityType]] = pydantic.Field(default=None) - """ - Entity types to replace with vault tokens. - """ - - entity_unq_counter: typing.Optional[typing.List[EntityType]] = pydantic.Field(default=None) - """ - Entity types to replace with entity tokens with unique counters. - """ - - entity_only: typing.Optional[typing.List[EntityType]] = pydantic.Field(default=None) - """ - Entity types to replace with entity tokens. - """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/skyflow/generated/rest/types/token_type_default.py b/skyflow/generated/rest/types/token_type_default.py deleted file mode 100644 index cfda9f4b..00000000 --- a/skyflow/generated/rest/types/token_type_default.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -TokenTypeDefault = typing.Union[typing.Literal["entity_only", "entity_unq_counter", "vault_token"], typing.Any] diff --git a/skyflow/generated/rest/types/token_type_mapping.py b/skyflow/generated/rest/types/token_type_mapping.py new file mode 100644 index 00000000..5644fd01 --- /dev/null +++ b/skyflow/generated/rest/types/token_type_mapping.py @@ -0,0 +1,47 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from .token_type_mapping_default import TokenTypeMappingDefault +from .token_type_mapping_entity_only_item import TokenTypeMappingEntityOnlyItem +from .token_type_mapping_entity_unq_counter_item import TokenTypeMappingEntityUnqCounterItem +from .token_type_mapping_vault_token_item import TokenTypeMappingVaultTokenItem + + +class TokenTypeMapping(UniversalBaseModel): + """ + Mapping of token types to detected entities. + """ + + vault_token: typing.Optional[typing.List[TokenTypeMappingVaultTokenItem]] = pydantic.Field(default=None) + """ + Entity types to replace with vault tokens. + """ + + entity_only: typing.Optional[typing.List[TokenTypeMappingEntityOnlyItem]] = pydantic.Field(default=None) + """ + Entity types to replace with entity tokens. + """ + + entity_unq_counter: typing.Optional[typing.List[TokenTypeMappingEntityUnqCounterItem]] = pydantic.Field( + default=None + ) + """ + Entity types to replace with entity tokens with unique counters. + """ + + default: typing.Optional[TokenTypeMappingDefault] = pydantic.Field(default=None) + """ + Default token type to generate for detected entities. 
+ """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/skyflow/generated/rest/types/token_type_mapping_default.py b/skyflow/generated/rest/types/token_type_mapping_default.py new file mode 100644 index 00000000..36a9e4ba --- /dev/null +++ b/skyflow/generated/rest/types/token_type_mapping_default.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +TokenTypeMappingDefault = typing.Union[typing.Literal["entity_unq_counter", "entity_only", "vault_token"], typing.Any] diff --git a/skyflow/generated/rest/types/token_type_mapping_entity_only_item.py b/skyflow/generated/rest/types/token_type_mapping_entity_only_item.py new file mode 100644 index 00000000..d3309ab9 --- /dev/null +++ b/skyflow/generated/rest/types/token_type_mapping_entity_only_item.py @@ -0,0 +1,79 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +TokenTypeMappingEntityOnlyItem = typing.Union[ + typing.Literal[ + "age", + "bank_account", + "credit_card", + "credit_card_expiration", + "cvv", + "date", + "date_interval", + "dob", + "driver_license", + "email_address", + "healthcare_number", + "ip_address", + "location", + "name", + "numerical_pii", + "phone_number", + "ssn", + "url", + "vehicle_id", + "medical_code", + "name_family", + "name_given", + "account_number", + "event", + "filename", + "gender", + "language", + "location_address", + "location_city", + "location_coordinate", + "location_country", + "location_state", + "location_zip", + "marital_status", + "money", + "name_medical_professional", + "occupation", + "organization", + "organization_medical_facility", + "origin", + "passport_number", + "password", + "physical_attribute", + "political_affiliation", + "religion", + "time", + "username", + "zodiac_sign", + "blood_type", + "condition", + "dose", + "drug", + "injury", + "medical_process", + "statistics", + "routing_number", + "corporate_action", + "financial_metric", + "product", + "trend", + "duration", + "location_address_street", + "all", + "sexuality", + "effect", + "project", + "organization_id", + "day", + "month", + "year", + ], + typing.Any, +] diff --git a/skyflow/generated/rest/types/token_type_mapping_entity_unq_counter_item.py b/skyflow/generated/rest/types/token_type_mapping_entity_unq_counter_item.py new file mode 100644 index 00000000..219f79ac --- /dev/null +++ b/skyflow/generated/rest/types/token_type_mapping_entity_unq_counter_item.py @@ -0,0 +1,79 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +TokenTypeMappingEntityUnqCounterItem = typing.Union[ + typing.Literal[ + "age", + "bank_account", + "credit_card", + "credit_card_expiration", + "cvv", + "date", + "date_interval", + "dob", + "driver_license", + "email_address", + "healthcare_number", + "ip_address", + "location", + "name", + "numerical_pii", + "phone_number", + "ssn", + "url", + "vehicle_id", + "medical_code", + "name_family", + "name_given", + "account_number", + "event", + "filename", + "gender", + "language", + "location_address", + "location_city", + "location_coordinate", + "location_country", + "location_state", + "location_zip", + "marital_status", + "money", + "name_medical_professional", + "occupation", + "organization", + "organization_medical_facility", + "origin", + "passport_number", + "password", + "physical_attribute", + "political_affiliation", + "religion", + "time", + "username", + "zodiac_sign", + "blood_type", + "condition", + "dose", + "drug", + "injury", + "medical_process", + "statistics", + "routing_number", + "corporate_action", + "financial_metric", + "product", + "trend", + "duration", + "location_address_street", + "all", + "sexuality", + "effect", + "project", + "organization_id", + "day", + "month", + "year", + ], + typing.Any, +] diff --git a/skyflow/generated/rest/types/token_type_mapping_vault_token_item.py b/skyflow/generated/rest/types/token_type_mapping_vault_token_item.py new file mode 100644 index 00000000..17178ea6 --- /dev/null +++ b/skyflow/generated/rest/types/token_type_mapping_vault_token_item.py @@ -0,0 +1,79 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +TokenTypeMappingVaultTokenItem = typing.Union[ + typing.Literal[ + "age", + "bank_account", + "credit_card", + "credit_card_expiration", + "cvv", + "date", + "date_interval", + "dob", + "driver_license", + "email_address", + "healthcare_number", + "ip_address", + "location", + "name", + "numerical_pii", + "phone_number", + "ssn", + "url", + "vehicle_id", + "medical_code", + "name_family", + "name_given", + "account_number", + "event", + "filename", + "gender", + "language", + "location_address", + "location_city", + "location_coordinate", + "location_country", + "location_state", + "location_zip", + "marital_status", + "money", + "name_medical_professional", + "occupation", + "organization", + "organization_medical_facility", + "origin", + "passport_number", + "password", + "physical_attribute", + "political_affiliation", + "religion", + "time", + "username", + "zodiac_sign", + "blood_type", + "condition", + "dose", + "drug", + "injury", + "medical_process", + "statistics", + "routing_number", + "corporate_action", + "financial_metric", + "product", + "trend", + "duration", + "location_address_street", + "all", + "sexuality", + "effect", + "project", + "organization_id", + "day", + "month", + "year", + ], + typing.Any, +] diff --git a/skyflow/generated/rest/types/token_type_without_vault.py b/skyflow/generated/rest/types/token_type_without_vault.py deleted file mode 100644 index d79a3477..00000000 --- a/skyflow/generated/rest/types/token_type_without_vault.py +++ /dev/null @@ -1,34 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. 
- -import typing - -import pydantic -from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from .entity_type import EntityType -from .token_type_without_vault_default import TokenTypeWithoutVaultDefault - - -class TokenTypeWithoutVault(UniversalBaseModel): - """ - Mapping of tokens to generation for detected entities. Can't be specified together with `token_type`. - """ - - default: typing.Optional[TokenTypeWithoutVaultDefault] = None - entity_unq_counter: typing.Optional[typing.List[EntityType]] = pydantic.Field(default=None) - """ - Entity types to replace with entity tokens with unique counters. - """ - - entity_only: typing.Optional[typing.List[EntityType]] = pydantic.Field(default=None) - """ - Entity types to replace with entity tokens. - """ - - if IS_PYDANTIC_V2: - model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 - else: - - class Config: - frozen = True - smart_union = True - extra = pydantic.Extra.allow diff --git a/skyflow/generated/rest/types/token_type_without_vault_default.py b/skyflow/generated/rest/types/token_type_without_vault_default.py deleted file mode 100644 index 53d71dc6..00000000 --- a/skyflow/generated/rest/types/token_type_without_vault_default.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -TokenTypeWithoutVaultDefault = typing.Union[typing.Literal["entity_only", "entity_unq_counter"], typing.Any] diff --git a/skyflow/generated/rest/types/transformations.py b/skyflow/generated/rest/types/transformations.py index 352df144..9895e2f6 100644 --- a/skyflow/generated/rest/types/transformations.py +++ b/skyflow/generated/rest/types/transformations.py @@ -4,18 +4,15 @@ import pydantic from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel -from .transformations_shift_dates import TransformationsShiftDates +from .shift_dates import ShiftDates class Transformations(UniversalBaseModel): """ - Transformations to apply to the detected entities. + Transformations to apply to detected entities. """ - shift_dates: typing.Optional[TransformationsShiftDates] = pydantic.Field(default=None) - """ - Shift dates by a specified number of days. - """ + shift_dates: typing.Optional[ShiftDates] = None if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 diff --git a/skyflow/generated/rest/types/transformations_shift_dates_entity_types_item.py b/skyflow/generated/rest/types/transformations_shift_dates_entity_types_item.py deleted file mode 100644 index f8d98df6..00000000 --- a/skyflow/generated/rest/types/transformations_shift_dates_entity_types_item.py +++ /dev/null @@ -1,5 +0,0 @@ -# This file was auto-generated by Fern from our API Definition. - -import typing - -TransformationsShiftDatesEntityTypesItem = typing.Union[typing.Literal["date", "date_interval", "dob"], typing.Any] diff --git a/skyflow/generated/rest/types/word_character_count.py b/skyflow/generated/rest/types/word_character_count.py new file mode 100644 index 00000000..d2506866 --- /dev/null +++ b/skyflow/generated/rest/types/word_character_count.py @@ -0,0 +1,37 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +import pydantic +import typing_extensions +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from ..core.serialization import FieldMetadata + + +class WordCharacterCount(UniversalBaseModel): + """ + Word and character count of the processed text. + """ + + word_count: typing_extensions.Annotated[typing.Optional[int], FieldMetadata(alias="wordCount")] = pydantic.Field( + default=None + ) + """ + Number of words in the processed text. + """ + + character_count: typing_extensions.Annotated[typing.Optional[int], FieldMetadata(alias="characterCount")] = ( + pydantic.Field(default=None) + ) + """ + Number of characters in the processed text. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/skyflow/generated/rest/version.py b/skyflow/generated/rest/version.py index e5d18b20..a93039be 100644 --- a/skyflow/generated/rest/version.py +++ b/skyflow/generated/rest/version.py @@ -1 +1,3 @@ -__version__ = "2.0.9" +from importlib import metadata + +__version__ = metadata.version("skyflow") diff --git a/skyflow/utils/__init__.py b/skyflow/utils/__init__.py index 67905512..f2788b11 100644 --- a/skyflow/utils/__init__.py +++ b/skyflow/utils/__init__.py @@ -1,4 +1,4 @@ -from ..utils.enums import LogLevel, Env +from ..utils.enums import LogLevel, Env, TokenType from ._skyflow_messages import SkyflowMessages from ._version import SDK_VERSION from ._helpers import get_base_url, format_scope diff --git a/skyflow/utils/_utils.py b/skyflow/utils/_utils.py index 114079b5..899dd1b4 100644 --- a/skyflow/utils/_utils.py +++ b/skyflow/utils/_utils.py @@ -14,7 +14,7 @@ from skyflow.error import SkyflowError from skyflow.generated.rest import V1UpdateRecordResponse, V1BulkDeleteRecordResponse, \ V1DetokenizeResponse, V1TokenizeResponse, V1GetQueryResponse, V1BulkGetRecordResponse, \ - DeidentifyStringResponse, ReidentifyStringResponse, ErrorResponse + DeidentifyStringResponse, ErrorResponse, IdentifyResponse from skyflow.generated.rest.core.http_response import HttpResponse from skyflow.utils.logger import log_error_log from skyflow.vault.detect import DeidentifyTextResponse, ReidentifyTextResponse @@ -90,12 +90,12 @@ def convert_detected_entity_to_entity_info(detected_entity): token=detected_entity.token, value=detected_entity.value, text_index=TextIndex( - start=detected_entity.location.start_index, - end=detected_entity.location.end_index + start=detected_entity.location['start_index'], + end=detected_entity.location['end_index'] ), processed_index=TextIndex( - start=detected_entity.location.start_index_processed, - end=detected_entity.location.end_index_processed + start=detected_entity.location['start_index_processed'], + end=detected_entity.location['end_index_processed'] ), entity=detected_entity.entity_type, scores=detected_entity.entity_scores @@ -388,7 +388,7 @@ def parse_deidentify_text_response(api_response: DeidentifyStringResponse): char_count=api_response.character_count ) -def parse_reidentify_text_response(api_response: ReidentifyStringResponse): +def parse_reidentify_text_response(api_response: IdentifyResponse): return ReidentifyTextResponse(api_response.text) def log_and_reject_error(description, status_code, request_id, http_status=None, grpc_code=None, details=None, logger = None): diff --git a/skyflow/utils/validations/_validations.py 
b/skyflow/utils/validations/_validations.py index 4428d11e..3da8b3a4 100644 --- a/skyflow/utils/validations/_validations.py +++ b/skyflow/utils/validations/_validations.py @@ -1,7 +1,6 @@ import base64 import json import os -from skyflow.generated.rest import TokenType from skyflow.service_account import is_expired from skyflow.utils.enums import LogLevel, Env, RedactionType, TokenMode, DetectEntities, DetectOutputTranscriptions, \ MaskingMethod diff --git a/skyflow/vault/controller/_detect.py b/skyflow/vault/controller/_detect.py index 62d551c1..b22a76b3 100644 --- a/skyflow/vault/controller/_detect.py +++ b/skyflow/vault/controller/_detect.py @@ -3,10 +3,11 @@ import os import base64 import time -from skyflow.generated.rest import DeidentifyTextRequestFile, DeidentifyAudioRequestFile, DeidentifyPdfRequestFile, \ - DeidentifyImageRequestFile, DeidentifyPresentationRequestFile, DeidentifySpreadsheetRequestFile, \ - DeidentifyDocumentRequestFile, DeidentifyFileRequestFile -from skyflow.generated.rest.types.deidentify_status_response import DeidentifyStatusResponse + +from skyflow.generated.rest import FileDataDeidentifyText, FileDataDeidentifyPdf, FileDataDeidentifyPresentation, \ + FileDataDeidentifySpreadsheet, FileDataDeidentifyDocument, FileDataDeidentifyStructuredText, FileData, \ + FileDataDeidentifyImage, Format, FileDataDeidentifyAudio, WordCharacterCount, DetectRunsResponse + from skyflow.utils._skyflow_messages import SkyflowMessages from skyflow.utils._utils import get_attribute, get_metrics, handle_exception, parse_deidentify_text_response, parse_reidentify_text_response from skyflow.utils.constants import SKY_META_DATA_HEADER @@ -14,7 +15,6 @@ from skyflow.utils.validations import validate_deidentify_file_request, validate_get_detect_run_request from skyflow.utils.validations._validations import validate_deidentify_text_request, validate_reidentify_text_request from typing import Dict, Any -from skyflow.generated.rest.strings.types.reidentify_string_request_format import ReidentifyStringRequestFormat from skyflow.vault.detect import DeidentifyTextRequest, DeidentifyTextResponse, ReidentifyTextRequest, \ ReidentifyTextResponse, DeidentifyFileRequest, DeidentifyFileResponse, GetDetectRunRequest @@ -46,7 +46,7 @@ def ___build_deidentify_text_body(self, request: DeidentifyTextRequest) -> Dict[ return deidentify_text_body def ___build_reidentify_text_body(self, request: ReidentifyTextRequest) -> Dict[str, Any]: - parsed_format = ReidentifyStringRequestFormat( + parsed_format = Format( redacted=request.redacted_entities, masked=request.masked_entities, plaintext=request.plain_text_entities @@ -84,7 +84,7 @@ def __poll_for_processed_file(self, run_id, max_wait_time=64): except Exception as e: raise e - def __save_deidentify_file_response_output(self, response: DeidentifyStatusResponse, output_directory: str, original_file_name: str, name_without_ext: str): + def __save_deidentify_file_response_output(self, response: DetectRunsResponse, output_directory: str, original_file_name: str, name_without_ext: str): if not response or not hasattr(response, 'output') or not response.output or not output_directory: return @@ -129,10 +129,10 @@ def __parse_deidentify_file_response(self, data, run_id=None, status=None): word_count = None char_count = None - word_character_count = getattr(data, "wordCharacterCount", None) - if word_character_count and isinstance(word_character_count, dict): - word_count = word_character_count.get("wordCount") - char_count = word_character_count.get("characterCount") 
+ word_character_count = getattr(data, "word_character_count", None) + if word_character_count and isinstance(word_character_count, WordCharacterCount): + word_count = word_character_count.word_count + char_count = word_character_count.character_count size = getattr(data, "size", None) @@ -142,23 +142,20 @@ def __parse_deidentify_file_response(self, data, run_id=None, status=None): pages = getattr(data, "pages", None) slides = getattr(data, "slides", None) - # Convert output to list of dicts, prefer camelCase keys def output_to_dict_list(output): result = [] for o in output: if isinstance(o, dict): result.append({ - "file": o.get("processedFile") or o.get("processed_file"), - "type": o.get("processedFileType") or o.get("processed_file_type"), - "extension": o.get("processedFileExtension") or o.get("processed_file_extension") + "file": o.get("processed_file"), + "type": o.get("processed_file_type"), + "extension": o.get("processed_file_extension") }) else: result.append({ - "file": getattr(o, "processedFile", None) or getattr(o, "processed_file", None), - "type": getattr(o, "processedFileType", None) or getattr(o, "processed_file_type", None), - "extension": getattr(o, "processedFileExtension", None) or getattr(o, - "processed_file_extension", - None) + "file": getattr(o, "processed_file", None), + "type": getattr(o, "processed_file_type", None), + "extension": getattr(o, "processed_file_extension", None) }) return result @@ -200,7 +197,6 @@ def __get_token_format(self, request): 'default': getattr(request.token_format, "default", None), 'entity_unq_counter': getattr(request.token_format, "entity_unique_counter", None), 'entity_only': getattr(request.token_format, "entity_only", None), - 'vault_token': getattr(request.token_format, "vault_token", None) } def __get_transformations(self, request): @@ -293,7 +289,7 @@ def deidentify_file(self, request: DeidentifyFileRequest): try: if file_extension == 'txt': - req_file = DeidentifyTextRequestFile(base_64=base64_string, data_format="txt") + req_file = FileDataDeidentifyText(base_64=base64_string, data_format="txt") api_call = files_api.deidentify_text api_kwargs = { 'vault_id': self.__vault_client.get_vault_id(), @@ -307,7 +303,7 @@ def deidentify_file(self, request: DeidentifyFileRequest): } elif file_extension in ['mp3', 'wav']: - req_file = DeidentifyAudioRequestFile(base_64=base64_string, data_format=file_extension) + req_file = FileDataDeidentifyAudio(base_64=base64_string, data_format=file_extension) api_call = files_api.deidentify_audio api_kwargs = { 'vault_id': self.__vault_client.get_vault_id(), @@ -327,7 +323,7 @@ def deidentify_file(self, request: DeidentifyFileRequest): } elif file_extension == 'pdf': - req_file = DeidentifyPdfRequestFile(base_64=base64_string) + req_file = FileDataDeidentifyPdf(base_64=base64_string) api_call = files_api.deidentify_pdf api_kwargs = { 'vault_id': self.__vault_client.get_vault_id(), @@ -342,7 +338,7 @@ def deidentify_file(self, request: DeidentifyFileRequest): } elif file_extension in ['jpeg', 'jpg', 'png', 'bmp', 'tif', 'tiff']: - req_file = DeidentifyImageRequestFile(base_64=base64_string, data_format=file_extension) + req_file = FileDataDeidentifyImage(base_64=base64_string, data_format=file_extension) api_call = files_api.deidentify_image api_kwargs = { 'vault_id': self.__vault_client.get_vault_id(), @@ -358,7 +354,7 @@ def deidentify_file(self, request: DeidentifyFileRequest): } elif file_extension in ['ppt', 'pptx']: - req_file = DeidentifyPresentationRequestFile(base_64=base64_string, 
data_format=file_extension) + req_file = FileDataDeidentifyPresentation(base_64=base64_string, data_format=file_extension) api_call = files_api.deidentify_presentation api_kwargs = { 'vault_id': self.__vault_client.get_vault_id(), @@ -371,7 +367,7 @@ def deidentify_file(self, request: DeidentifyFileRequest): } elif file_extension in ['csv', 'xls', 'xlsx']: - req_file = DeidentifySpreadsheetRequestFile(base_64=base64_string, data_format=file_extension) + req_file = FileDataDeidentifySpreadsheet(base_64=base64_string, data_format=file_extension) api_call = files_api.deidentify_spreadsheet api_kwargs = { 'vault_id': self.__vault_client.get_vault_id(), @@ -380,12 +376,11 @@ def deidentify_file(self, request: DeidentifyFileRequest): 'token_type': self.__get_token_format(request), 'allow_regex': request.allow_regex_list, 'restrict_regex': request.restrict_regex_list, - 'transformations': self.__get_transformations(request), 'request_options': self.__get_headers() } elif file_extension in ['doc', 'docx']: - req_file = DeidentifyDocumentRequestFile(base_64=base64_string, data_format=file_extension) + req_file = FileDataDeidentifyDocument(base_64=base64_string, data_format=file_extension) api_call = files_api.deidentify_document api_kwargs = { 'vault_id': self.__vault_client.get_vault_id(), @@ -398,9 +393,7 @@ def deidentify_file(self, request: DeidentifyFileRequest): } elif file_extension in ['json', 'xml']: - from skyflow.generated.rest.files.types.deidentify_structured_text_request_file import \ - DeidentifyStructuredTextRequestFile - req_file = DeidentifyStructuredTextRequestFile(base_64=base64_string, data_format=file_extension) + req_file = FileDataDeidentifyStructuredText(base_64=base64_string, data_format=file_extension) api_call = files_api.deidentify_structured_text api_kwargs = { 'vault_id': self.__vault_client.get_vault_id(), @@ -414,7 +407,7 @@ def deidentify_file(self, request: DeidentifyFileRequest): } else: - req_file = DeidentifyFileRequestFile(base_64=base64_string, data_format=file_extension) + req_file = FileData(base_64=base64_string, data_format=file_extension) api_call = files_api.deidentify_file api_kwargs = { 'vault_id': self.__vault_client.get_vault_id(), diff --git a/tests/utils/test__utils.py b/tests/utils/test__utils.py index 6eaacf47..6fc3ae68 100644 --- a/tests/utils/test__utils.py +++ b/tests/utils/test__utils.py @@ -494,12 +494,12 @@ def test_parse_deidentify_text_response(self): mock_entity.value = "sensitive_value" mock_entity.entity_type = "EMAIL" mock_entity.entity_scores = {"EMAIL": 0.95} - mock_entity.location = Mock( - start_index=10, - end_index=20, - start_index_processed=15, - end_index_processed=25 - ) + mock_entity.location = { + "start_index": 10, + "end_index": 20, + "start_index_processed": 15, + "end_index_processed":25 + } mock_api_response = Mock() mock_api_response.processed_text = "Sample processed text" @@ -555,12 +555,12 @@ def test__convert_detected_entity_to_entity_info(self): mock_detected_entity.value = "sensitive_value" mock_detected_entity.entity_type = "EMAIL" mock_detected_entity.entity_scores = {"EMAIL": 0.95} - mock_detected_entity.location = Mock( - start_index=10, - end_index=20, - start_index_processed=15, - end_index_processed=25 - ) + mock_detected_entity.location = { + "start_index": 10, + "end_index": 20, + "start_index_processed": 15, + "end_index_processed":25 + } result = convert_detected_entity_to_entity_info(mock_detected_entity) @@ -580,12 +580,12 @@ def 
test__convert_detected_entity_to_entity_info_with_minimal_data(self): mock_detected_entity.value = None mock_detected_entity.entity_type = "UNKNOWN" mock_detected_entity.entity_scores = {} - mock_detected_entity.location = Mock( - start_index=0, - end_index=0, - start_index_processed=0, - end_index_processed=0 - ) + mock_detected_entity.location = { + "start_index": 0, + "end_index": 0, + "start_index_processed":0, + "end_index_processed":0 + } result = convert_detected_entity_to_entity_info(mock_detected_entity) diff --git a/tests/vault/controller/test__detect.py b/tests/vault/controller/test__detect.py index dc3a753f..c2f9a861 100644 --- a/tests/vault/controller/test__detect.py +++ b/tests/vault/controller/test__detect.py @@ -3,6 +3,7 @@ import base64 import os from skyflow.error import SkyflowError +from skyflow.generated.rest import WordCharacterCount from skyflow.utils import SkyflowMessages from skyflow.vault.controller import Detect from skyflow.vault.detect import DeidentifyTextRequest, ReidentifyTextRequest, \ @@ -149,7 +150,7 @@ def test_deidentify_file_txt_success(self, mock_open, mock_basename, mock_base64 processed_response = Mock() processed_response.status = "SUCCESS" processed_response.output = [] - processed_response.wordCharacterCount = Mock(wordCount=1, characterCount=1) + processed_response.word_character_count = WordCharacterCount(word_count=1, character_count=1) with patch.object(self.detect, "_Detect__poll_for_processed_file", return_value=processed_response) as mock_poll, \ patch.object(self.detect, "_Detect__parse_deidentify_file_response", @@ -211,7 +212,7 @@ def test_deidentify_file_audio_success(self, mock_base64, mock_validate): processed_response = Mock() processed_response.status = "SUCCESS" processed_response.output = [] - processed_response.wordCharacterCount = Mock(wordCount=1, characterCount=1) + processed_response.word_character_count = Mock(word_count=1, character_count=1) with patch.object(self.detect, "_Detect__poll_for_processed_file", return_value=processed_response) as mock_poll, \ patch.object(self.detect, "_Detect__parse_deidentify_file_response", @@ -295,16 +296,15 @@ def test_deidentify_file_all_branches(self, mock_poll, mock_open, mock_basename, processed_response = Mock() processed_response.status = "SUCCESS" processed_response.output = [ - {"processedFile": "dGVzdCBjb250ZW50", "processedFileType": "pdf", "processedFileExtension": "pdf"} + {"processed_file": "dGVzdCBjb250ZW50", "processed_file_type": "pdf", "processed_file_extension": "pdf"} ] - processed_response.wordCharacterCount = Mock(wordCount=1, characterCount=1) processed_response.size = 1 processed_response.duration = 1 processed_response.pages = 1 processed_response.slides = 1 processed_response.message = "" processed_response.run_id = "runid123" - processed_response.wordCharacterCount = {"wordCount": 1, "characterCount": 1} + processed_response.word_character_count = WordCharacterCount(word_count=1, character_count=1) mock_poll.return_value = processed_response # Test configuration for different file types @@ -352,6 +352,7 @@ def test_deidentify_file_all_branches(self, mock_poll, mock_open, mock_basename, result = self.detect.deidentify_file(req) # Verify the result + print("Result : ", result) self.assertIsInstance(result, DeidentifyFileResponse) self.assertEqual(result.status, "SUCCESS") self.assertEqual(result.run_id, "runid123") @@ -661,7 +662,7 @@ def test_deidentify_file_using_file_path(self, mock_open, mock_basename, mock_ba processedFileType="txt", 
processedFileExtension="txt") ] - processed_response.wordCharacterCount = Mock(wordCount=1, characterCount=1) + processed_response.word_character_count = WordCharacterCount(word_count=1, character_count=1) # Test the method with patch.object(self.detect, "_Detect__poll_for_processed_file", From ac51edf830fe61cf2d06f4e4cdc62fc892a1b7cb Mon Sep 17 00:00:00 2001 From: saileshwar-skyflow Date: Thu, 6 Nov 2025 14:27:42 +0000 Subject: [PATCH 34/42] [AUTOMATED] Private Release 1.15.8.dev0+08b535a --- setup.py | 2 +- skyflow/utils/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index a95dc1dd..d8473340 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ if sys.version_info < (3, 8): raise RuntimeError("skyflow requires Python 3.8+") -current_version = '1.15.5.dev0+dab7b66' +current_version = '1.15.8.dev0+08b535a' setup( name='skyflow', diff --git a/skyflow/utils/_version.py b/skyflow/utils/_version.py index f15769f8..72bda35b 100644 --- a/skyflow/utils/_version.py +++ b/skyflow/utils/_version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.5.dev0+dab7b66' \ No newline at end of file +SDK_VERSION = '1.15.8.dev0+08b535a' \ No newline at end of file From 4c277755032784d2a203eddca83afb1ef9ed5f8e Mon Sep 17 00:00:00 2001 From: saileshwar-skyflow Date: Thu, 6 Nov 2025 21:12:28 +0530 Subject: [PATCH 35/42] SK-2353: fix indentation --- skyflow/vault/controller/_detect.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/skyflow/vault/controller/_detect.py b/skyflow/vault/controller/_detect.py index b22a76b3..44ef2540 100644 --- a/skyflow/vault/controller/_detect.py +++ b/skyflow/vault/controller/_detect.py @@ -3,11 +3,9 @@ import os import base64 import time - from skyflow.generated.rest import FileDataDeidentifyText, FileDataDeidentifyPdf, FileDataDeidentifyPresentation, \ FileDataDeidentifySpreadsheet, FileDataDeidentifyDocument, FileDataDeidentifyStructuredText, FileData, \ FileDataDeidentifyImage, Format, FileDataDeidentifyAudio, WordCharacterCount, DetectRunsResponse - from skyflow.utils._skyflow_messages import SkyflowMessages from skyflow.utils._utils import get_attribute, get_metrics, handle_exception, parse_deidentify_text_response, parse_reidentify_text_response from skyflow.utils.constants import SKY_META_DATA_HEADER @@ -18,7 +16,6 @@ from skyflow.vault.detect import DeidentifyTextRequest, DeidentifyTextResponse, ReidentifyTextRequest, \ ReidentifyTextResponse, DeidentifyFileRequest, DeidentifyFileResponse, GetDetectRunRequest - class Detect: def __init__(self, vault_client): self.__vault_client = vault_client From 9e85bbab0e1dd0127f317490675a76564a132f25 Mon Sep 17 00:00:00 2001 From: saileshwar-skyflow Date: Thu, 6 Nov 2025 15:43:05 +0000 Subject: [PATCH 36/42] [AUTOMATED] Private Release 1.15.8.dev0+4c27775 --- setup.py | 2 +- skyflow/utils/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index d8473340..5b22b2ac 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ if sys.version_info < (3, 8): raise RuntimeError("skyflow requires Python 3.8+") -current_version = '1.15.8.dev0+08b535a' +current_version = '1.15.8.dev0+4c27775' setup( name='skyflow', diff --git a/skyflow/utils/_version.py b/skyflow/utils/_version.py index 72bda35b..a1a651c8 100644 --- a/skyflow/utils/_version.py +++ b/skyflow/utils/_version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.8.dev0+08b535a' \ No newline at end of file +SDK_VERSION = '1.15.8.dev0+4c27775' \ No newline at end of file From ba735537d2780e3ff22893b45db9c25763f5930e Mon 
Sep 17 00:00:00 2001 From: saileshwar-skyflow Date: Thu, 6 Nov 2025 23:19:31 +0530 Subject: [PATCH 37/42] SK-2385: add locations to the generated code --- skyflow/generated/rest/__init__.py | 2 + skyflow/generated/rest/types/__init__.py | 2 + skyflow/generated/rest/types/locations.py | 41 +++++++++++++++++++ .../rest/types/string_response_entities.py | 2 + skyflow/utils/_utils.py | 8 ++-- tests/utils/test__utils.py | 36 ++++++++-------- 6 files changed, 69 insertions(+), 22 deletions(-) create mode 100644 skyflow/generated/rest/types/locations.py diff --git a/skyflow/generated/rest/__init__.py b/skyflow/generated/rest/__init__.py index 8a59c25d..bdb94808 100644 --- a/skyflow/generated/rest/__init__.py +++ b/skyflow/generated/rest/__init__.py @@ -48,6 +48,7 @@ GooglerpcStatus, HttpCode, IdentifyResponse, + Locations, ProtobufAny, RedactionEnumRedaction, ReidentifiedFileOutput, @@ -201,6 +202,7 @@ "HttpCode", "IdentifyResponse", "InternalServerError", + "Locations", "NotFoundError", "ProtobufAny", "RecordServiceBulkGetRecordRequestOrderBy", diff --git a/skyflow/generated/rest/types/__init__.py b/skyflow/generated/rest/types/__init__.py index 75979081..11929765 100644 --- a/skyflow/generated/rest/types/__init__.py +++ b/skyflow/generated/rest/types/__init__.py @@ -47,6 +47,7 @@ from .googlerpc_status import GooglerpcStatus from .http_code import HttpCode from .identify_response import IdentifyResponse +from .locations import Locations from .protobuf_any import ProtobufAny from .redaction_enum_redaction import RedactionEnumRedaction from .reidentified_file_output import ReidentifiedFileOutput @@ -146,6 +147,7 @@ "GooglerpcStatus", "HttpCode", "IdentifyResponse", + "Locations", "ProtobufAny", "RedactionEnumRedaction", "ReidentifiedFileOutput", diff --git a/skyflow/generated/rest/types/locations.py b/skyflow/generated/rest/types/locations.py new file mode 100644 index 00000000..7b8ba88e --- /dev/null +++ b/skyflow/generated/rest/types/locations.py @@ -0,0 +1,41 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel + + +class Locations(UniversalBaseModel): + """ + Locations of an entity in the text. + """ + + start_index: typing.Optional[int] = pydantic.Field(default=None) + """ + Index of the first character of the string in the original text. + """ + + end_index: typing.Optional[int] = pydantic.Field(default=None) + """ + Index of the last character of the string in the original text. + """ + + start_index_processed: typing.Optional[int] = pydantic.Field(default=None) + """ + Index of the first character of the string in the processed text. + """ + + end_index_processed: typing.Optional[int] = pydantic.Field(default=None) + """ + Index of the last character of the string in the processed text. 
+ """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/skyflow/generated/rest/types/string_response_entities.py b/skyflow/generated/rest/types/string_response_entities.py index 0d72524a..864d5a46 100644 --- a/skyflow/generated/rest/types/string_response_entities.py +++ b/skyflow/generated/rest/types/string_response_entities.py @@ -4,6 +4,7 @@ import pydantic from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from .locations import Locations class StringResponseEntities(UniversalBaseModel): @@ -21,6 +22,7 @@ class StringResponseEntities(UniversalBaseModel): Original text of the entity. """ + location: typing.Optional[Locations] = None entity_type: typing.Optional[str] = pydantic.Field(default=None) """ Highest-rated label. diff --git a/skyflow/utils/_utils.py b/skyflow/utils/_utils.py index 899dd1b4..4278357e 100644 --- a/skyflow/utils/_utils.py +++ b/skyflow/utils/_utils.py @@ -90,12 +90,12 @@ def convert_detected_entity_to_entity_info(detected_entity): token=detected_entity.token, value=detected_entity.value, text_index=TextIndex( - start=detected_entity.location['start_index'], - end=detected_entity.location['end_index'] + start=detected_entity.location.start_index, + end=detected_entity.location.end_index ), processed_index=TextIndex( - start=detected_entity.location['start_index_processed'], - end=detected_entity.location['end_index_processed'] + start=detected_entity.location.start_index_processed, + end=detected_entity.location.end_index_processed ), entity=detected_entity.entity_type, scores=detected_entity.entity_scores diff --git a/tests/utils/test__utils.py b/tests/utils/test__utils.py index 6fc3ae68..6eaacf47 100644 --- a/tests/utils/test__utils.py +++ b/tests/utils/test__utils.py @@ -494,12 +494,12 @@ def test_parse_deidentify_text_response(self): mock_entity.value = "sensitive_value" mock_entity.entity_type = "EMAIL" mock_entity.entity_scores = {"EMAIL": 0.95} - mock_entity.location = { - "start_index": 10, - "end_index": 20, - "start_index_processed": 15, - "end_index_processed":25 - } + mock_entity.location = Mock( + start_index=10, + end_index=20, + start_index_processed=15, + end_index_processed=25 + ) mock_api_response = Mock() mock_api_response.processed_text = "Sample processed text" @@ -555,12 +555,12 @@ def test__convert_detected_entity_to_entity_info(self): mock_detected_entity.value = "sensitive_value" mock_detected_entity.entity_type = "EMAIL" mock_detected_entity.entity_scores = {"EMAIL": 0.95} - mock_detected_entity.location = { - "start_index": 10, - "end_index": 20, - "start_index_processed": 15, - "end_index_processed":25 - } + mock_detected_entity.location = Mock( + start_index=10, + end_index=20, + start_index_processed=15, + end_index_processed=25 + ) result = convert_detected_entity_to_entity_info(mock_detected_entity) @@ -580,12 +580,12 @@ def test__convert_detected_entity_to_entity_info_with_minimal_data(self): mock_detected_entity.value = None mock_detected_entity.entity_type = "UNKNOWN" mock_detected_entity.entity_scores = {} - mock_detected_entity.location = { - "start_index": 0, - "end_index": 0, - "start_index_processed":0, - "end_index_processed":0 - } + mock_detected_entity.location = Mock( + start_index=0, + end_index=0, + start_index_processed=0, + end_index_processed=0 + ) result = 
convert_detected_entity_to_entity_info(mock_detected_entity) From 155a6895cc9d73368494f2b6a6055a707852215b Mon Sep 17 00:00:00 2001 From: saileshwar-skyflow Date: Thu, 6 Nov 2025 17:50:50 +0000 Subject: [PATCH 38/42] [AUTOMATED] Private Release 1.15.8.dev0+ba73553 --- setup.py | 2 +- skyflow/utils/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 5b22b2ac..903f488f 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ if sys.version_info < (3, 8): raise RuntimeError("skyflow requires Python 3.8+") -current_version = '1.15.8.dev0+4c27775' +current_version = '1.15.8.dev0+ba73553' setup( name='skyflow', diff --git a/skyflow/utils/_version.py b/skyflow/utils/_version.py index a1a651c8..661806df 100644 --- a/skyflow/utils/_version.py +++ b/skyflow/utils/_version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.8.dev0+4c27775' \ No newline at end of file +SDK_VERSION = '1.15.8.dev0+ba73553' \ No newline at end of file From 3e794e3979199f15638a3f3ced709b35e6bc4a69 Mon Sep 17 00:00:00 2001 From: saileshwar-skyflow Date: Fri, 7 Nov 2025 12:50:23 +0530 Subject: [PATCH 39/42] SK-2385: update output transcriptions enum --- skyflow/utils/enums/detect_output_transcriptions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/skyflow/utils/enums/detect_output_transcriptions.py b/skyflow/utils/enums/detect_output_transcriptions.py index 69f94d79..4e14f911 100644 --- a/skyflow/utils/enums/detect_output_transcriptions.py +++ b/skyflow/utils/enums/detect_output_transcriptions.py @@ -4,5 +4,4 @@ class DetectOutputTranscriptions(Enum): DIARIZED_TRANSCRIPTION = "diarized_transcription" MEDICAL_DIARIZED_TRANSCRIPTION = "medical_diarized_transcription" MEDICAL_TRANSCRIPTION = "medical_transcription" - PLAINTEXT_TRANSCRIPTION = "plaintext_transcription" TRANSCRIPTION = "transcription" \ No newline at end of file From 342df8ba3b6c27380e343dd969617ef77628b059 Mon Sep 17 00:00:00 2001 From: saileshwar-skyflow Date: Fri, 7 Nov 2025 07:21:02 +0000 Subject: [PATCH 40/42] [AUTOMATED] Private Release 1.15.8.dev0+3e794e3 --- setup.py | 2 +- skyflow/utils/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 903f488f..2a7056bc 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ if sys.version_info < (3, 8): raise RuntimeError("skyflow requires Python 3.8+") -current_version = '1.15.8.dev0+ba73553' +current_version = '1.15.8.dev0+3e794e3' setup( name='skyflow', diff --git a/skyflow/utils/_version.py b/skyflow/utils/_version.py index 661806df..53455202 100644 --- a/skyflow/utils/_version.py +++ b/skyflow/utils/_version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.8.dev0+ba73553' \ No newline at end of file +SDK_VERSION = '1.15.8.dev0+3e794e3' \ No newline at end of file From a22b9c67b24d23c3b867c60d3b6547870a239b9c Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Tue, 11 Nov 2025 17:00:39 +0530 Subject: [PATCH 41/42] SK-1773 Add str for file upload response --- skyflow/vault/data/_file_upload_response.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/skyflow/vault/data/_file_upload_response.py b/skyflow/vault/data/_file_upload_response.py index 18218f08..91a43c26 100644 --- a/skyflow/vault/data/_file_upload_response.py +++ b/skyflow/vault/data/_file_upload_response.py @@ -4,3 +4,9 @@ def __init__(self, errors): self.skyflow_id = skyflow_id self.errors = errors + + def __repr__(self): + return f"FileUploadResponse(skyflow_id={self.skyflow_id}, errors={self.errors})" + + def __str__(self): + return self.__repr__() \ No newline at 
end of file From 255c8eca5327cee6652b482bcbabd93696189699 Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Tue, 11 Nov 2025 11:31:11 +0000 Subject: [PATCH 42/42] [AUTOMATED] Private Release 1.15.8.dev0+a22b9c6 --- setup.py | 2 +- skyflow/utils/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 2a7056bc..2c8f3e19 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ if sys.version_info < (3, 8): raise RuntimeError("skyflow requires Python 3.8+") -current_version = '1.15.8.dev0+3e794e3' +current_version = '1.15.8.dev0+a22b9c6' setup( name='skyflow', diff --git a/skyflow/utils/_version.py b/skyflow/utils/_version.py index 53455202..da343d6c 100644 --- a/skyflow/utils/_version.py +++ b/skyflow/utils/_version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.8.dev0+3e794e3' \ No newline at end of file +SDK_VERSION = '1.15.8.dev0+a22b9c6' \ No newline at end of file