From ce6c329cef6c31a8a4db7ffa1203456b3df1faf6 Mon Sep 17 00:00:00 2001 From: saileshwar-skyflow <156889717+saileshwar-skyflow@users.noreply.github.com> Date: Wed, 27 Nov 2024 13:03:18 +0530 Subject: [PATCH 01/28] SK-1407: Added pr review template (#142) --- .github/workflows/pull_request_template.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 .github/workflows/pull_request_template.md diff --git a/.github/workflows/pull_request_template.md b/.github/workflows/pull_request_template.md new file mode 100644 index 00000000..c1922bec --- /dev/null +++ b/.github/workflows/pull_request_template.md @@ -0,0 +1,22 @@ +Start with a concise summary of the PR. The first three sections are required. The questions present in each section is there to help you guide you what to add. They are meant to be overwritten by your comments. +## Why +- Why are you making the change? +- What is the underlying issue that you are trying to case, in case of fix? +- Why is it needed by the feature you are working on? +- What is the intent behind making the change? + +## Goal +- What is the intended outcome? +- What part of the feature should start working? +- What are the non-goals or will be covered in future PR? + +## Testing +- How was the code tested? +- If you haven't written unit tests, why? +- What more testing is needed? Do you intend to manually test it after deployment? +- Do you have any concerns if this changed is released to prod? + +## Tech debt +- Is the PR adding to tech debt in any way? +- Are you addressing some Tech debt in this PR? +- If both the above are false, feel free to remove this section. \ No newline at end of file From 3d25ef896110b95777253efda069184890d88b19 Mon Sep 17 00:00:00 2001 From: saileshwar-skyflow <156889717+saileshwar-skyflow@users.noreply.github.com> Date: Thu, 9 Jan 2025 10:47:50 +0530 Subject: [PATCH 02/28] SK-1772: Update commit message pattern (#152) * SK-1772: Update commit message pattern --- .github/workflows/ci.yml | 4 ++-- .github/workflows/main.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 47a7281e..06aa9a50 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,7 @@ jobs: - name: Check JIRA ID uses: gsactions/commit-message-checker@v1 with: - pattern: '\[?[A-Z]{1,5}-[1-9][0-9]*.+$' + pattern: '(\[?[A-Z]{1,5}-[1-9][0-9]*)|(\[AUTOMATED\])|(Merge)|(Release).+$' flags: 'gm' excludeDescription: 'true' checkAllCommitMessages: 'true' @@ -22,7 +22,7 @@ jobs: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 with: - python-version: '3.7' + python-version: '3.8' - name: create-json id: create-json diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 68d9b0a8..48472e78 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -12,7 +12,7 @@ jobs: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 with: - python-version: '3.7' + python-version: '3.8' - name: create-json id: create-json From 52a4f566ca583e05d5404c5b34a63fb81324fdc9 Mon Sep 17 00:00:00 2001 From: Revanthathreya <143704329+Revanthathreya@users.noreply.github.com> Date: Mon, 24 Feb 2025 15:34:22 +0530 Subject: [PATCH 03/28] SC-5131:Update semgrep.yml (#159) --- .github/workflows/semgrep.yml | 63 +++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 17 deletions(-) diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index bce5fc8e..019095aa 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -1,35 +1,64 @@ -name: Semgrep +name: Semgrep Scan -# Run workflow each time code is pushed to your repository. on: - push: - branches: - - main pull_request: branches: - main + jobs: build: runs-on: ubuntu-latest + permissions: + pull-requests: write # Give write permission to PRs + issues: write steps: - name: Checkout code uses: actions/checkout@v3 - - name: Install Semgrep - run: pip install semgrep + - name: Install Semgrep and jq + run: | + sudo apt install python3-venv jq + python3 -m venv .venv + .venv/bin/pip install semgrep - name: Run Semgrep run: | - semgrep --config .semgreprules/customRule.yml --config auto --severity ERROR --sarif . > results.sarif + source .venv/bin/activate + semgrep --config auto --severity ERROR --json-output=results.json --no-error + cat results.json | jq .results > pretty-results.json - - name: Upload SARIF file - uses: github/codeql-action/upload-sarif@v3 + - name: Display Raw Semgrep JSON Output + run: | + echo "Displaying raw Semgrep results..." + cat pretty-results.json + + - name: Add comment on PR if findings are found + uses: actions/github-script@v6 with: - # Path to SARIF file relative to the root of the repository - sarif_file: results.sarif + script: | + // Ensure the context has a pull_request + if (context.payload.pull_request) { + const prNumber = context.payload.pull_request.number; + const fs = require('fs'); + const results = JSON.parse(fs.readFileSync('pretty-results.json', 'utf8')); + const highFindings = results.filter(result => result.extra && result.extra.severity === 'ERROR'); - - name: Upload results - uses: actions/upload-artifact@v4 - with: - name: semgrep-results - path: results.sarif + // Comment if findings exist + if (highFindings.length > 0) { + const comment = `**Semgrep Findings:** Issues with Error level severity are found (Error is Highest severity in Semgrep), Please resolve the issues before merging.`; + await github.rest.issues.createComment({ + ...context.repo, + issue_number: prNumber, + body: comment + }); + } else { + const noIssuesComment = "**Semgrep findings:** No issues found, Good to merge."; + await github.rest.issues.createComment({ + ...context.repo, + issue_number: prNumber, + body: noIssuesComment + }); + } + } else { + console.log("This workflow wasn't triggered by a pull request, so no comment will be added."); + } From 60c46c1aa908cb563d676dd4554e0c5e248ef3b1 Mon Sep 17 00:00:00 2001 From: Revanthathreya <143704329+Revanthathreya@users.noreply.github.com> Date: Tue, 10 Jun 2025 11:18:01 +0530 Subject: [PATCH 04/28] SC-5790:Create Gitleaks.yml --- .github/workflows/Gitleaks.yml | 97 ++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 .github/workflows/Gitleaks.yml diff --git a/.github/workflows/Gitleaks.yml b/.github/workflows/Gitleaks.yml new file mode 100644 index 00000000..9807b5e9 --- /dev/null +++ b/.github/workflows/Gitleaks.yml @@ -0,0 +1,97 @@ +name: Gitleaks secrets scan + +on: + pull_request: + branches: + - main + + +permissions: + issues: write + pull-requests: write + contents: read + +jobs: + gitleaks: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Required to get full commit history for diffing + + + - name: Get base and head commit SHAs + run: | + echo "BASE_SHA=${{ github.event.pull_request.base.sha }}" >> $GITHUB_ENV + echo "HEAD_SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + + + - name: Run Gitleaks on PR changes via Docker + run: | + docker run --rm -v $(pwd):/repo -w /repo zricethezav/gitleaks:latest detect \ + --config="/repo/Rule/gitleaks.toml" \ + --log-opts="--no-merges $BASE_SHA..$HEAD_SHA" \ + --verbose \ + --exit-code=0 \ + --report-format=json \ + --report-path="/repo/gitleaks-report.json" \ + --redact + + - name: Upload Gitleaks report + uses: actions/upload-artifact@v4 + with: + name: gitleaks-report + path: gitleaks-report.json + + - name: Format and comment findings on PR + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + if [ ! -f gitleaks-report.json ]; then + echo "Report file not found!" + exit 1 + fi + + FINDINGS_JSON=$(cat gitleaks-report.json) + COUNT=$(echo "$FINDINGS_JSON" | jq 'length') + SHA="${{ github.event.pull_request.head.sha }}" + REPO="${{ github.repository }}" + PR_NUMBER="${{ github.event.pull_request.number }}" + MAX=10 + + if [ "$COUNT" -gt 0 ]; then + COMMENT="**🔐 Gitleaks Findings: $COUNT issue(s) detected**\n\n" + i=0 + while [ "$i" -lt "$COUNT" ] && [ "$i" -lt "$MAX" ]; do + ITEM=$(echo "$FINDINGS_JSON" | jq ".[$i]") + RULE=$(echo "$ITEM" | jq -r '.RuleID') + DESC=$(echo "$ITEM" | jq -r '.Description') + FILE=$(echo "$ITEM" | jq -r '.File') + LINE=$(echo "$ITEM" | jq -r '.Line') + LINK="https://github.com/$REPO/blob/$SHA/$FILE#L$LINE" + SECRET_MASKED="**********" + COMMENT+="🔸 **Rule**: \`$RULE\`\n" + COMMENT+="📄 **File**: \`$FILE:$LINE\`\n" + COMMENT+="📝 **Description**: $DESC\n" + COMMENT+="🔑 **Secret**: \`$SECRET_MASKED\`\n" + COMMENT+="🔗 **Path**: [$FILE:$LINE]($LINK)\n\n" + i=$((i + 1)) + done + + if [ "$COUNT" -gt "$MAX" ]; then + COMMENT+="...and more. Only showing first $MAX findings.\n" + fi + else + COMMENT="✅ **Gitleaks Findings:** No secrets detected. Safe to proceed!" + fi + + # Escape newlines for GitHub API + COMMENT=$(echo "$COMMENT" | sed ':a;N;$!ba;s/\n/\\n/g') + + curl -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + -H "Accept: application/vnd.github.v3+json" \ + -d "{\"body\":\"$COMMENT\"}" \ + "https://api.github.com/repos/${REPO}/issues/${PR_NUMBER}/comments" From ed57ecb376f8be4bfad80ca46b3e37080940f3bc Mon Sep 17 00:00:00 2001 From: Revanthathreya <143704329+Revanthathreya@users.noreply.github.com> Date: Tue, 10 Jun 2025 11:19:06 +0530 Subject: [PATCH 05/28] SC-5790:Update and rename customRule.yml to gitleaks.toml --- .semgreprules/customRule.yml | 30 - Rule/gitleaks.toml | 3137 ++++++++++++++++++++++++++++++++++ 2 files changed, 3137 insertions(+), 30 deletions(-) delete mode 100644 .semgreprules/customRule.yml create mode 100644 Rule/gitleaks.toml diff --git a/.semgreprules/customRule.yml b/.semgreprules/customRule.yml deleted file mode 100644 index b275e280..00000000 --- a/.semgreprules/customRule.yml +++ /dev/null @@ -1,30 +0,0 @@ -rules: -- id: check-sensitive-info - message: >- - Potential sensitive information found: $1 - severity: ERROR - languages: - - yaml - - go - - javascript - - java - - python - - golang - - docker - patterns: - - pattern-regex: (?i)\b(api[_-]key|api[_-]token|api[_-]secret[_-]key|api[_-]password|token|secret[_-]key|password|auth[_-]key|auth[_-]token|AUTH_PASSWORD)\s*[:=]\s*(['"]?)((?!YOUR_EXCLUSION_PATTERN_HERE)[A-Z]+.*?)\2 - -- id: check-logger-appconfig - message: >- - Potential Logging configuration found: $1 - severity: ERROR - languages: - - yaml - - go - - javascript - - java - - python - - golang - - docker - patterns: - - pattern-regex: log\.Logger\(\).*(appConfig).* diff --git a/Rule/gitleaks.toml b/Rule/gitleaks.toml new file mode 100644 index 00000000..e127f827 --- /dev/null +++ b/Rule/gitleaks.toml @@ -0,0 +1,3137 @@ +# This file has been auto-generated. Do not edit manually. +# If you would like to contribute new rules, please use +# cmd/generate/config/main.go and follow the contributing guidelines +# at https://github.com/gitleaks/gitleaks/blob/master/CONTRIBUTING.md +# +# How the hell does secret scanning work? Read this: +# https://lookingatcomputer.substack.com/p/regex-is-almost-all-you-need +# +# This is the default gitleaks configuration file. +# Rules and allowlists are defined within this file. +# Rules instruct gitleaks on what should be considered a secret. +# Allowlists instruct gitleaks on what is allowed, i.e. not a secret. + +title = "gitleaks config" + +[allowlist] +description = "global allow lists" +regexes = [ + '''(?i)^true|false|null$''', + '''^(?i:a+|b+|c+|d+|e+|f+|g+|h+|i+|j+|k+|l+|m+|n+|o+|p+|q+|r+|s+|t+|u+|v+|w+|x+|y+|z+|\*+|\.+)$''', + '''^\$(?:\d+|{\d+})$''', + '''^\$(?:[A-Z_]+|[a-z_]+)$''', + '''^\${(?:[A-Z_]+|[a-z_]+)}$''', + '''^\{\{[ \t]*[\w ().|]+[ \t]*}}$''', + '''^\$\{\{[ \t]*(?:(?:env|github|secrets|vars)(?:\.[A-Za-z]\w+)+[\w "'&./=|]*)[ \t]*}}$''', + '''^%(?:[A-Z_]+|[a-z_]+)%$''', + '''^%[+\-# 0]?[bcdeEfFgGoOpqstTUvxX]$''', + '''^\{\d{0,2}}$''', + '''^@(?:[A-Z_]+|[a-z_]+)@$''', + '''^/Users/(?i)[a-z0-9]+/[\w .-/]+$''', + '''^/(?:bin|etc|home|opt|tmp|usr|var)/[\w ./-]+$''', +] +paths = [ + '''gitleaks\.toml''', + '''(?i)\.(?:bmp|gif|jpe?g|png|svg|tiff?)$''', + '''(?i)\.(?:eot|[ot]tf|woff2?)$''', + '''(?i)\.(?:docx?|xlsx?|pdf|bin|socket|vsidx|v2|suo|wsuo|.dll|pdb|exe|gltf|zip)$''', + '''go\.(?:mod|sum|work(?:\.sum)?)$''', + '''(?:^|/)vendor/modules\.txt$''', + '''(?:^|/)vendor/(?:github\.com|golang\.org/x|google\.golang\.org|gopkg\.in|istio\.io|k8s\.io|sigs\.k8s\.io)(?:/.*)?$''', + '''(?:^|/)gradlew(?:\.bat)?$''', + '''(?:^|/)gradle\.lockfile$''', + '''(?:^|/)mvnw(?:\.cmd)?$''', + '''(?:^|/)\.mvn/wrapper/MavenWrapperDownloader\.java$''', + '''(?:^|/)node_modules(?:/.*)?$''', + '''(?:^|/)(?:deno\.lock|npm-shrinkwrap\.json|package-lock\.json|pnpm-lock\.yaml|yarn\.lock)$''', + '''(?:^|/)bower_components(?:/.*)?$''', + '''(?:^|/)(?:angular|bootstrap|jquery(?:-?ui)?|plotly|swagger-?ui)[a-zA-Z0-9.-]*(?:\.min)?\.js(?:\.map)?$''', + '''(?:^|/)javascript\.json$''', + '''(?:^|/)(?:Pipfile|poetry)\.lock$''', + '''(?i)(?:^|/)(?:v?env|virtualenv)/lib(?:64)?(?:/.*)?$''', + '''(?i)(?:^|/)(?:lib(?:64)?/python[23](?:\.\d{1,2})+|python/[23](?:\.\d{1,2})+/lib(?:64)?)(?:/.*)?$''', + '''(?i)(?:^|/)[a-z0-9_.]+-[0-9.]+\.dist-info(?:/.+)?$''', + '''(?:^|/)vendor/(?:bundle|ruby)(?:/.*?)?$''', + '''\.gem$''', + '''verification-metadata\.xml''', + '''Database.refactorlog''', +] +stopwords = [ + "abcdefghijklmnopqrstuvwxyz", + "014df517-39d1-4453-b7b3-9930c563627c", +] + +[[rules]] +id = "1password-service-account-token" +description = "Uncovered a possible 1Password service account token, potentially compromising access to secrets in vaults." +regex = '''ops_eyJ[a-zA-Z0-9+/]{250,}={0,3}''' +entropy = 4 +keywords = ["ops_"] + +[[rules]] +id = "adafruit-api-key" +description = "Identified a potential Adafruit API Key, which could lead to unauthorized access to Adafruit services and sensitive data exposure." +regex = '''(?i)[\w.-]{0,50}?(?:adafruit)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9_-]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["adafruit"] + +[[rules]] +id = "adobe-client-id" +description = "Detected a pattern that resembles an Adobe OAuth Web Client ID, posing a risk of compromised Adobe integrations and data breaches." +regex = '''(?i)[\w.-]{0,50}?(?:adobe)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["adobe"] + +[[rules]] +id = "adobe-client-secret" +description = "Discovered a potential Adobe Client Secret, which, if exposed, could allow unauthorized Adobe service access and data manipulation." +regex = '''\b(p8e-(?i)[a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["p8e-"] + +[[rules]] +id = "age-secret-key" +description = "Discovered a potential Age encryption tool secret key, risking data decryption and unauthorized access to sensitive information." +regex = '''AGE-SECRET-KEY-1[QPZRY9X8GF2TVDW0S3JN54KHCE6MUA7L]{58}''' +keywords = ["age-secret-key-1"] + +[[rules]] +id = "airtable-api-key" +description = "Uncovered a possible Airtable API Key, potentially compromising database access and leading to data leakage or alteration." +regex = '''(?i)[\w.-]{0,50}?(?:airtable)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{17})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["airtable"] + +[[rules]] +id = "algolia-api-key" +description = "Identified an Algolia API Key, which could result in unauthorized search operations and data exposure on Algolia-managed platforms." +regex = '''(?i)[\w.-]{0,50}?(?:algolia)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["algolia"] + +[[rules]] +id = "alibaba-access-key-id" +description = "Detected an Alibaba Cloud AccessKey ID, posing a risk of unauthorized cloud resource access and potential data compromise." +regex = '''\b(LTAI(?i)[a-z0-9]{20})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["ltai"] + +[[rules]] +id = "alibaba-secret-key" +description = "Discovered a potential Alibaba Cloud Secret Key, potentially allowing unauthorized operations and data access within Alibaba Cloud." +regex = '''(?i)[\w.-]{0,50}?(?:alibaba)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{30})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["alibaba"] + +[[rules]] +id = "asana-client-id" +description = "Discovered a potential Asana Client ID, risking unauthorized access to Asana projects and sensitive task information." +regex = '''(?i)[\w.-]{0,50}?(?:asana)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9]{16})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["asana"] + +[[rules]] +id = "asana-client-secret" +description = "Identified an Asana Client Secret, which could lead to compromised project management integrity and unauthorized access." +regex = '''(?i)[\w.-]{0,50}?(?:asana)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["asana"] + +[[rules]] +id = "atlassian-api-token" +description = "Detected an Atlassian API token, posing a threat to project management and collaboration tool security and data confidentiality." +regex = '''[\w.-]{0,50}?(?i:[\w.-]{0,50}?(?:atlassian|confluence|jira)(?:[ \t\w.-]{0,20})[\s'"]{0,3})(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-zA-Z0-9]{24})(?:[\x60'"\s;]|\\[nr]|$)|\b(ATATT3[A-Za-z0-9_\-=]{186})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3.5 +keywords = [ + "atlassian", + "confluence", + "jira", + "atatt3", +] + +[[rules]] +id = "authress-service-client-access-key" +description = "Uncovered a possible Authress Service Client Access Key, which may compromise access control services and sensitive data." +regex = '''\b((?:sc|ext|scauth|authress)_(?i)[a-z0-9]{5,30}\.[a-z0-9]{4,6}\.(?-i:acc)[_-][a-z0-9-]{10,32}\.[a-z0-9+/_=-]{30,120})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = [ + "sc_", + "ext_", + "scauth_", + "authress_", +] + +[[rules]] +id = "aws-access-token" +description = "Identified a pattern that may indicate AWS credentials, risking unauthorized cloud resource access and data breaches on AWS platforms." +regex = '''\b((?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16})\b''' +entropy = 3 +keywords = [ + "a3t", + "akia", + "asia", + "abia", + "acca", +] +[[rules.allowlists]] +regexes = [ + '''.+EXAMPLE$''', +] + +[[rules]] +id = "azure-ad-client-secret" +description = "Azure AD Client Secret" +regex = '''(?:^|[\\'"\x60\s>=:(,)])([a-zA-Z0-9_~.]{3}\dQ~[a-zA-Z0-9_~.-]{31,34})(?:$|[\\'"\x60\s<),])''' +entropy = 3 +keywords = ["q~"] + +[[rules]] +id = "beamer-api-token" +description = "Detected a Beamer API token, potentially compromising content management and exposing sensitive notifications and updates." +regex = '''(?i)[\w.-]{0,50}?(?:beamer)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(b_[a-z0-9=_\-]{44})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["beamer"] + +[[rules]] +id = "bitbucket-client-id" +description = "Discovered a potential Bitbucket Client ID, risking unauthorized repository access and potential codebase exposure." +regex = '''(?i)[\w.-]{0,50}?(?:bitbucket)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["bitbucket"] + +[[rules]] +id = "bitbucket-client-secret" +description = "Discovered a potential Bitbucket Client Secret, posing a risk of compromised code repositories and unauthorized access." +regex = '''(?i)[\w.-]{0,50}?(?:bitbucket)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9=_\-]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["bitbucket"] + +[[rules]] +id = "bittrex-access-key" +description = "Identified a Bittrex Access Key, which could lead to unauthorized access to cryptocurrency trading accounts and financial loss." +regex = '''(?i)[\w.-]{0,50}?(?:bittrex)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["bittrex"] + +[[rules]] +id = "bittrex-secret-key" +description = "Detected a Bittrex Secret Key, potentially compromising cryptocurrency transactions and financial security." +regex = '''(?i)[\w.-]{0,50}?(?:bittrex)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["bittrex"] + +[[rules]] +id = "cisco-meraki-api-key" +description = "Cisco Meraki is a cloud-managed IT solution that provides networking, security, and device management through an easy-to-use interface." +regex = '''[\w.-]{0,50}?(?i:[\w.-]{0,50}?(?:(?-i:[Mm]eraki|MERAKI))(?:[ \t\w.-]{0,20})[\s'"]{0,3})(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9a-f]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["meraki"] + +[[rules]] +id = "clojars-api-token" +description = "Uncovered a possible Clojars API token, risking unauthorized access to Clojure libraries and potential code manipulation." +regex = '''(?i)CLOJARS_[a-z0-9]{60}''' +entropy = 2 +keywords = ["clojars_"] + +[[rules]] +id = "cloudflare-api-key" +description = "Detected a Cloudflare API Key, potentially compromising cloud application deployments and operational security." +regex = '''(?i)[\w.-]{0,50}?(?:cloudflare)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9_-]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["cloudflare"] + +[[rules]] +id = "cloudflare-global-api-key" +description = "Detected a Cloudflare Global API Key, potentially compromising cloud application deployments and operational security." +regex = '''(?i)[\w.-]{0,50}?(?:cloudflare)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{37})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["cloudflare"] + +[[rules]] +id = "cloudflare-origin-ca-key" +description = "Detected a Cloudflare Origin CA Key, potentially compromising cloud application deployments and operational security." +regex = '''\b(v1\.0-[a-f0-9]{24}-[a-f0-9]{146})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = [ + "cloudflare", + "v1.0-", +] + +[[rules]] +id = "codecov-access-token" +description = "Found a pattern resembling a Codecov Access Token, posing a risk of unauthorized access to code coverage reports and sensitive data." +regex = '''(?i)[\w.-]{0,50}?(?:codecov)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["codecov"] + +[[rules]] +id = "cohere-api-token" +description = "Identified a Cohere Token, posing a risk of unauthorized access to AI services and data manipulation." +regex = '''[\w.-]{0,50}?(?i:[\w.-]{0,50}?(?:cohere|CO_API_KEY)(?:[ \t\w.-]{0,20})[\s'"]{0,3})(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-zA-Z0-9]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 4 +keywords = [ + "cohere", + "co_api_key", +] + +[[rules]] +id = "coinbase-access-token" +description = "Detected a Coinbase Access Token, posing a risk of unauthorized access to cryptocurrency accounts and financial transactions." +regex = '''(?i)[\w.-]{0,50}?(?:coinbase)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9_-]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["coinbase"] + +[[rules]] +id = "confluent-access-token" +description = "Identified a Confluent Access Token, which could compromise access to streaming data platforms and sensitive data flow." +regex = '''(?i)[\w.-]{0,50}?(?:confluent)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{16})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["confluent"] + +[[rules]] +id = "confluent-secret-key" +description = "Found a Confluent Secret Key, potentially risking unauthorized operations and data access within Confluent services." +regex = '''(?i)[\w.-]{0,50}?(?:confluent)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["confluent"] + +[[rules]] +id = "contentful-delivery-api-token" +description = "Discovered a Contentful delivery API token, posing a risk to content management systems and data integrity." +regex = '''(?i)[\w.-]{0,50}?(?:contentful)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9=_\-]{43})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["contentful"] + +[[rules]] +id = "curl-auth-header" +description = "Discovered a potential authorization token provided in a curl command header, which could compromise the curl accessed resource." +regex = '''\bcurl\b(?:.*?|.*?(?:[\r\n]{1,2}.*?){1,5})[ \t\n\r](?:-H|--header)(?:=|[ \t]{0,5})(?:"(?i)(?:Authorization:[ \t]{0,5}(?:Basic[ \t]([a-z0-9+/]{8,}={0,3})|(?:Bearer|(?:Api-)?Token)[ \t]([\w=~@.+/-]{8,})|([\w=~@.+/-]{8,}))|(?:(?:X-(?:[a-z]+-)?)?(?:Api-?)?(?:Key|Token)):[ \t]{0,5}([\w=~@.+/-]{8,}))"|'(?i)(?:Authorization:[ \t]{0,5}(?:Basic[ \t]([a-z0-9+/]{8,}={0,3})|(?:Bearer|(?:Api-)?Token)[ \t]([\w=~@.+/-]{8,})|([\w=~@.+/-]{8,}))|(?:(?:X-(?:[a-z]+-)?)?(?:Api-?)?(?:Key|Token)):[ \t]{0,5}([\w=~@.+/-]{8,}))')(?:\B|\s|\z)''' +entropy = 2.75 +keywords = ["curl"] + +[[rules]] +id = "curl-auth-user" +description = "Discovered a potential basic authorization token provided in a curl command, which could compromise the curl accessed resource." +regex = '''\bcurl\b(?:.*|.*(?:[\r\n]{1,2}.*){1,5})[ \t\n\r](?:-u|--user)(?:=|[ \t]{0,5})("(:[^"]{3,}|[^:"]{3,}:|[^:"]{3,}:[^"]{3,})"|'([^:']{3,}:[^']{3,})'|((?:"[^"]{3,}"|'[^']{3,}'|[\w$@.-]+):(?:"[^"]{3,}"|'[^']{3,}'|[\w${}@.-]+)))(?:\s|\z)''' +entropy = 2 +keywords = ["curl"] +[[rules.allowlists]] +regexes = [ + '''[^:]+:(?:change(?:it|me)|pass(?:word)?|pwd|test|token|\*+|x+)''', + '''['"]?<[^>]+>['"]?:['"]?<[^>]+>|<[^:]+:[^>]+>['"]?''', + '''[^:]+:\[[^]]+]''', + '''['"]?[^:]+['"]?:['"]?\$(?:\d|\w+|\{(?:\d|\w+)})['"]?''', + '''\$\([^)]+\):\$\([^)]+\)''', + '''['"]?\$?{{[^}]+}}['"]?:['"]?\$?{{[^}]+}}['"]?''', +] + +[[rules]] +id = "databricks-api-token" +description = "Uncovered a Databricks API token, which may compromise big data analytics platforms and sensitive data processing." +regex = '''\b(dapi[a-f0-9]{32}(?:-\d)?)(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["dapi"] + +[[rules]] +id = "datadog-access-token" +description = "Detected a Datadog Access Token, potentially risking monitoring and analytics data exposure and manipulation." +regex = '''(?i)[\w.-]{0,50}?(?:datadog)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["datadog"] + +[[rules]] +id = "defined-networking-api-token" +description = "Identified a Defined Networking API token, which could lead to unauthorized network operations and data breaches." +regex = '''(?i)[\w.-]{0,50}?(?:dnkey)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(dnkey-[a-z0-9=_\-]{26}-[a-z0-9=_\-]{52})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["dnkey"] + +[[rules]] +id = "digitalocean-access-token" +description = "Found a DigitalOcean OAuth Access Token, risking unauthorized cloud resource access and data compromise." +regex = '''\b(doo_v1_[a-f0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["doo_v1_"] + +[[rules]] +id = "digitalocean-pat" +description = "Discovered a DigitalOcean Personal Access Token, posing a threat to cloud infrastructure security and data privacy." +regex = '''\b(dop_v1_[a-f0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["dop_v1_"] + +[[rules]] +id = "digitalocean-refresh-token" +description = "Uncovered a DigitalOcean OAuth Refresh Token, which could allow prolonged unauthorized access and resource manipulation." +regex = '''(?i)\b(dor_v1_[a-f0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["dor_v1_"] + +[[rules]] +id = "discord-api-token" +description = "Detected a Discord API key, potentially compromising communication channels and user data privacy on Discord." +regex = '''(?i)[\w.-]{0,50}?(?:discord)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["discord"] + +[[rules]] +id = "discord-client-id" +description = "Identified a Discord client ID, which may lead to unauthorized integrations and data exposure in Discord applications." +regex = '''(?i)[\w.-]{0,50}?(?:discord)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9]{18})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["discord"] + +[[rules]] +id = "discord-client-secret" +description = "Discovered a potential Discord client secret, risking compromised Discord bot integrations and data leaks." +regex = '''(?i)[\w.-]{0,50}?(?:discord)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9=_\-]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["discord"] + +[[rules]] +id = "doppler-api-token" +description = "Discovered a Doppler API token, posing a risk to environment and secrets management security." +regex = '''dp\.pt\.(?i)[a-z0-9]{43}''' +entropy = 2 +keywords = ["dp.pt."] + +[[rules]] +id = "droneci-access-token" +description = "Detected a Droneci Access Token, potentially compromising continuous integration and deployment workflows." +regex = '''(?i)[\w.-]{0,50}?(?:droneci)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["droneci"] + +[[rules]] +id = "dropbox-api-token" +description = "Identified a Dropbox API secret, which could lead to unauthorized file access and data breaches in Dropbox storage." +regex = '''(?i)[\w.-]{0,50}?(?:dropbox)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{15})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["dropbox"] + +[[rules]] +id = "dropbox-long-lived-api-token" +description = "Found a Dropbox long-lived API token, risking prolonged unauthorized access to cloud storage and sensitive data." +regex = '''(?i)[\w.-]{0,50}?(?:dropbox)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{11}(AAAAAAAAAA)[a-z0-9\-_=]{43})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["dropbox"] + +[[rules]] +id = "dropbox-short-lived-api-token" +description = "Discovered a Dropbox short-lived API token, posing a risk of temporary but potentially harmful data access and manipulation." +regex = '''(?i)[\w.-]{0,50}?(?:dropbox)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(sl\.[a-z0-9\-=_]{135})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["dropbox"] + +[[rules]] +id = "duffel-api-token" +description = "Uncovered a Duffel API token, which may compromise travel platform integrations and sensitive customer data." +regex = '''duffel_(?:test|live)_(?i)[a-z0-9_\-=]{43}''' +entropy = 2 +keywords = ["duffel_"] + +[[rules]] +id = "dynatrace-api-token" +description = "Detected a Dynatrace API token, potentially risking application performance monitoring and data exposure." +regex = '''dt0c01\.(?i)[a-z0-9]{24}\.[a-z0-9]{64}''' +entropy = 4 +keywords = ["dt0c01."] + +[[rules]] +id = "easypost-api-token" +description = "Identified an EasyPost API token, which could lead to unauthorized postal and shipment service access and data exposure." +regex = '''\bEZAK(?i)[a-z0-9]{54}\b''' +entropy = 2 +keywords = ["ezak"] + +[[rules]] +id = "easypost-test-api-token" +description = "Detected an EasyPost test API token, risking exposure of test environments and potentially sensitive shipment data." +regex = '''\bEZTK(?i)[a-z0-9]{54}\b''' +entropy = 2 +keywords = ["eztk"] + +[[rules]] +id = "etsy-access-token" +description = "Found an Etsy Access Token, potentially compromising Etsy shop management and customer data." +regex = '''(?i)[\w.-]{0,50}?(?:(?-i:ETSY|[Ee]tsy))(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{24})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["etsy"] + +[[rules]] +id = "facebook-access-token" +description = "Discovered a Facebook Access Token, posing a risk of unauthorized access to Facebook accounts and personal data exposure." +regex = '''(?i)\b(\d{15,16}(\||%)[0-9a-z\-_]{27,40})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["facebook"] + +[[rules]] +id = "facebook-page-access-token" +description = "Discovered a Facebook Page Access Token, posing a risk of unauthorized access to Facebook accounts and personal data exposure." +regex = '''\b(EAA[MC](?i)[a-z0-9]{100,})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 4 +keywords = [ + "eaam", + "eaac", +] + +[[rules]] +id = "facebook-secret" +description = "Discovered a Facebook Application secret, posing a risk of unauthorized access to Facebook accounts and personal data exposure." +regex = '''(?i)[\w.-]{0,50}?(?:facebook)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["facebook"] + +[[rules]] +id = "fastly-api-token" +description = "Uncovered a Fastly API key, which may compromise CDN and edge cloud services, leading to content delivery and security issues." +regex = '''(?i)[\w.-]{0,50}?(?:fastly)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9=_\-]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["fastly"] + +[[rules]] +id = "finicity-api-token" +description = "Detected a Finicity API token, potentially risking financial data access and unauthorized financial operations." +regex = '''(?i)[\w.-]{0,50}?(?:finicity)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["finicity"] + +[[rules]] +id = "finicity-client-secret" +description = "Identified a Finicity Client Secret, which could lead to compromised financial service integrations and data breaches." +regex = '''(?i)[\w.-]{0,50}?(?:finicity)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{20})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["finicity"] + +[[rules]] +id = "finnhub-access-token" +description = "Found a Finnhub Access Token, risking unauthorized access to financial market data and analytics." +regex = '''(?i)[\w.-]{0,50}?(?:finnhub)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{20})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["finnhub"] + +[[rules]] +id = "flickr-access-token" +description = "Discovered a Flickr Access Token, posing a risk of unauthorized photo management and potential data leakage." +regex = '''(?i)[\w.-]{0,50}?(?:flickr)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["flickr"] + +[[rules]] +id = "flutterwave-encryption-key" +description = "Uncovered a Flutterwave Encryption Key, which may compromise payment processing and sensitive financial information." +regex = '''FLWSECK_TEST-(?i)[a-h0-9]{12}''' +entropy = 2 +keywords = ["flwseck_test"] + +[[rules]] +id = "flutterwave-public-key" +description = "Detected a Finicity Public Key, potentially exposing public cryptographic operations and integrations." +regex = '''FLWPUBK_TEST-(?i)[a-h0-9]{32}-X''' +entropy = 2 +keywords = ["flwpubk_test"] + +[[rules]] +id = "flutterwave-secret-key" +description = "Identified a Flutterwave Secret Key, risking unauthorized financial transactions and data breaches." +regex = '''FLWSECK_TEST-(?i)[a-h0-9]{32}-X''' +entropy = 2 +keywords = ["flwseck_test"] + +[[rules]] +id = "flyio-access-token" +description = "Uncovered a Fly.io API key" +regex = '''\b((?:fo1_[\w-]{43}|fm1[ar]_[a-zA-Z0-9+\/]{100,}={0,3}|fm2_[a-zA-Z0-9+\/]{100,}={0,3}))(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 4 +keywords = [ + "fo1_", + "fm1", + "fm2_", +] + +[[rules]] +id = "frameio-api-token" +description = "Found a Frame.io API token, potentially compromising video collaboration and project management." +regex = '''fio-u-(?i)[a-z0-9\-_=]{64}''' +keywords = ["fio-u-"] + +[[rules]] +id = "freemius-secret-key" +description = "Detected a Freemius secret key, potentially exposing sensitive information." +regex = '''(?i)["']secret_key["']\s*=>\s*["'](sk_[\S]{29})["']''' +path = '''(?i)\.php$''' +keywords = ["secret_key"] + +[[rules]] +id = "freshbooks-access-token" +description = "Discovered a Freshbooks Access Token, posing a risk to accounting software access and sensitive financial data exposure." +regex = '''(?i)[\w.-]{0,50}?(?:freshbooks)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["freshbooks"] + +[[rules]] +id = "gcp-api-key" +description = "Uncovered a GCP API key, which could lead to unauthorized access to Google Cloud services and data breaches." +regex = '''\b(AIza[\w-]{35})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["aiza"] +[[rules.allowlists]] +regexes = [ + '''AIzaSyabcdefghijklmnopqrstuvwxyz1234567''', + '''AIzaSyAnLA7NfeLquW1tJFpx_eQCxoX-oo6YyIs''', + '''AIzaSyCkEhVjf3pduRDt6d1yKOMitrUEke8agEM''', + '''AIzaSyDMAScliyLx7F0NPDEJi1QmyCgHIAODrlU''', + '''AIzaSyD3asb-2pEZVqMkmL6M9N6nHZRR_znhrh0''', + '''AIzayDNSXIbFmlXbIE6mCzDLQAqITYefhixbX4A''', + '''AIzaSyAdOS2zB6NCsk1pCdZ4-P6GBdi_UUPwX7c''', + '''AIzaSyASWm6HmTMdYWpgMnjRBjxcQ9CKctWmLd4''', + '''AIzaSyANUvH9H9BsUccjsu2pCmEkOPjjaXeDQgY''', + '''AIzaSyA5_iVawFQ8ABuTZNUdcwERLJv_a_p4wtM''', + '''AIzaSyA4UrcGxgwQFTfaI3no3t7Lt1sjmdnP5sQ''', + '''AIzaSyDSb51JiIcB6OJpwwMicseKRhhrOq1cS7g''', + '''AIzaSyBF2RrAIm4a0mO64EShQfqfd2AFnzAvvuU''', + '''AIzaSyBcE-OOIbhjyR83gm4r2MFCu4MJmprNXsw''', + '''AIzaSyB8qGxt4ec15vitgn44duC5ucxaOi4FmqE''', + '''AIzaSyA8vmApnrHNFE0bApF4hoZ11srVL_n0nvY''', +] + +[[rules]] +id = "generic-api-key" +description = "Detected a Generic API Key, potentially exposing access to various services and sensitive operations." +regex = '''(?i)[\w.-]{0,50}?(?:access|auth|(?-i:[Aa]pi|API)|credential|creds|key|passw(?:or)?d|secret|token)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([\w.=-]{10,150}|[a-z0-9][a-z0-9+/]{11,}={0,3})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3.5 +keywords = [ + "access", + "api", + "auth", + "key", + "credential", + "creds", + "passwd", + "password", + "secret", + "token", +] +[[rules.allowlists]] +regexes = [ + '''^[a-zA-Z_.-]+$''', +] +[[rules.allowlists]] +regexTarget = "match" +regexes = [ + '''(?i)(?:access(?:ibility|or)|access[_.-]?id|random[_.-]?access|api[_.-]?(?:id|name|version)|rapid|capital|[a-z0-9-]*?api[a-z0-9-]*?:jar:|author|X-MS-Exchange-Organization-Auth|Authentication-Results|(?:credentials?[_.-]?id|withCredentials)|(?:bucket|foreign|hot|idx|natural|primary|pub(?:lic)?|schema|sequence)[_.-]?key|(?:turkey)|key[_.-]?(?:alias|board|code|frame|id|length|mesh|name|pair|press(?:ed)?|ring|selector|signature|size|stone|storetype|word|up|down|left|right)|key[_.-]?vault[_.-]?(?:id|name)|keyVaultToStoreSecrets|key(?:store|tab)[_.-]?(?:file|path)|issuerkeyhash|(?-i:[DdMm]onkey|[DM]ONKEY)|keying|(?:secret)[_.-]?(?:length|name|size)|UserSecretsId|(?:csrf)[_.-]?token|(?:io\.jsonwebtoken[ \t]?:[ \t]?[\w-]+)|(?:api|credentials|token)[_.-]?(?:endpoint|ur[il])|public[_.-]?token|(?:key|token)[_.-]?file|(?-i:(?:[A-Z_]+=\n[A-Z_]+=|[a-z_]+=\n[a-z_]+=)(?:\n|\z))|(?-i:(?:[A-Z.]+=\n[A-Z.]+=|[a-z.]+=\n[a-z.]+=)(?:\n|\z)))''', +] +stopwords = [ + "000000", + "aaaaaa", + "about", + "abstract", + "academy", + "acces", + "account", + "act-", + "act.", + "act_", + "action", + "active", + "actively", + "activity", + "adapter", + "add-", + "add.", + "add_", + "add-on", + "addon", + "addres", + "admin", + "adobe", + "advanced", + "adventure", + "agent", + "agile", + "air-", + "air.", + "air_", + "ajax", + "akka", + "alert", + "alfred", + "algorithm", + "all-", + "all.", + "all_", + "alloy", + "alpha", + "amazon", + "amqp", + "analysi", + "analytic", + "analyzer", + "android", + "angular", + "angularj", + "animate", + "animation", + "another", + "ansible", + "answer", + "ant-", + "ant.", + "ant_", + "any-", + "any.", + "any_", + "apache", + "app-", + "app-", + "app.", + "app.", + "app_", + "app_", + "apple", + "arch", + "archive", + "archived", + "arduino", + "array", + "art-", + "art.", + "art_", + "article", + "asp-", + "asp.", + "asp_", + "asset", + "async", + "atom", + "attention", + "audio", + "audit", + "aura", + "auth", + "author", + "author", + "authorize", + "auto", + "automated", + "automatic", + "awesome", + "aws_", + "azure", + "back", + "backbone", + "backend", + "backup", + "bar-", + "bar.", + "bar_", + "base", + "based", + "bash", + "basic", + "batch", + "been", + "beer", + "behavior", + "being", + "benchmark", + "best", + "beta", + "better", + "big-", + "big.", + "big_", + "binary", + "binding", + "bit-", + "bit.", + "bit_", + "bitcoin", + "block", + "blog", + "board", + "book", + "bookmark", + "boost", + "boot", + "bootstrap", + "bosh", + "bot-", + "bot.", + "bot_", + "bower", + "box-", + "box.", + "box_", + "boxen", + "bracket", + "branch", + "bridge", + "browser", + "brunch", + "buffer", + "bug-", + "bug.", + "bug_", + "build", + "builder", + "building", + "buildout", + "buildpack", + "built", + "bundle", + "busines", + "but-", + "but.", + "but_", + "button", + "cache", + "caching", + "cakephp", + "calendar", + "call", + "camera", + "campfire", + "can-", + "can.", + "can_", + "canva", + "captcha", + "capture", + "card", + "carousel", + "case", + "cassandra", + "cat-", + "cat.", + "cat_", + "category", + "center", + "cento", + "challenge", + "change", + "changelog", + "channel", + "chart", + "chat", + "cheat", + "check", + "checker", + "chef", + "ches", + "chinese", + "chosen", + "chrome", + "ckeditor", + "clas", + "classe", + "classic", + "clean", + "cli-", + "cli.", + "cli_", + "client", + "client", + "clojure", + "clone", + "closure", + "cloud", + "club", + "cluster", + "cms-", + "cms_", + "coco", + "code", + "coding", + "coffee", + "color", + "combination", + "combo", + "command", + "commander", + "comment", + "commit", + "common", + "community", + "compas", + "compiler", + "complete", + "component", + "composer", + "computer", + "computing", + "con-", + "con.", + "con_", + "concept", + "conf", + "config", + "config", + "connect", + "connector", + "console", + "contact", + "container", + "contao", + "content", + "contest", + "context", + "control", + "convert", + "converter", + "conway'", + "cookbook", + "cookie", + "cool", + "copy", + "cordova", + "core", + "couchbase", + "couchdb", + "countdown", + "counter", + "course", + "craft", + "crawler", + "create", + "creating", + "creator", + "credential", + "crm-", + "crm.", + "crm_", + "cros", + "crud", + "csv-", + "csv.", + "csv_", + "cube", + "cucumber", + "cuda", + "current", + "currently", + "custom", + "daemon", + "dark", + "dart", + "dash", + "dashboard", + "data", + "database", + "date", + "day-", + "day.", + "day_", + "dead", + "debian", + "debug", + "debug", + "debugger", + "deck", + "define", + "del-", + "del.", + "del_", + "delete", + "demo", + "deploy", + "design", + "designer", + "desktop", + "detection", + "detector", + "dev-", + "dev.", + "dev_", + "develop", + "developer", + "device", + "devise", + "diff", + "digital", + "directive", + "directory", + "discovery", + "display", + "django", + "dns-", + "dns_", + "doc-", + "doc-", + "doc.", + "doc.", + "doc_", + "doc_", + "docker", + "docpad", + "doctrine", + "document", + "doe-", + "doe.", + "doe_", + "dojo", + "dom-", + "dom.", + "dom_", + "domain", + "done", + "don't", + "dot-", + "dot.", + "dot_", + "dotfile", + "download", + "draft", + "drag", + "drill", + "drive", + "driven", + "driver", + "drop", + "dropbox", + "drupal", + "dsl-", + "dsl.", + "dsl_", + "dynamic", + "easy", + "_ec2_", + "ecdsa", + "eclipse", + "edit", + "editing", + "edition", + "editor", + "element", + "emac", + "email", + "embed", + "embedded", + "ember", + "emitter", + "emulator", + "encoding", + "endpoint", + "engine", + "english", + "enhanced", + "entity", + "entry", + "env_", + "episode", + "erlang", + "error", + "espresso", + "event", + "evented", + "example", + "example", + "exchange", + "exercise", + "experiment", + "expire", + "exploit", + "explorer", + "export", + "exporter", + "expres", + "ext-", + "ext.", + "ext_", + "extended", + "extension", + "external", + "extra", + "extractor", + "fabric", + "facebook", + "factory", + "fake", + "fast", + "feature", + "feed", + "fewfwef", + "ffmpeg", + "field", + "file", + "filter", + "find", + "finder", + "firefox", + "firmware", + "first", + "fish", + "fix-", + "fix_", + "flash", + "flask", + "flat", + "flex", + "flexible", + "flickr", + "flow", + "fluent", + "fluentd", + "fluid", + "folder", + "font", + "force", + "foreman", + "fork", + "form", + "format", + "formatter", + "forum", + "foundry", + "framework", + "free", + "friend", + "friendly", + "front-end", + "frontend", + "ftp-", + "ftp.", + "ftp_", + "fuel", + "full", + "fun-", + "fun.", + "fun_", + "func", + "future", + "gaia", + "gallery", + "game", + "gateway", + "gem-", + "gem.", + "gem_", + "gen-", + "gen.", + "gen_", + "general", + "generator", + "generic", + "genetic", + "get-", + "get.", + "get_", + "getenv", + "getting", + "ghost", + "gist", + "git-", + "git.", + "git_", + "github", + "gitignore", + "gitlab", + "glas", + "gmail", + "gnome", + "gnu-", + "gnu.", + "gnu_", + "goal", + "golang", + "gollum", + "good", + "google", + "gpu-", + "gpu.", + "gpu_", + "gradle", + "grail", + "graph", + "graphic", + "great", + "grid", + "groovy", + "group", + "grunt", + "guard", + "gui-", + "gui.", + "gui_", + "guide", + "guideline", + "gulp", + "gwt-", + "gwt.", + "gwt_", + "hack", + "hackathon", + "hacker", + "hacking", + "hadoop", + "haml", + "handler", + "hardware", + "has-", + "has_", + "hash", + "haskell", + "have", + "haxe", + "hello", + "help", + "helper", + "here", + "hero", + "heroku", + "high", + "hipchat", + "history", + "home", + "homebrew", + "homepage", + "hook", + "host", + "hosting", + "hot-", + "hot.", + "hot_", + "house", + "how-", + "how.", + "how_", + "html", + "http", + "hub-", + "hub.", + "hub_", + "hubot", + "human", + "icon", + "ide-", + "ide.", + "ide_", + "idea", + "identity", + "idiomatic", + "image", + "impact", + "import", + "important", + "importer", + "impres", + "index", + "infinite", + "info", + "injection", + "inline", + "input", + "inside", + "inspector", + "instagram", + "install", + "installer", + "instant", + "intellij", + "interface", + "internet", + "interview", + "into", + "intro", + "ionic", + "iphone", + "ipython", + "irc-", + "irc_", + "iso-", + "iso.", + "iso_", + "issue", + "jade", + "jasmine", + "java", + "jbos", + "jekyll", + "jenkin", + "jetbrains", + "job-", + "job.", + "job_", + "joomla", + "jpa-", + "jpa.", + "jpa_", + "jquery", + "json", + "just", + "kafka", + "karma", + "kata", + "kernel", + "keyboard", + "kindle", + "kit-", + "kit.", + "kit_", + "kitchen", + "knife", + "koan", + "kohana", + "lab-", + "lab-", + "lab.", + "lab.", + "lab_", + "lab_", + "lambda", + "lamp", + "language", + "laravel", + "last", + "latest", + "latex", + "launcher", + "layer", + "layout", + "lazy", + "ldap", + "leaflet", + "league", + "learn", + "learning", + "led-", + "led.", + "led_", + "leetcode", + "les-", + "les.", + "les_", + "level", + "leveldb", + "lib-", + "lib.", + "lib_", + "librarie", + "library", + "license", + "life", + "liferay", + "light", + "lightbox", + "like", + "line", + "link", + "linked", + "linkedin", + "linux", + "lisp", + "list", + "lite", + "little", + "load", + "loader", + "local", + "location", + "lock", + "log-", + "log.", + "log_", + "logger", + "logging", + "logic", + "login", + "logstash", + "longer", + "look", + "love", + "lua-", + "lua.", + "lua_", + "mac-", + "mac.", + "mac_", + "machine", + "made", + "magento", + "magic", + "mail", + "make", + "maker", + "making", + "man-", + "man.", + "man_", + "manage", + "manager", + "manifest", + "manual", + "map-", + "map-", + "map.", + "map.", + "map_", + "map_", + "mapper", + "mapping", + "markdown", + "markup", + "master", + "math", + "matrix", + "maven", + "md5", + "mean", + "media", + "mediawiki", + "meetup", + "memcached", + "memory", + "menu", + "merchant", + "message", + "messaging", + "meta", + "metadata", + "meteor", + "method", + "metric", + "micro", + "middleman", + "migration", + "minecraft", + "miner", + "mini", + "minimal", + "mirror", + "mit-", + "mit.", + "mit_", + "mobile", + "mocha", + "mock", + "mod-", + "mod.", + "mod_", + "mode", + "model", + "modern", + "modular", + "module", + "modx", + "money", + "mongo", + "mongodb", + "mongoid", + "mongoose", + "monitor", + "monkey", + "more", + "motion", + "moved", + "movie", + "mozilla", + "mqtt", + "mule", + "multi", + "multiple", + "music", + "mustache", + "mvc-", + "mvc.", + "mvc_", + "mysql", + "nagio", + "name", + "native", + "need", + "neo-", + "neo.", + "neo_", + "nest", + "nested", + "net-", + "net.", + "net_", + "nette", + "network", + "new-", + "new-", + "new.", + "new.", + "new_", + "new_", + "next", + "nginx", + "ninja", + "nlp-", + "nlp.", + "nlp_", + "node", + "nodej", + "nosql", + "not-", + "not.", + "not_", + "note", + "notebook", + "notepad", + "notice", + "notifier", + "now-", + "now.", + "now_", + "number", + "oauth", + "object", + "objective", + "obsolete", + "ocaml", + "octopres", + "official", + "old-", + "old.", + "old_", + "onboard", + "online", + "only", + "open", + "opencv", + "opengl", + "openshift", + "openwrt", + "option", + "oracle", + "org-", + "org.", + "org_", + "origin", + "original", + "orm-", + "orm.", + "orm_", + "osx-", + "osx_", + "our-", + "our.", + "our_", + "out-", + "out.", + "out_", + "output", + "over", + "overview", + "own-", + "own.", + "own_", + "pack", + "package", + "packet", + "page", + "page", + "panel", + "paper", + "paperclip", + "para", + "parallax", + "parallel", + "parse", + "parser", + "parsing", + "particle", + "party", + "password", + "patch", + "path", + "pattern", + "payment", + "paypal", + "pdf-", + "pdf.", + "pdf_", + "pebble", + "people", + "perl", + "personal", + "phalcon", + "phoenix", + "phone", + "phonegap", + "photo", + "php-", + "php.", + "php_", + "physic", + "picker", + "pipeline", + "platform", + "play", + "player", + "please", + "plu-", + "plu.", + "plu_", + "plug-in", + "plugin", + "plupload", + "png-", + "png.", + "png_", + "poker", + "polyfill", + "polymer", + "pool", + "pop-", + "pop.", + "pop_", + "popcorn", + "popup", + "port", + "portable", + "portal", + "portfolio", + "post", + "power", + "powered", + "powerful", + "prelude", + "pretty", + "preview", + "principle", + "print", + "pro-", + "pro.", + "pro_", + "problem", + "proc", + "product", + "profile", + "profiler", + "program", + "progres", + "project", + "protocol", + "prototype", + "provider", + "proxy", + "public", + "pull", + "puppet", + "pure", + "purpose", + "push", + "pusher", + "pyramid", + "python", + "quality", + "query", + "queue", + "quick", + "rabbitmq", + "rack", + "radio", + "rail", + "railscast", + "random", + "range", + "raspberry", + "rdf-", + "rdf.", + "rdf_", + "react", + "reactive", + "read", + "reader", + "readme", + "ready", + "real", + "reality", + "real-time", + "realtime", + "recipe", + "recorder", + "red-", + "red.", + "red_", + "reddit", + "redi", + "redmine", + "reference", + "refinery", + "refresh", + "registry", + "related", + "release", + "remote", + "rendering", + "repo", + "report", + "request", + "require", + "required", + "requirej", + "research", + "resource", + "response", + "resque", + "rest", + "restful", + "resume", + "reveal", + "reverse", + "review", + "riak", + "rich", + "right", + "ring", + "robot", + "role", + "room", + "router", + "routing", + "rpc-", + "rpc.", + "rpc_", + "rpg-", + "rpg.", + "rpg_", + "rspec", + "ruby-", + "ruby.", + "ruby_", + "rule", + "run-", + "run.", + "run_", + "runner", + "running", + "runtime", + "rust", + "rvm-", + "rvm.", + "rvm_", + "salt", + "sample", + "sample", + "sandbox", + "sas-", + "sas.", + "sas_", + "sbt-", + "sbt.", + "sbt_", + "scala", + "scalable", + "scanner", + "schema", + "scheme", + "school", + "science", + "scraper", + "scratch", + "screen", + "script", + "scroll", + "scs-", + "scs.", + "scs_", + "sdk-", + "sdk.", + "sdk_", + "sdl-", + "sdl.", + "sdl_", + "search", + "secure", + "security", + "see-", + "see.", + "see_", + "seed", + "select", + "selector", + "selenium", + "semantic", + "sencha", + "send", + "sentiment", + "serie", + "server", + "service", + "session", + "set-", + "set.", + "set_", + "setting", + "setting", + "setup", + "sha1", + "sha2", + "sha256", + "share", + "shared", + "sharing", + "sheet", + "shell", + "shield", + "shipping", + "shop", + "shopify", + "shortener", + "should", + "show", + "showcase", + "side", + "silex", + "simple", + "simulator", + "single", + "site", + "skeleton", + "sketch", + "skin", + "slack", + "slide", + "slider", + "slim", + "small", + "smart", + "smtp", + "snake", + "snapshot", + "snippet", + "soap", + "social", + "socket", + "software", + "solarized", + "solr", + "solution", + "solver", + "some", + "soon", + "source", + "space", + "spark", + "spatial", + "spec", + "sphinx", + "spine", + "spotify", + "spree", + "spring", + "sprite", + "sql-", + "sql.", + "sql_", + "sqlite", + "ssh-", + "ssh.", + "ssh_", + "stack", + "staging", + "standard", + "stanford", + "start", + "started", + "starter", + "startup", + "stat", + "statamic", + "state", + "static", + "statistic", + "statsd", + "statu", + "steam", + "step", + "still", + "stm-", + "stm.", + "stm_", + "storage", + "store", + "storm", + "story", + "strategy", + "stream", + "streaming", + "string", + "stripe", + "structure", + "studio", + "study", + "stuff", + "style", + "sublime", + "sugar", + "suite", + "summary", + "super", + "support", + "supported", + "svg-", + "svg.", + "svg_", + "svn-", + "svn.", + "svn_", + "swagger", + "swift", + "switch", + "switcher", + "symfony", + "symphony", + "sync", + "synopsi", + "syntax", + "system", + "system", + "tab-", + "tab-", + "tab.", + "tab.", + "tab_", + "tab_", + "table", + "tag-", + "tag-", + "tag.", + "tag.", + "tag_", + "tag_", + "talk", + "target", + "task", + "tcp-", + "tcp.", + "tcp_", + "tdd-", + "tdd.", + "tdd_", + "team", + "tech", + "template", + "term", + "terminal", + "testing", + "tetri", + "text", + "textmate", + "theme", + "theory", + "three", + "thrift", + "time", + "timeline", + "timer", + "tiny", + "tinymce", + "tip-", + "tip.", + "tip_", + "title", + "todo", + "todomvc", + "token", + "tool", + "toolbox", + "toolkit", + "top-", + "top.", + "top_", + "tornado", + "touch", + "tower", + "tracker", + "tracking", + "traffic", + "training", + "transfer", + "translate", + "transport", + "tree", + "trello", + "try-", + "try.", + "try_", + "tumblr", + "tut-", + "tut.", + "tut_", + "tutorial", + "tweet", + "twig", + "twitter", + "type", + "typo", + "ubuntu", + "uiview", + "ultimate", + "under", + "unit", + "unity", + "universal", + "unix", + "update", + "updated", + "upgrade", + "upload", + "uploader", + "uri-", + "uri.", + "uri_", + "url-", + "url.", + "url_", + "usage", + "usb-", + "usb.", + "usb_", + "use-", + "use.", + "use_", + "used", + "useful", + "user", + "using", + "util", + "utilitie", + "utility", + "vagrant", + "validator", + "value", + "variou", + "varnish", + "version", + "via-", + "via.", + "via_", + "video", + "view", + "viewer", + "vim-", + "vim.", + "vim_", + "vimrc", + "virtual", + "vision", + "visual", + "vpn", + "want", + "warning", + "watch", + "watcher", + "wave", + "way-", + "way.", + "way_", + "weather", + "web-", + "web_", + "webapp", + "webgl", + "webhook", + "webkit", + "webrtc", + "website", + "websocket", + "welcome", + "welcome", + "what", + "what'", + "when", + "where", + "which", + "why-", + "why.", + "why_", + "widget", + "wifi", + "wiki", + "win-", + "win.", + "win_", + "window", + "wip-", + "wip.", + "wip_", + "within", + "without", + "wizard", + "word", + "wordpres", + "work", + "worker", + "workflow", + "working", + "workshop", + "world", + "wrapper", + "write", + "writer", + "writing", + "written", + "www-", + "www.", + "www_", + "xamarin", + "xcode", + "xml-", + "xml.", + "xml_", + "xmpp", + "xxxxxx", + "yahoo", + "yaml", + "yandex", + "yeoman", + "yet-", + "yet.", + "yet_", + "yii-", + "yii.", + "yii_", + "youtube", + "yui-", + "yui.", + "yui_", + "zend", + "zero", + "zip-", + "zip.", + "zip_", + "zsh-", + "zsh.", + "zsh_", + "6fe4476ee5a1832882e326b506d14126", +] +[[rules.allowlists]] +regexTarget = "line" +regexes = [ + '''--mount=type=secret,''', + '''import[ \t]+{[ \t\w,]+}[ \t]+from[ \t]+['"][^'"]+['"]''', +] +[[rules.allowlists]] +condition = "AND" +paths = [ + '''\.bb$''','''\.bbappend$''','''\.bbclass$''','''\.inc$''', +] +regexTarget = "line" +regexes = [ + '''LICENSE[^=]*=\s*"[^"]+''', + '''LIC_FILES_CHKSUM[^=]*=\s*"[^"]+''', + '''SRC[^=]*=\s*"[a-zA-Z0-9]+''', +] + +[[rules]] +id = "github-app-token" +description = "Identified a GitHub App Token, which may compromise GitHub application integrations and source code security." +regex = '''(?:ghu|ghs)_[0-9a-zA-Z]{36}''' +entropy = 3 +keywords = [ + "ghu_", + "ghs_", +] +[[rules.allowlists]] +paths = [ + '''(?:^|/)@octokit/auth-token/README\.md$''', +] + +[[rules]] +id = "github-fine-grained-pat" +description = "Found a GitHub Fine-Grained Personal Access Token, risking unauthorized repository access and code manipulation." +regex = '''github_pat_\w{82}''' +entropy = 3 +keywords = ["github_pat_"] + +[[rules]] +id = "github-oauth" +description = "Discovered a GitHub OAuth Access Token, posing a risk of compromised GitHub account integrations and data leaks." +regex = '''gho_[0-9a-zA-Z]{36}''' +entropy = 3 +keywords = ["gho_"] + +[[rules]] +id = "github-pat" +description = "Uncovered a GitHub Personal Access Token, potentially leading to unauthorized repository access and sensitive content exposure." +regex = '''ghp_[0-9a-zA-Z]{36}''' +entropy = 3 +keywords = ["ghp_"] +[[rules.allowlists]] +paths = [ + '''(?:^|/)@octokit/auth-token/README\.md$''', +] + +[[rules]] +id = "github-refresh-token" +description = "Detected a GitHub Refresh Token, which could allow prolonged unauthorized access to GitHub services." +regex = '''ghr_[0-9a-zA-Z]{36}''' +entropy = 3 +keywords = ["ghr_"] + +[[rules]] +id = "gitlab-cicd-job-token" +description = "Identified a GitLab CI/CD Job Token, potential access to projects and some APIs on behalf of a user while the CI job is running." +regex = '''glcbt-[0-9a-zA-Z]{1,5}_[0-9a-zA-Z_-]{20}''' +entropy = 3 +keywords = ["glcbt-"] + +[[rules]] +id = "gitlab-deploy-token" +description = "Identified a GitLab Deploy Token, risking access to repositories, packages and containers with write access." +regex = '''gldt-[0-9a-zA-Z_\-]{20}''' +entropy = 3 +keywords = ["gldt-"] + +[[rules]] +id = "gitlab-feature-flag-client-token" +description = "Identified a GitLab feature flag client token, risks exposing user lists and features flags used by an application." +regex = '''glffct-[0-9a-zA-Z_\-]{20}''' +entropy = 3 +keywords = ["glffct-"] + +[[rules]] +id = "gitlab-feed-token" +description = "Identified a GitLab feed token, risking exposure of user data." +regex = '''glft-[0-9a-zA-Z_\-]{20}''' +entropy = 3 +keywords = ["glft-"] + +[[rules]] +id = "gitlab-incoming-mail-token" +description = "Identified a GitLab incoming mail token, risking manipulation of data sent by mail." +regex = '''glimt-[0-9a-zA-Z_\-]{25}''' +entropy = 3 +keywords = ["glimt-"] + +[[rules]] +id = "gitlab-kubernetes-agent-token" +description = "Identified a GitLab Kubernetes Agent token, risking access to repos and registry of projects connected via agent." +regex = '''glagent-[0-9a-zA-Z_\-]{50}''' +entropy = 3 +keywords = ["glagent-"] + +[[rules]] +id = "gitlab-oauth-app-secret" +description = "Identified a GitLab OIDC Application Secret, risking access to apps using GitLab as authentication provider." +regex = '''gloas-[0-9a-zA-Z_\-]{64}''' +entropy = 3 +keywords = ["gloas-"] + +[[rules]] +id = "gitlab-pat" +description = "Identified a GitLab Personal Access Token, risking unauthorized access to GitLab repositories and codebase exposure." +regex = '''glpat-[\w-]{20}''' +entropy = 3 +keywords = ["glpat-"] + +[[rules]] +id = "gitlab-pat-routable" +description = "Identified a GitLab Personal Access Token (routable), risking unauthorized access to GitLab repositories and codebase exposure." +regex = '''\bglpat-[0-9a-zA-Z_-]{27,300}\.[0-9a-z]{2}[0-9a-z]{7}\b''' +entropy = 4 +keywords = ["glpat-"] + +[[rules]] +id = "gitlab-ptt" +description = "Found a GitLab Pipeline Trigger Token, potentially compromising continuous integration workflows and project security." +regex = '''glptt-[0-9a-f]{40}''' +entropy = 3 +keywords = ["glptt-"] + +[[rules]] +id = "gitlab-rrt" +description = "Discovered a GitLab Runner Registration Token, posing a risk to CI/CD pipeline integrity and unauthorized access." +regex = '''GR1348941[\w-]{20}''' +entropy = 3 +keywords = ["gr1348941"] + +[[rules]] +id = "gitlab-runner-authentication-token" +description = "Discovered a GitLab Runner Authentication Token, posing a risk to CI/CD pipeline integrity and unauthorized access." +regex = '''glrt-[0-9a-zA-Z_\-]{20}''' +entropy = 3 +keywords = ["glrt-"] + +[[rules]] +id = "gitlab-runner-authentication-token-routable" +description = "Discovered a GitLab Runner Authentication Token (Routable), posing a risk to CI/CD pipeline integrity and unauthorized access." +regex = '''\bglrt-t\d_[0-9a-zA-Z_\-]{27,300}\.[0-9a-z]{2}[0-9a-z]{7}\b''' +entropy = 4 +keywords = ["glrt-"] + +[[rules]] +id = "gitlab-scim-token" +description = "Discovered a GitLab SCIM Token, posing a risk to unauthorized access for a organization or instance." +regex = '''glsoat-[0-9a-zA-Z_\-]{20}''' +entropy = 3 +keywords = ["glsoat-"] + +[[rules]] +id = "gitlab-session-cookie" +description = "Discovered a GitLab Session Cookie, posing a risk to unauthorized access to a user account." +regex = '''_gitlab_session=[0-9a-z]{32}''' +entropy = 3 +keywords = ["_gitlab_session="] + +[[rules]] +id = "gitter-access-token" +description = "Uncovered a Gitter Access Token, which may lead to unauthorized access to chat and communication services." +regex = '''(?i)[\w.-]{0,50}?(?:gitter)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9_-]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["gitter"] + +[[rules]] +id = "gocardless-api-token" +description = "Detected a GoCardless API token, potentially risking unauthorized direct debit payment operations and financial data exposure." +regex = '''(?i)[\w.-]{0,50}?(?:gocardless)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(live_(?i)[a-z0-9\-_=]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "live_", + "gocardless", +] + +[[rules]] +id = "grafana-api-key" +description = "Identified a Grafana API key, which could compromise monitoring dashboards and sensitive data analytics." +regex = '''(?i)\b(eyJrIjoi[A-Za-z0-9]{70,400}={0,3})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["eyjrijoi"] + +[[rules]] +id = "grafana-cloud-api-token" +description = "Found a Grafana cloud API token, risking unauthorized access to cloud-based monitoring services and data exposure." +regex = '''(?i)\b(glc_[A-Za-z0-9+/]{32,400}={0,3})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["glc_"] + +[[rules]] +id = "grafana-service-account-token" +description = "Discovered a Grafana service account token, posing a risk of compromised monitoring services and data integrity." +regex = '''(?i)\b(glsa_[A-Za-z0-9]{32}_[A-Fa-f0-9]{8})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["glsa_"] + +[[rules]] +id = "harness-api-key" +description = "Identified a Harness Access Token (PAT or SAT), risking unauthorized access to a Harness account." +regex = '''(?:pat|sat)\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9]{24}\.[a-zA-Z0-9]{20}''' +keywords = [ + "pat.", + "sat.", +] + +[[rules]] +id = "hashicorp-tf-api-token" +description = "Uncovered a HashiCorp Terraform user/org API token, which may lead to unauthorized infrastructure management and security breaches." +regex = '''(?i)[a-z0-9]{14}\.(?-i:atlasv1)\.[a-z0-9\-_=]{60,70}''' +entropy = 3.5 +keywords = ["atlasv1"] + +[[rules]] +id = "hashicorp-tf-password" +description = "Identified a HashiCorp Terraform password field, risking unauthorized infrastructure configuration and security breaches." +regex = '''(?i)[\w.-]{0,50}?(?:administrator_login_password|password)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}("[a-z0-9=_\-]{8,20}")(?:[\x60'"\s;]|\\[nr]|$)''' +path = '''(?i)\.(?:tf|hcl)$''' +entropy = 2 +keywords = [ + "administrator_login_password", + "password", +] + +[[rules]] +id = "heroku-api-key" +description = "Detected a Heroku API Key, potentially compromising cloud application deployments and operational security." +regex = '''(?i)[\w.-]{0,50}?(?:heroku)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["heroku"] + +[[rules]] +id = "hubspot-api-key" +description = "Found a HubSpot API Token, posing a risk to CRM data integrity and unauthorized marketing operations." +regex = '''(?i)[\w.-]{0,50}?(?:hubspot)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["hubspot"] + +[[rules]] +id = "huggingface-access-token" +description = "Discovered a Hugging Face Access token, which could lead to unauthorized access to AI models and sensitive data." +regex = '''\b(hf_(?i:[a-z]{34}))(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["hf_"] + +[[rules]] +id = "huggingface-organization-api-token" +description = "Uncovered a Hugging Face Organization API token, potentially compromising AI organization accounts and associated data." +regex = '''\b(api_org_(?i:[a-z]{34}))(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["api_org_"] + +[[rules]] +id = "infracost-api-token" +description = "Detected an Infracost API Token, risking unauthorized access to cloud cost estimation tools and financial data." +regex = '''\b(ico-[a-zA-Z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["ico-"] + +[[rules]] +id = "intercom-api-key" +description = "Identified an Intercom API Token, which could compromise customer communication channels and data privacy." +regex = '''(?i)[\w.-]{0,50}?(?:intercom)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9=_\-]{60})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["intercom"] + +[[rules]] +id = "intra42-client-secret" +description = "Found a Intra42 client secret, which could lead to unauthorized access to the 42School API and sensitive data." +regex = '''\b(s-s4t2(?:ud|af)-(?i)[abcdef0123456789]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = [ + "intra", + "s-s4t2ud-", + "s-s4t2af-", +] + +[[rules]] +id = "jfrog-api-key" +description = "Found a JFrog API Key, posing a risk of unauthorized access to software artifact repositories and build pipelines." +regex = '''(?i)[\w.-]{0,50}?(?:jfrog|artifactory|bintray|xray)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{73})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "jfrog", + "artifactory", + "bintray", + "xray", +] + +[[rules]] +id = "jfrog-identity-token" +description = "Discovered a JFrog Identity Token, potentially compromising access to JFrog services and sensitive software artifacts." +regex = '''(?i)[\w.-]{0,50}?(?:jfrog|artifactory|bintray|xray)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "jfrog", + "artifactory", + "bintray", + "xray", +] + +[[rules]] +id = "jwt" +description = "Uncovered a JSON Web Token, which may lead to unauthorized access to web applications and sensitive user data." +regex = '''\b(ey[a-zA-Z0-9]{17,}\.ey[a-zA-Z0-9\/\\_-]{17,}\.(?:[a-zA-Z0-9\/\\_-]{10,}={0,2})?)(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["ey"] + +[[rules]] +id = "jwt-base64" +description = "Detected a Base64-encoded JSON Web Token, posing a risk of exposing encoded authentication and data exchange information." +regex = '''\bZXlK(?:(?PaGJHY2lPaU)|(?PaGNIVWlPaU)|(?PaGNIWWlPaU)|(?PaGRXUWlPaU)|(?PaU5qUWlP)|(?PamNtbDBJanBi)|(?PamRIa2lPaU)|(?PbGNHc2lPbn)|(?PbGJtTWlPaU)|(?PcWEzVWlPaU)|(?PcWQyc2lPb)|(?PcGMzTWlPaU)|(?PcGRpSTZJ)|(?PcmFXUWlP)|(?PclpYbGZiM0J6SWpwY)|(?PcmRIa2lPaUp)|(?PdWIyNWpaU0k2)|(?Pd01tTWlP)|(?Pd01uTWlPaU)|(?Pd2NIUWlPaU)|(?PemRXSWlPaU)|(?PemRuUWlP)|(?PMFlXY2lPaU)|(?PMGVYQWlPaUp)|(?PMWNtd2l)|(?PMWMyVWlPaUp)|(?PMlpYSWlPaU)|(?PMlpYSnphVzl1SWpv)|(?PNElqb2)|(?PNE5XTWlP)|(?PNE5YUWlPaU)|(?PNE5YUWpVekkxTmlJNkl)|(?PNE5YVWlPaU)|(?PNmFYQWlPaU))[a-zA-Z0-9\/\\_+\-\r\n]{40,}={0,2}''' +entropy = 2 +keywords = ["zxlk"] + +[[rules]] +id = "kraken-access-token" +description = "Identified a Kraken Access Token, potentially compromising cryptocurrency trading accounts and financial security." +regex = '''(?i)[\w.-]{0,50}?(?:kraken)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9\/=_\+\-]{80,90})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["kraken"] + +[[rules]] +id = "kubernetes-secret-yaml" +description = "Possible Kubernetes Secret detected, posing a risk of leaking credentials/tokens from your deployments" +regex = '''(?i)(?:\bkind:[ \t]*["']?\bsecret\b["']?(?:.|\s){0,200}?\bdata:(?:.|\s){0,100}?\s+([\w.-]+:(?:[ \t]*(?:\||>[-+]?)\s+)?[ \t]*(?:["']?[a-z0-9+/]{10,}={0,3}["']?|\{\{[ \t\w"|$:=,.-]+}}|""|''))|\bdata:(?:.|\s){0,100}?\s+([\w.-]+:(?:[ \t]*(?:\||>[-+]?)\s+)?[ \t]*(?:["']?[a-z0-9+/]{10,}={0,3}["']?|\{\{[ \t\w"|$:=,.-]+}}|""|''))(?:.|\s){0,200}?\bkind:[ \t]*["']?\bsecret\b["']?)''' +path = '''(?i)\.ya?ml$''' +keywords = ["secret"] +[[rules.allowlists]] +regexes = [ + '''[\w.-]+:(?:[ \t]*(?:\||>[-+]?)\s+)?[ \t]*(?:\{\{[ \t\w"|$:=,.-]+}}|""|'')''', +] +[[rules.allowlists]] +regexTarget = "match" +regexes = [ + '''(kind:(?:.|\s)+\n---\n(?:.|\s)+\bdata:|data:(?:.|\s)+\n---\n(?:.|\s)+\bkind:)''', +] + +[[rules]] +id = "kucoin-access-token" +description = "Found a Kucoin Access Token, risking unauthorized access to cryptocurrency exchange services and transactions." +regex = '''(?i)[\w.-]{0,50}?(?:kucoin)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{24})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["kucoin"] + +[[rules]] +id = "kucoin-secret-key" +description = "Discovered a Kucoin Secret Key, which could lead to compromised cryptocurrency operations and financial data breaches." +regex = '''(?i)[\w.-]{0,50}?(?:kucoin)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["kucoin"] + +[[rules]] +id = "launchdarkly-access-token" +description = "Uncovered a Launchdarkly Access Token, potentially compromising feature flag management and application functionality." +regex = '''(?i)[\w.-]{0,50}?(?:launchdarkly)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9=_\-]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["launchdarkly"] + +[[rules]] +id = "linear-api-key" +description = "Detected a Linear API Token, posing a risk to project management tools and sensitive task data." +regex = '''lin_api_(?i)[a-z0-9]{40}''' +entropy = 2 +keywords = ["lin_api_"] + +[[rules]] +id = "linear-client-secret" +description = "Identified a Linear Client Secret, which may compromise secure integrations and sensitive project management data." +regex = '''(?i)[\w.-]{0,50}?(?:linear)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["linear"] + +[[rules]] +id = "linkedin-client-id" +description = "Found a LinkedIn Client ID, risking unauthorized access to LinkedIn integrations and professional data exposure." +regex = '''(?i)[\w.-]{0,50}?(?:linked[_-]?in)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{14})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = [ + "linkedin", + "linked_in", + "linked-in", +] + +[[rules]] +id = "linkedin-client-secret" +description = "Discovered a LinkedIn Client secret, potentially compromising LinkedIn application integrations and user data." +regex = '''(?i)[\w.-]{0,50}?(?:linked[_-]?in)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{16})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = [ + "linkedin", + "linked_in", + "linked-in", +] + +[[rules]] +id = "lob-api-key" +description = "Uncovered a Lob API Key, which could lead to unauthorized access to mailing and address verification services." +regex = '''(?i)[\w.-]{0,50}?(?:lob)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}((live|test)_[a-f0-9]{35})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "test_", + "live_", +] + +[[rules]] +id = "lob-pub-api-key" +description = "Detected a Lob Publishable API Key, posing a risk of exposing mail and print service integrations." +regex = '''(?i)[\w.-]{0,50}?(?:lob)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}((test|live)_pub_[a-f0-9]{31})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "test_pub", + "live_pub", + "_pub", +] + +[[rules]] +id = "mailchimp-api-key" +description = "Identified a Mailchimp API key, potentially compromising email marketing campaigns and subscriber data." +regex = '''(?i)[\w.-]{0,50}?(?:MailchimpSDK.initialize|mailchimp)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{32}-us\d\d)(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["mailchimp"] + +[[rules]] +id = "mailgun-private-api-token" +description = "Found a Mailgun private API token, risking unauthorized email service operations and data breaches." +regex = '''(?i)[\w.-]{0,50}?(?:mailgun)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(key-[a-f0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["mailgun"] + +[[rules]] +id = "mailgun-pub-key" +description = "Discovered a Mailgun public validation key, which could expose email verification processes and associated data." +regex = '''(?i)[\w.-]{0,50}?(?:mailgun)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(pubkey-[a-f0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["mailgun"] + +[[rules]] +id = "mailgun-signing-key" +description = "Uncovered a Mailgun webhook signing key, potentially compromising email automation and data integrity." +regex = '''(?i)[\w.-]{0,50}?(?:mailgun)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-h0-9]{32}-[a-h0-9]{8}-[a-h0-9]{8})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["mailgun"] + +[[rules]] +id = "mapbox-api-token" +description = "Detected a MapBox API token, posing a risk to geospatial services and sensitive location data exposure." +regex = '''(?i)[\w.-]{0,50}?(?:mapbox)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(pk\.[a-z0-9]{60}\.[a-z0-9]{22})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["mapbox"] + +[[rules]] +id = "mattermost-access-token" +description = "Identified a Mattermost Access Token, which may compromise team communication channels and data privacy." +regex = '''(?i)[\w.-]{0,50}?(?:mattermost)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{26})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["mattermost"] + +[[rules]] +id = "maxmind-license-key" +description = "Discovered a potential MaxMind license key." +regex = '''\b([A-Za-z0-9]{6}_[A-Za-z0-9]{29}_mmk)(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 4 +keywords = ["_mmk"] + +[[rules]] +id = "messagebird-api-token" +description = "Found a MessageBird API token, risking unauthorized access to communication platforms and message data." +regex = '''(?i)[\w.-]{0,50}?(?:message[_-]?bird)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{25})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "messagebird", + "message-bird", + "message_bird", +] + +[[rules]] +id = "messagebird-client-id" +description = "Discovered a MessageBird client ID, potentially compromising API integrations and sensitive communication data." +regex = '''(?i)[\w.-]{0,50}?(?:message[_-]?bird)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "messagebird", + "message-bird", + "message_bird", +] + +[[rules]] +id = "microsoft-teams-webhook" +description = "Uncovered a Microsoft Teams Webhook, which could lead to unauthorized access to team collaboration tools and data leaks." +regex = '''https://[a-z0-9]+\.webhook\.office\.com/webhookb2/[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12}@[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12}/IncomingWebhook/[a-z0-9]{32}/[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12}''' +keywords = [ + "webhook.office.com", + "webhookb2", + "incomingwebhook", +] + +[[rules]] +id = "netlify-access-token" +description = "Detected a Netlify Access Token, potentially compromising web hosting services and site management." +regex = '''(?i)[\w.-]{0,50}?(?:netlify)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9=_\-]{40,46})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["netlify"] + +[[rules]] +id = "new-relic-browser-api-token" +description = "Identified a New Relic ingest browser API token, risking unauthorized access to application performance data and analytics." +regex = '''(?i)[\w.-]{0,50}?(?:new-relic|newrelic|new_relic)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(NRJS-[a-f0-9]{19})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["nrjs-"] + +[[rules]] +id = "new-relic-insert-key" +description = "Discovered a New Relic insight insert key, compromising data injection into the platform." +regex = '''(?i)[\w.-]{0,50}?(?:new-relic|newrelic|new_relic)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(NRII-[a-z0-9-]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["nrii-"] + +[[rules]] +id = "new-relic-user-api-id" +description = "Found a New Relic user API ID, posing a risk to application monitoring services and data integrity." +regex = '''(?i)[\w.-]{0,50}?(?:new-relic|newrelic|new_relic)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "new-relic", + "newrelic", + "new_relic", +] + +[[rules]] +id = "new-relic-user-api-key" +description = "Discovered a New Relic user API Key, which could lead to compromised application insights and performance monitoring." +regex = '''(?i)[\w.-]{0,50}?(?:new-relic|newrelic|new_relic)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(NRAK-[a-z0-9]{27})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["nrak"] + +[[rules]] +id = "npm-access-token" +description = "Uncovered an npm access token, potentially compromising package management and code repository access." +regex = '''(?i)\b(npm_[a-z0-9]{36})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["npm_"] + +[[rules]] +id = "nuget-config-password" +description = "Identified a password within a Nuget config file, potentially compromising package management access." +regex = '''(?i)''' +path = '''(?i)nuget\.config$''' +entropy = 1 +keywords = ["|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9=_\-]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "nytimes", + "new-york-times", + "newyorktimes", +] + +[[rules]] +id = "octopus-deploy-api-key" +description = "Discovered a potential Octopus Deploy API key, risking application deployments and operational security." +regex = '''\b(API-[A-Z0-9]{26})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["api-"] + +[[rules]] +id = "okta-access-token" +description = "Identified an Okta Access Token, which may compromise identity management services and user authentication data." +regex = '''[\w.-]{0,50}?(?i:[\w.-]{0,50}?(?:(?-i:[Oo]kta|OKTA))(?:[ \t\w.-]{0,20})[\s'"]{0,3})(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(00[\w=\-]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 4 +keywords = ["okta"] + +[[rules]] +id = "openai-api-key" +description = "Found an OpenAI API Key, posing a risk of unauthorized access to AI services and data manipulation." +regex = '''\b(sk-(?:proj|svcacct|admin)-(?:[A-Za-z0-9_-]{74}|[A-Za-z0-9_-]{58})T3BlbkFJ(?:[A-Za-z0-9_-]{74}|[A-Za-z0-9_-]{58})\b|sk-[a-zA-Z0-9]{20}T3BlbkFJ[a-zA-Z0-9]{20})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["t3blbkfj"] + +[[rules]] +id = "openshift-user-token" +description = "Found an OpenShift user token, potentially compromising an OpenShift/Kubernetes cluster." +regex = '''\b(sha256~[\w-]{43})(?:[^\w-]|\z)''' +entropy = 3.5 +keywords = ["sha256~"] + +[[rules]] +id = "pkcs12-file" +description = "Found a PKCS #12 file, which commonly contain bundled private keys." +path = '''(?i)(?:^|\/)[^\/]+\.p(?:12|fx)$''' + +[[rules]] +id = "plaid-api-token" +description = "Discovered a Plaid API Token, potentially compromising financial data aggregation and banking services." +regex = '''(?i)[\w.-]{0,50}?(?:plaid)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(access-(?:sandbox|development|production)-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["plaid"] + +[[rules]] +id = "plaid-client-id" +description = "Uncovered a Plaid Client ID, which could lead to unauthorized financial service integrations and data breaches." +regex = '''(?i)[\w.-]{0,50}?(?:plaid)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{24})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3.5 +keywords = ["plaid"] + +[[rules]] +id = "plaid-secret-key" +description = "Detected a Plaid Secret key, risking unauthorized access to financial accounts and sensitive transaction data." +regex = '''(?i)[\w.-]{0,50}?(?:plaid)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{30})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3.5 +keywords = ["plaid"] + +[[rules]] +id = "planetscale-api-token" +description = "Identified a PlanetScale API token, potentially compromising database management and operations." +regex = '''\b(pscale_tkn_(?i)[\w=\.-]{32,64})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["pscale_tkn_"] + +[[rules]] +id = "planetscale-oauth-token" +description = "Found a PlanetScale OAuth token, posing a risk to database access control and sensitive data integrity." +regex = '''\b(pscale_oauth_[\w=\.-]{32,64})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["pscale_oauth_"] + +[[rules]] +id = "planetscale-password" +description = "Discovered a PlanetScale password, which could lead to unauthorized database operations and data breaches." +regex = '''(?i)\b(pscale_pw_(?i)[\w=\.-]{32,64})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["pscale_pw_"] + +[[rules]] +id = "postman-api-token" +description = "Uncovered a Postman API token, potentially compromising API testing and development workflows." +regex = '''\b(PMAK-(?i)[a-f0-9]{24}\-[a-f0-9]{34})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["pmak-"] + +[[rules]] +id = "prefect-api-token" +description = "Detected a Prefect API token, risking unauthorized access to workflow management and automation services." +regex = '''\b(pnu_[a-zA-Z0-9]{36})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["pnu_"] + +[[rules]] +id = "private-key" +description = "Identified a Private Key, which may compromise cryptographic security and sensitive data encryption." +regex = '''(?i)-----BEGIN[ A-Z0-9_-]{0,100}PRIVATE KEY(?: BLOCK)?-----[\s\S-]{64,}?KEY(?: BLOCK)?-----''' +keywords = ["-----begin"] + +[[rules]] +id = "privateai-api-token" +description = "Identified a PrivateAI Token, posing a risk of unauthorized access to AI services and data manipulation." +regex = '''[\w.-]{0,50}?(?i:[\w.-]{0,50}?(?:private[_-]?ai)(?:[ \t\w.-]{0,20})[\s'"]{0,3})(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{32})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = [ + "privateai", + "private_ai", + "private-ai", +] + +[[rules]] +id = "pulumi-api-token" +description = "Found a Pulumi API token, posing a risk to infrastructure as code services and cloud resource management." +regex = '''\b(pul-[a-f0-9]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["pul-"] + +[[rules]] +id = "pypi-upload-token" +description = "Discovered a PyPI upload token, potentially compromising Python package distribution and repository integrity." +regex = '''pypi-AgEIcHlwaS5vcmc[\w-]{50,1000}''' +entropy = 3 +keywords = ["pypi-ageichlwas5vcmc"] + +[[rules]] +id = "rapidapi-access-token" +description = "Uncovered a RapidAPI Access Token, which could lead to unauthorized access to various APIs and data services." +regex = '''(?i)[\w.-]{0,50}?(?:rapidapi)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9_-]{50})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["rapidapi"] + +[[rules]] +id = "readme-api-token" +description = "Detected a Readme API token, risking unauthorized documentation management and content exposure." +regex = '''\b(rdme_[a-z0-9]{70})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["rdme_"] + +[[rules]] +id = "rubygems-api-token" +description = "Identified a Rubygem API token, potentially compromising Ruby library distribution and package management." +regex = '''\b(rubygems_[a-f0-9]{48})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["rubygems_"] + +[[rules]] +id = "scalingo-api-token" +description = "Found a Scalingo API token, posing a risk to cloud platform services and application deployment security." +regex = '''\b(tk-us-[\w-]{48})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["tk-us-"] + +[[rules]] +id = "sendbird-access-id" +description = "Discovered a Sendbird Access ID, which could compromise chat and messaging platform integrations." +regex = '''(?i)[\w.-]{0,50}?(?:sendbird)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["sendbird"] + +[[rules]] +id = "sendbird-access-token" +description = "Uncovered a Sendbird Access Token, potentially risking unauthorized access to communication services and user data." +regex = '''(?i)[\w.-]{0,50}?(?:sendbird)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["sendbird"] + +[[rules]] +id = "sendgrid-api-token" +description = "Detected a SendGrid API token, posing a risk of unauthorized email service operations and data exposure." +regex = '''\b(SG\.(?i)[a-z0-9=_\-\.]{66})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["sg."] + +[[rules]] +id = "sendinblue-api-token" +description = "Identified a Sendinblue API token, which may compromise email marketing services and subscriber data privacy." +regex = '''\b(xkeysib-[a-f0-9]{64}\-(?i)[a-z0-9]{16})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["xkeysib-"] + +[[rules]] +id = "sentry-access-token" +description = "Found a Sentry.io Access Token (old format), risking unauthorized access to error tracking services and sensitive application data." +regex = '''(?i)[\w.-]{0,50}?(?:sentry)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["sentry"] + +[[rules]] +id = "sentry-org-token" +description = "Found a Sentry.io Organization Token, risking unauthorized access to error tracking services and sensitive application data." +regex = '''\bsntrys_eyJpYXQiO[a-zA-Z0-9+/]{10,200}(?:LCJyZWdpb25fdXJs|InJlZ2lvbl91cmwi|cmVnaW9uX3VybCI6)[a-zA-Z0-9+/]{10,200}={0,2}_[a-zA-Z0-9+/]{43}(?:[^a-zA-Z0-9+/]|\z)''' +entropy = 4.5 +keywords = ["sntrys_eyjpyxqio"] + +[[rules]] +id = "sentry-user-token" +description = "Found a Sentry.io User Token, risking unauthorized access to error tracking services and sensitive application data." +regex = '''\b(sntryu_[a-f0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3.5 +keywords = ["sntryu_"] + +[[rules]] +id = "settlemint-application-access-token" +description = "Found a Settlemint Application Access Token." +regex = '''\b(sm_aat_[a-zA-Z0-9]{16})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["sm_aat"] + +[[rules]] +id = "settlemint-personal-access-token" +description = "Found a Settlemint Personal Access Token." +regex = '''\b(sm_pat_[a-zA-Z0-9]{16})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["sm_pat"] + +[[rules]] +id = "settlemint-service-access-token" +description = "Found a Settlemint Service Access Token." +regex = '''\b(sm_sat_[a-zA-Z0-9]{16})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["sm_sat"] + +[[rules]] +id = "shippo-api-token" +description = "Discovered a Shippo API token, potentially compromising shipping services and customer order data." +regex = '''\b(shippo_(?:live|test)_[a-fA-F0-9]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = ["shippo_"] + +[[rules]] +id = "shopify-access-token" +description = "Uncovered a Shopify access token, which could lead to unauthorized e-commerce platform access and data breaches." +regex = '''shpat_[a-fA-F0-9]{32}''' +entropy = 2 +keywords = ["shpat_"] + +[[rules]] +id = "shopify-custom-access-token" +description = "Detected a Shopify custom access token, potentially compromising custom app integrations and e-commerce data security." +regex = '''shpca_[a-fA-F0-9]{32}''' +entropy = 2 +keywords = ["shpca_"] + +[[rules]] +id = "shopify-private-app-access-token" +description = "Identified a Shopify private app access token, risking unauthorized access to private app data and store operations." +regex = '''shppa_[a-fA-F0-9]{32}''' +entropy = 2 +keywords = ["shppa_"] + +[[rules]] +id = "shopify-shared-secret" +description = "Found a Shopify shared secret, posing a risk to application authentication and e-commerce platform security." +regex = '''shpss_[a-fA-F0-9]{32}''' +entropy = 2 +keywords = ["shpss_"] + +[[rules]] +id = "sidekiq-secret" +description = "Discovered a Sidekiq Secret, which could lead to compromised background job processing and application data breaches." +regex = '''(?i)[\w.-]{0,50}?(?:BUNDLE_ENTERPRISE__CONTRIBSYS__COM|BUNDLE_GEMS__CONTRIBSYS__COM)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-f0-9]{8}:[a-f0-9]{8})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = [ + "bundle_enterprise__contribsys__com", + "bundle_gems__contribsys__com", +] + +[[rules]] +id = "sidekiq-sensitive-url" +description = "Uncovered a Sidekiq Sensitive URL, potentially exposing internal job queues and sensitive operation details." +regex = '''(?i)\bhttps?://([a-f0-9]{8}:[a-f0-9]{8})@(?:gems.contribsys.com|enterprise.contribsys.com)(?:[\/|\#|\?|:]|$)''' +keywords = [ + "gems.contribsys.com", + "enterprise.contribsys.com", +] + +[[rules]] +id = "slack-app-token" +description = "Detected a Slack App-level token, risking unauthorized access to Slack applications and workspace data." +regex = '''(?i)xapp-\d-[A-Z0-9]+-\d+-[a-z0-9]+''' +entropy = 2 +keywords = ["xapp"] + +[[rules]] +id = "slack-bot-token" +description = "Identified a Slack Bot token, which may compromise bot integrations and communication channel security." +regex = '''xoxb-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*''' +entropy = 3 +keywords = ["xoxb"] + +[[rules]] +id = "slack-config-access-token" +description = "Found a Slack Configuration access token, posing a risk to workspace configuration and sensitive data access." +regex = '''(?i)xoxe.xox[bp]-\d-[A-Z0-9]{163,166}''' +entropy = 2 +keywords = [ + "xoxe.xoxb-", + "xoxe.xoxp-", +] + +[[rules]] +id = "slack-config-refresh-token" +description = "Discovered a Slack Configuration refresh token, potentially allowing prolonged unauthorized access to configuration settings." +regex = '''(?i)xoxe-\d-[A-Z0-9]{146}''' +entropy = 2 +keywords = ["xoxe-"] + +[[rules]] +id = "slack-legacy-bot-token" +description = "Uncovered a Slack Legacy bot token, which could lead to compromised legacy bot operations and data exposure." +regex = '''xoxb-[0-9]{8,14}-[a-zA-Z0-9]{18,26}''' +entropy = 2 +keywords = ["xoxb"] + +[[rules]] +id = "slack-legacy-token" +description = "Detected a Slack Legacy token, risking unauthorized access to older Slack integrations and user data." +regex = '''xox[os]-\d+-\d+-\d+-[a-fA-F\d]+''' +entropy = 2 +keywords = [ + "xoxo", + "xoxs", +] + +[[rules]] +id = "slack-legacy-workspace-token" +description = "Identified a Slack Legacy Workspace token, potentially compromising access to workspace data and legacy features." +regex = '''xox[ar]-(?:\d-)?[0-9a-zA-Z]{8,48}''' +entropy = 2 +keywords = [ + "xoxa", + "xoxr", +] + +[[rules]] +id = "slack-user-token" +description = "Found a Slack User token, posing a risk of unauthorized user impersonation and data access within Slack workspaces." +regex = '''xox[pe](?:-[0-9]{10,13}){3}-[a-zA-Z0-9-]{28,34}''' +entropy = 2 +keywords = [ + "xoxp-", + "xoxe-", +] + +[[rules]] +id = "slack-webhook-url" +description = "Discovered a Slack Webhook, which could lead to unauthorized message posting and data leakage in Slack channels." +regex = '''(?:https?://)?hooks.slack.com/(?:services|workflows|triggers)/[A-Za-z0-9+/]{43,56}''' +keywords = ["hooks.slack.com"] + +[[rules]] +id = "snyk-api-token" +description = "Uncovered a Snyk API token, potentially compromising software vulnerability scanning and code security." +regex = '''(?i)[\w.-]{0,50}?(?:snyk[_.-]?(?:(?:api|oauth)[_.-]?)?(?:key|token))(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["snyk"] + +[[rules]] +id = "sonar-api-token" +description = "Uncovered a Sonar API token, potentially compromising software vulnerability scanning and code security." +regex = '''(?i)[\w.-]{0,50}?(?:sonar[_.-]?(login|token))(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9=_\-]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["sonar"] + +[[rules]] +id = "sourcegraph-access-token" +description = "Sourcegraph is a code search and navigation engine." +regex = '''(?i)\b(\b(sgp_(?:[a-fA-F0-9]{16}|local)_[a-fA-F0-9]{40}|sgp_[a-fA-F0-9]{40}|[a-fA-F0-9]{40})\b)(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = [ + "sgp_", + "sourcegraph", +] + +[[rules]] +id = "square-access-token" +description = "Detected a Square Access Token, risking unauthorized payment processing and financial transaction exposure." +regex = '''\b((?:EAAA|sq0atp-)[\w-]{22,60})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = [ + "sq0atp-", + "eaaa", +] + +[[rules]] +id = "squarespace-access-token" +description = "Identified a Squarespace Access Token, which may compromise website management and content control on Squarespace." +regex = '''(?i)[\w.-]{0,50}?(?:squarespace)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["squarespace"] + +[[rules]] +id = "stripe-access-token" +description = "Found a Stripe Access Token, posing a risk to payment processing services and sensitive financial data." +regex = '''\b((?:sk|rk)_(?:test|live|prod)_[a-zA-Z0-9]{10,99})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 2 +keywords = [ + "sk_test", + "sk_live", + "sk_prod", + "rk_test", + "rk_live", + "rk_prod", +] + +[[rules]] +id = "sumologic-access-id" +description = "Discovered a SumoLogic Access ID, potentially compromising log management services and data analytics integrity." +regex = '''[\w.-]{0,50}?(?i:[\w.-]{0,50}?(?:(?-i:[Ss]umo|SUMO))(?:[ \t\w.-]{0,20})[\s'"]{0,3})(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(su[a-zA-Z0-9]{12})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["sumo"] + +[[rules]] +id = "sumologic-access-token" +description = "Uncovered a SumoLogic Access Token, which could lead to unauthorized access to log data and analytics insights." +regex = '''(?i)[\w.-]{0,50}?(?:(?-i:[Ss]umo|SUMO))(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{64})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3 +keywords = ["sumo"] + +[[rules]] +id = "telegram-bot-api-token" +description = "Detected a Telegram Bot API Token, risking unauthorized bot operations and message interception on Telegram." +regex = '''(?i)[\w.-]{0,50}?(?:telegr)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9]{5,16}:(?-i:A)[a-z0-9_\-]{34})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["telegr"] + +[[rules]] +id = "travisci-access-token" +description = "Identified a Travis CI Access Token, potentially compromising continuous integration services and codebase security." +regex = '''(?i)[\w.-]{0,50}?(?:travis)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{22})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["travis"] + +[[rules]] +id = "twilio-api-key" +description = "Found a Twilio API Key, posing a risk to communication services and sensitive customer interaction data." +regex = '''SK[0-9a-fA-F]{32}''' +entropy = 3 +keywords = ["sk"] + +[[rules]] +id = "twitch-api-token" +description = "Discovered a Twitch API token, which could compromise streaming services and account integrations." +regex = '''(?i)[\w.-]{0,50}?(?:twitch)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{30})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["twitch"] + +[[rules]] +id = "twitter-access-secret" +description = "Uncovered a Twitter Access Secret, potentially risking unauthorized Twitter integrations and data breaches." +regex = '''(?i)[\w.-]{0,50}?(?:twitter)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{45})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["twitter"] + +[[rules]] +id = "twitter-access-token" +description = "Detected a Twitter Access Token, posing a risk of unauthorized account operations and social media data exposure." +regex = '''(?i)[\w.-]{0,50}?(?:twitter)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([0-9]{15,25}-[a-zA-Z0-9]{20,40})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["twitter"] + +[[rules]] +id = "twitter-api-key" +description = "Identified a Twitter API Key, which may compromise Twitter application integrations and user data security." +regex = '''(?i)[\w.-]{0,50}?(?:twitter)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{25})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["twitter"] + +[[rules]] +id = "twitter-api-secret" +description = "Found a Twitter API Secret, risking the security of Twitter app integrations and sensitive data access." +regex = '''(?i)[\w.-]{0,50}?(?:twitter)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{50})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["twitter"] + +[[rules]] +id = "twitter-bearer-token" +description = "Discovered a Twitter Bearer Token, potentially compromising API access and data retrieval from Twitter." +regex = '''(?i)[\w.-]{0,50}?(?:twitter)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(A{22}[a-zA-Z0-9%]{80,100})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["twitter"] + +[[rules]] +id = "typeform-api-token" +description = "Uncovered a Typeform API token, which could lead to unauthorized survey management and data collection." +regex = '''(?i)[\w.-]{0,50}?(?:typeform)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(tfp_[a-z0-9\-_\.=]{59})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["tfp_"] + +[[rules]] +id = "vault-batch-token" +description = "Detected a Vault Batch Token, risking unauthorized access to secret management services and sensitive data." +regex = '''\b(hvb\.[\w-]{138,300})(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 4 +keywords = ["hvb."] + +[[rules]] +id = "vault-service-token" +description = "Identified a Vault Service Token, potentially compromising infrastructure security and access to sensitive credentials." +regex = '''\b((?:hvs\.[\w-]{90,120}|s\.(?i:[a-z0-9]{24})))(?:[\x60'"\s;]|\\[nr]|$)''' +entropy = 3.5 +keywords = [ + "hvs.", + "s.", +] +[[rules.allowlists]] +regexes = [ + '''s\.[A-Za-z]{24}''', +] + +[[rules]] +id = "yandex-access-token" +description = "Found a Yandex Access Token, posing a risk to Yandex service integrations and user data privacy." +regex = '''(?i)[\w.-]{0,50}?(?:yandex)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(t1\.[A-Z0-9a-z_-]+[=]{0,2}\.[A-Z0-9a-z_-]{86}[=]{0,2})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["yandex"] + +[[rules]] +id = "yandex-api-key" +description = "Discovered a Yandex API Key, which could lead to unauthorized access to Yandex services and data manipulation." +regex = '''(?i)[\w.-]{0,50}?(?:yandex)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(AQVN[A-Za-z0-9_\-]{35,38})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["yandex"] + +[[rules]] +id = "yandex-aws-access-token" +description = "Uncovered a Yandex AWS Access Token, potentially compromising cloud resource access and data security on Yandex Cloud." +regex = '''(?i)[\w.-]{0,50}?(?:yandex)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}(YC[a-zA-Z0-9_\-]{38})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["yandex"] + +[[rules]] +id = "zendesk-secret-key" +description = "Detected a Zendesk Secret Key, risking unauthorized access to customer support services and sensitive ticketing data." +regex = '''(?i)[\w.-]{0,50}?(?:zendesk)(?:[ \t\w.-]{0,20})[\s'"]{0,3}(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)[\x60'"\s=]{0,5}([a-z0-9]{40})(?:[\x60'"\s;]|\\[nr]|$)''' +keywords = ["zendesk"] From dc258e041ce472a76f7b9f8550ed271312f26648 Mon Sep 17 00:00:00 2001 From: skyflow-bharti Date: Fri, 12 Sep 2025 15:47:23 +0530 Subject: [PATCH 06/28] SK-2292 retry on connection --- skyflow/vault/_client.py | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index e426f59f..b9d858fa 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -45,7 +45,6 @@ def __init__(self, config: Configuration): def insert(self, records: dict, options: InsertOptions = InsertOptions()): interface = InterfaceName.INSERT.value log_info(InfoMessages.INSERT_TRIGGERED.value, interface=interface) - self._checkConfig(interface) jsonBody = getInsertRequestBody(records, options) @@ -57,16 +56,35 @@ def insert(self, records: dict, options: InsertOptions = InsertOptions()): "sky-metadata": json.dumps(getMetrics()) } - response = requests.post(requestURL, data=jsonBody, headers=headers) - processedResponse = processResponse(response) - result, partial = convertResponse(records, processedResponse, options) - if partial: - log_error(SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, interface) - elif 'records' not in result: - log_error(SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, interface) - else: - log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) - return result + # Use for-loop for retry logic, avoid code repetition + for attempt in range(2): + try: + # If jsonBody is a dict, use json=, else use data= + if isinstance(jsonBody, dict): + response = requests.post(requestURL, json=jsonBody, headers=headers) + else: + response = requests.post(requestURL, data=jsonBody, headers=headers) + processedResponse = processResponse(response) + result, partial = convertResponse(records, processedResponse, options) + if partial: + log_error(SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, interface) + raise SkyflowError(SkyflowErrorCodes.PARTIAL_SUCCESS, SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, result, interface=interface) + if 'records' not in result: + log_error(SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, interface) + raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, result, interface=interface) + log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) + return result + except requests.exceptions.ConnectionError as err: + log_error(f'Connection error inserting record: {err}', interface) + if attempt == 0: + log_info("Retrying record...", interface) + continue + else: + raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, f"Connection error after retry: {err}", interface=interface) + except Exception as err: + log_error(f'Unexpected error in insert: {err}', interface) + raise + def detokenize(self, records: dict, options: DetokenizeOptions = DetokenizeOptions()): interface = InterfaceName.DETOKENIZE.value From adafc1cc1b618f4d26c868fb764b490a92147580 Mon Sep 17 00:00:00 2001 From: skyflow-bharti Date: Fri, 12 Sep 2025 15:51:48 +0530 Subject: [PATCH 07/28] SK-2292 set default retry --- skyflow/vault/_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index b9d858fa..58e28d05 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -55,9 +55,9 @@ def insert(self, records: dict, options: InsertOptions = InsertOptions()): "Authorization": "Bearer " + self.storedToken, "sky-metadata": json.dumps(getMetrics()) } - + max_retries = 1 # Use for-loop for retry logic, avoid code repetition - for attempt in range(2): + for attempt in range(max_retries): try: # If jsonBody is a dict, use json=, else use data= if isinstance(jsonBody, dict): From b3d791cafb37e6e1d0748dde5b389dd07d737178 Mon Sep 17 00:00:00 2001 From: skyflow-bharti Date: Fri, 12 Sep 2025 18:48:11 +0530 Subject: [PATCH 08/28] SK-2293 max retry set to 3 --- skyflow/vault/_client.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index 58e28d05..785c42eb 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -55,15 +55,12 @@ def insert(self, records: dict, options: InsertOptions = InsertOptions()): "Authorization": "Bearer " + self.storedToken, "sky-metadata": json.dumps(getMetrics()) } - max_retries = 1 + max_retries = 3 # Use for-loop for retry logic, avoid code repetition - for attempt in range(max_retries): + for attempt in range(max_retries+1): try: # If jsonBody is a dict, use json=, else use data= - if isinstance(jsonBody, dict): - response = requests.post(requestURL, json=jsonBody, headers=headers) - else: - response = requests.post(requestURL, data=jsonBody, headers=headers) + response = requests.post(requestURL, data=jsonBody, headers=headers) processedResponse = processResponse(response) result, partial = convertResponse(records, processedResponse, options) if partial: @@ -75,16 +72,10 @@ def insert(self, records: dict, options: InsertOptions = InsertOptions()): log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) return result except requests.exceptions.ConnectionError as err: - log_error(f'Connection error inserting record: {err}', interface) - if attempt == 0: - log_info("Retrying record...", interface) + if attempt < max_retries: continue else: - raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, f"Connection error after retry: {err}", interface=interface) - except Exception as err: - log_error(f'Unexpected error in insert: {err}', interface) - raise - + raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, f"Error occurred: {err}", interface=interface) def detokenize(self, records: dict, options: DetokenizeOptions = DetokenizeOptions()): interface = InterfaceName.DETOKENIZE.value From bb6c87450d2c2a82f0a1ab305f01979ce0e616f2 Mon Sep 17 00:00:00 2001 From: skyflow-bharti Date: Fri, 12 Sep 2025 14:30:46 +0000 Subject: [PATCH 09/28] [AUTOMATED] Public Release - 1.15.2 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index c7756728..89ba87d0 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.1' +current_version = '1.15.2' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 4d6b1a07..82208a4e 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.1' \ No newline at end of file +SDK_VERSION = '1.15.2' \ No newline at end of file From 5700874e2eda09325f94dd3c561c2d0b9d7cad87 Mon Sep 17 00:00:00 2001 From: skyflow-bharti Date: Fri, 12 Sep 2025 21:52:59 +0530 Subject: [PATCH 10/28] SK-2293 retry on every exception --- skyflow/vault/_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index 785c42eb..1c1236c2 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -71,7 +71,7 @@ def insert(self, records: dict, options: InsertOptions = InsertOptions()): raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, result, interface=interface) log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) return result - except requests.exceptions.ConnectionError as err: + except Exception as err: if attempt < max_retries: continue else: From 3b47b5ca3d6d4da3e2819f690f4924e6933a8f55 Mon Sep 17 00:00:00 2001 From: skyflow-bharti Date: Fri, 12 Sep 2025 16:31:59 +0000 Subject: [PATCH 11/28] [AUTOMATED] Public Release - 1.15.3 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 89ba87d0..9351f06a 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.2' +current_version = '1.15.3' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 82208a4e..30ae0eae 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.2' \ No newline at end of file +SDK_VERSION = '1.15.3' \ No newline at end of file From 922f98c977145227dc50b4117243557cf994dc48 Mon Sep 17 00:00:00 2001 From: skyflow-bharti Date: Fri, 12 Sep 2025 22:39:07 +0530 Subject: [PATCH 12/28] SK-2293 retry on every exception --- skyflow/vault/_client.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index 1c1236c2..e21dcbab 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -75,7 +75,10 @@ def insert(self, records: dict, options: InsertOptions = InsertOptions()): if attempt < max_retries: continue else: - raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, f"Error occurred: {err}", interface=interface) + if isinstance(err, SkyflowError): + raise err + else: + raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, f"Error occurred: {err}", interface=interface) def detokenize(self, records: dict, options: DetokenizeOptions = DetokenizeOptions()): interface = InterfaceName.DETOKENIZE.value From 450db9d8e16bb6194b7e4a9a536e9e96ab607d1a Mon Sep 17 00:00:00 2001 From: skyflow-bharti Date: Fri, 12 Sep 2025 17:15:17 +0000 Subject: [PATCH 13/28] [AUTOMATED] Public Release - 1.15.4 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 9351f06a..323fa31d 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.3' +current_version = '1.15.4' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 30ae0eae..5c7ae5de 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.3' \ No newline at end of file +SDK_VERSION = '1.15.4' \ No newline at end of file From 57d594b4b464a448b653d25086a4ce9b37873ad2 Mon Sep 17 00:00:00 2001 From: skyflow-shravan <121150537+skyflow-shravan@users.noreply.github.com> Date: Thu, 18 Sep 2025 23:10:18 +0530 Subject: [PATCH 14/28] SK-2296: Release/25.9.14 (#200) * Merge pull request #199 from skyflowapi/skyflow-vivek/SK-2296-use-sessions-for-insert SK-2296 Use sessions for insert method * [AUTOMATED] Private Release 1.15.4.dev0+1c45049 --------- Co-authored-by: skyflow-vivek <121149418+skyflow-vivek@users.noreply.github.com> Co-authored-by: skyflow-shravan --- .github/workflows/internal-release.yml | 22 ++++++ .github/workflows/shared-build-and-deploy.yml | 74 +++++++++++++++++++ ci-scripts/bump_version.sh | 13 ++-- setup.py | 2 +- skyflow/_utils.py | 1 + skyflow/errors/_skyflow_errors.py | 1 + skyflow/vault/_client.py | 69 +++++++++++------ skyflow/version.py | 2 +- 8 files changed, 151 insertions(+), 33 deletions(-) create mode 100644 .github/workflows/internal-release.yml create mode 100644 .github/workflows/shared-build-and-deploy.yml diff --git a/.github/workflows/internal-release.yml b/.github/workflows/internal-release.yml new file mode 100644 index 00000000..cafdb9c9 --- /dev/null +++ b/.github/workflows/internal-release.yml @@ -0,0 +1,22 @@ +name: Internal Release + +on: + push: + tags-ignore: + - '*.*' + paths-ignore: + - "setup.py" + - "*.yml" + - "*.md" + - "skyflow/version.py" + - "samples/**" + branches: + - release/* + +jobs: + build-and-deploy: + uses: ./.github/workflows/shared-build-and-deploy.yml + with: + ref: ${{ github.ref_name }} + tag: 'internal' + secrets: inherit \ No newline at end of file diff --git a/.github/workflows/shared-build-and-deploy.yml b/.github/workflows/shared-build-and-deploy.yml new file mode 100644 index 00000000..cc6ac048 --- /dev/null +++ b/.github/workflows/shared-build-and-deploy.yml @@ -0,0 +1,74 @@ +name: Shared Build and Deploy + +on: + workflow_call: + inputs: + ref: + description: 'Git reference to use (e.g., main or branch name)' + required: true + type: string + + tag: + description: 'Release Tag' + required: true + type: string + +jobs: + build-and-deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v2 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Resolve Branch for the Tagged Commit + id: resolve-branch + if: ${{ inputs.tag == 'beta' || inputs.tag == 'public' }} + run: | + TAG_COMMIT=$(git rev-list -n 1 ${{ github.ref_name }}) + BRANCH_NAME=$(git branch -r --contains $TAG_COMMIT | grep -o 'origin/.*' | sed 's|origin/||' | head -n 1) + if [ -z "$BRANCH_NAME" ]; then + echo "Error: Could not resolve branch for the tag." + exit 1 + fi + echo "Resolved Branch Name: $BRANCH_NAME" + echo "branch_name=$BRANCH_NAME" >> $GITHUB_ENV + - name: Get Previous tag + id: previoustag + uses: WyriHaximus/github-action-get-previous-tag@v1 + with: + fallback: 1.0.0 + + - name: Bump Version + run: | + chmod +x ./ci-scripts/bump_version.sh + if ${{ inputs.tag == 'internal' }}; then + ./ci-scripts/bump_version.sh "${{ steps.previoustag.outputs.tag }}" "$(git rev-parse --short "$GITHUB_SHA")" + else + ./ci-scripts/bump_version.sh "${{ steps.previoustag.outputs.tag }}" + fi + - name: Commit changes + run: | + git config user.name "${{ github.actor }}" + git config user.email "${{ github.actor }}@users.noreply.github.com" + git add setup.py + git add skyflow/version.py + if [[ "${{ inputs.tag }}" == "internal" ]]; then + VERSION="${{ steps.previoustag.outputs.tag }}.dev0+$(git rev-parse --short $GITHUB_SHA)" + COMMIT_MESSAGE="[AUTOMATED] Private Release $VERSION" + git commit -m "$COMMIT_MESSAGE" + git push origin ${{ github.ref_name }} -f + fi + - name: Build and Publish to JFrog Artifactory + if: ${{ inputs.tag == 'internal' }} + env: + TWINE_USERNAME: ${{ secrets.JFROG_USERNAME }} + TWINE_PASSWORD: ${{ secrets.JFROG_PASSWORD }} + run: | + python setup.py sdist bdist_wheel + twine upload --repository-url https://prekarilabs.jfrog.io/artifactory/api/pypi/skyflow-python/ dist/* \ No newline at end of file diff --git a/ci-scripts/bump_version.sh b/ci-scripts/bump_version.sh index b0a57a9e..c8f2b9e9 100755 --- a/ci-scripts/bump_version.sh +++ b/ci-scripts/bump_version.sh @@ -1,22 +1,19 @@ Version=$1 SEMVER=$Version - if [ -z $2 ] then echo "Bumping package version to $1" - sed -E "s/current_version = .+/current_version = \'$SEMVER\'/g" setup.py > tempfile && cat tempfile > setup.py && rm -f tempfile sed -E "s/SDK_VERSION = .+/SDK_VERSION = \'$SEMVER\'/g" skyflow/version.py > tempfile && cat tempfile > skyflow/version.py && rm -f tempfile - echo -------------------------- echo "Done, Package now at $1" else - echo "Bumping package version to $1-dev.$2" + echo "Bumping package version to $1.dev0+$2" - sed -E "s/current_version = .+/current_version = \'$SEMVER-dev.$2\'/g" setup.py > tempfile && cat tempfile > setup.py && rm -f tempfile - sed -E "s/SDK_VERSION = .+/SDK_VERSION = \'$SEMVER-dev.$2\'/g" skyflow/version.py > tempfile && cat tempfile > skyflow/version.py && rm -f tempfile + sed -E "s/current_version = .+/current_version = \'$SEMVER.dev0+$2\'/g" setup.py > tempfile && cat tempfile > setup.py && rm -f tempfile + sed -E "s/SDK_VERSION = .+/SDK_VERSION = \'$SEMVER.dev0+$2\'/g" skyflow/version.py > tempfile && cat tempfile > skyflow/version.py && rm -f tempfile echo -------------------------- - echo "Done, Package now at $1-dev.$2" -fi + echo "Done, Package now at $1.dev0+$2" +fi \ No newline at end of file diff --git a/setup.py b/setup.py index 323fa31d..c83828ab 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.4' +current_version = '1.15.4.dev0+1c45049' setup( name='skyflow', diff --git a/skyflow/_utils.py b/skyflow/_utils.py index 83bf54a6..1e1b7109 100644 --- a/skyflow/_utils.py +++ b/skyflow/_utils.py @@ -53,6 +53,7 @@ def log_error(message: str, interface: str): class InfoMessages(Enum): INITIALIZE_CLIENT = "Initializing skyflow client" CLIENT_INITIALIZED = "Initialized skyflow client successfully" + CLOSING_SESSION = "Closing the session" VALIDATE_INSERT_RECORDS = "Validating insert records" VALIDATE_DETOKENIZE_INPUT = "Validating detokenize input" VALIDATE_GET_BY_ID_INPUT = "Validating getByID input" diff --git a/skyflow/errors/_skyflow_errors.py b/skyflow/errors/_skyflow_errors.py index 2e792812..ba52c4e5 100644 --- a/skyflow/errors/_skyflow_errors.py +++ b/skyflow/errors/_skyflow_errors.py @@ -16,6 +16,7 @@ class SkyflowErrorCodes(Enum): class SkyflowErrorMessages(Enum): API_ERROR = "Server returned status code %s" + NETWORK_ERROR = "Network error occurred: %s" FILE_NOT_FOUND = "File at %s not found" FILE_INVALID_JSON = "File at %s is not in JSON format" diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index e21dcbab..97979505 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -5,6 +5,7 @@ import types import requests import asyncio +from requests.adapters import HTTPAdapter from skyflow.vault._insert import getInsertRequestBody, processResponse, convertResponse from skyflow.vault._update import sendUpdateRequests, createUpdateResponseBody from skyflow.vault._config import Configuration, ConnectionConfig, DeleteOptions, DetokenizeOptions, GetOptions, InsertOptions, UpdateOptions, QueryOptions @@ -36,49 +37,71 @@ def __init__(self, config: Configuration): raise SkyflowError(SkyflowErrorCodes.INVALID_INPUT, SkyflowErrorMessages.TOKEN_PROVIDER_ERROR.value % ( str(type(config.tokenProvider))), interface=interface) + self._create_session() self.vaultID = config.vaultID self.vaultURL = config.vaultURL.rstrip('/') self.tokenProvider = config.tokenProvider self.storedToken = '' log_info(InfoMessages.CLIENT_INITIALIZED.value, interface=interface) + + def _create_session(self): + self.session = requests.Session() + adapter = HTTPAdapter(pool_connections=1, pool_maxsize=25, pool_block=True) + self.session.mount("https://", adapter) + + def __del__(self): + if (self.session is not None): + log_info(InfoMessages.CLOSING_SESSION.value, interface=InterfaceName.CLIENT.value) + self.session.close() + self.session = None + + def _get_session(self): + if (self.session is None): + self._create_session() + return self.session def insert(self, records: dict, options: InsertOptions = InsertOptions()): + max_retries = 1 interface = InterfaceName.INSERT.value log_info(InfoMessages.INSERT_TRIGGERED.value, interface=interface) self._checkConfig(interface) - jsonBody = getInsertRequestBody(records, options) requestURL = self._get_complete_vault_url() - self.storedToken = tokenProviderWrapper( - self.storedToken, self.tokenProvider, interface) - headers = { - "Authorization": "Bearer " + self.storedToken, - "sky-metadata": json.dumps(getMetrics()) - } - max_retries = 3 - # Use for-loop for retry logic, avoid code repetition - for attempt in range(max_retries+1): + + for attempt in range(max_retries + 1): try: - # If jsonBody is a dict, use json=, else use data= - response = requests.post(requestURL, data=jsonBody, headers=headers) + self.storedToken = tokenProviderWrapper( + self.storedToken, self.tokenProvider, interface) + headers = { + "Authorization": "Bearer " + self.storedToken, + "sky-metadata": json.dumps(getMetrics()), + } + response = self._get_session().post( + requestURL, + data=jsonBody, + headers=headers, + ) processedResponse = processResponse(response) result, partial = convertResponse(records, processedResponse, options) if partial: log_error(SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, interface) - raise SkyflowError(SkyflowErrorCodes.PARTIAL_SUCCESS, SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, result, interface=interface) - if 'records' not in result: + elif 'records' not in result: log_error(SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, interface) - raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, result, interface=interface) - log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) + else: + log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) return result - except Exception as err: + except requests.exceptions.ConnectionError as err: if attempt < max_retries: - continue - else: - if isinstance(err, SkyflowError): - raise err - else: - raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, f"Error occurred: {err}", interface=interface) + continue + raise SkyflowError( + SkyflowErrorCodes.SERVER_ERROR, + SkyflowErrorMessages.NETWORK_ERROR.value % str(err), + interface=interface + ) + except SkyflowError as err: + if err.code != SkyflowErrorCodes.SERVER_ERROR or attempt >= max_retries: + raise err + continue def detokenize(self, records: dict, options: DetokenizeOptions = DetokenizeOptions()): interface = InterfaceName.DETOKENIZE.value diff --git a/skyflow/version.py b/skyflow/version.py index 5c7ae5de..fa38ca14 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.4' \ No newline at end of file +SDK_VERSION = '1.15.4.dev0+1c45049' \ No newline at end of file From d2979c3af35aad2d895748ea34017945c6b6bf57 Mon Sep 17 00:00:00 2001 From: skyflow-shravan Date: Thu, 18 Sep 2025 17:42:36 +0000 Subject: [PATCH 15/28] [AUTOMATED] Public Release - 1.15.5 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index c83828ab..b62c0129 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.4.dev0+1c45049' +current_version = '1.15.5' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index fa38ca14..461f4702 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.4.dev0+1c45049' \ No newline at end of file +SDK_VERSION = '1.15.5' \ No newline at end of file From 3bb2b6a715b20cf707e3941e144cf7f5fddf6788 Mon Sep 17 00:00:00 2001 From: skyflow-shravan Date: Mon, 22 Sep 2025 13:26:09 +0000 Subject: [PATCH 16/28] [AUTOMATED] Public Release - 1.15.6 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index b62c0129..6c48fc93 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.5' +current_version = '1.15.6' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 461f4702..491b0797 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.5' \ No newline at end of file +SDK_VERSION = '1.15.6' \ No newline at end of file From c1172852c2fed6fed77fcaad83f16040ac996030 Mon Sep 17 00:00:00 2001 From: skyflow-shravan <121150537+skyflow-shravan@users.noreply.github.com> Date: Mon, 22 Sep 2025 20:14:53 +0530 Subject: [PATCH 17/28] SK-2313 Release/25.9.15 (#203) * SK-2131 add retry for errors (#202) --------- Co-authored-by: skyflow-shravan --- setup.py | 2 +- skyflow/vault/_client.py | 69 ++++++++++++++-------------------------- skyflow/version.py | 2 +- 3 files changed, 25 insertions(+), 48 deletions(-) diff --git a/setup.py b/setup.py index 6c48fc93..ae32d111 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.6' +current_version = '1.15.6.dev0+fcfb86f' setup( name='skyflow', diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index 97979505..e21dcbab 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -5,7 +5,6 @@ import types import requests import asyncio -from requests.adapters import HTTPAdapter from skyflow.vault._insert import getInsertRequestBody, processResponse, convertResponse from skyflow.vault._update import sendUpdateRequests, createUpdateResponseBody from skyflow.vault._config import Configuration, ConnectionConfig, DeleteOptions, DetokenizeOptions, GetOptions, InsertOptions, UpdateOptions, QueryOptions @@ -37,71 +36,49 @@ def __init__(self, config: Configuration): raise SkyflowError(SkyflowErrorCodes.INVALID_INPUT, SkyflowErrorMessages.TOKEN_PROVIDER_ERROR.value % ( str(type(config.tokenProvider))), interface=interface) - self._create_session() self.vaultID = config.vaultID self.vaultURL = config.vaultURL.rstrip('/') self.tokenProvider = config.tokenProvider self.storedToken = '' log_info(InfoMessages.CLIENT_INITIALIZED.value, interface=interface) - - def _create_session(self): - self.session = requests.Session() - adapter = HTTPAdapter(pool_connections=1, pool_maxsize=25, pool_block=True) - self.session.mount("https://", adapter) - - def __del__(self): - if (self.session is not None): - log_info(InfoMessages.CLOSING_SESSION.value, interface=InterfaceName.CLIENT.value) - self.session.close() - self.session = None - - def _get_session(self): - if (self.session is None): - self._create_session() - return self.session def insert(self, records: dict, options: InsertOptions = InsertOptions()): - max_retries = 1 interface = InterfaceName.INSERT.value log_info(InfoMessages.INSERT_TRIGGERED.value, interface=interface) self._checkConfig(interface) + jsonBody = getInsertRequestBody(records, options) requestURL = self._get_complete_vault_url() - - for attempt in range(max_retries + 1): + self.storedToken = tokenProviderWrapper( + self.storedToken, self.tokenProvider, interface) + headers = { + "Authorization": "Bearer " + self.storedToken, + "sky-metadata": json.dumps(getMetrics()) + } + max_retries = 3 + # Use for-loop for retry logic, avoid code repetition + for attempt in range(max_retries+1): try: - self.storedToken = tokenProviderWrapper( - self.storedToken, self.tokenProvider, interface) - headers = { - "Authorization": "Bearer " + self.storedToken, - "sky-metadata": json.dumps(getMetrics()), - } - response = self._get_session().post( - requestURL, - data=jsonBody, - headers=headers, - ) + # If jsonBody is a dict, use json=, else use data= + response = requests.post(requestURL, data=jsonBody, headers=headers) processedResponse = processResponse(response) result, partial = convertResponse(records, processedResponse, options) if partial: log_error(SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, interface) - elif 'records' not in result: + raise SkyflowError(SkyflowErrorCodes.PARTIAL_SUCCESS, SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, result, interface=interface) + if 'records' not in result: log_error(SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, interface) - else: - log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) + raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, result, interface=interface) + log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) return result - except requests.exceptions.ConnectionError as err: + except Exception as err: if attempt < max_retries: - continue - raise SkyflowError( - SkyflowErrorCodes.SERVER_ERROR, - SkyflowErrorMessages.NETWORK_ERROR.value % str(err), - interface=interface - ) - except SkyflowError as err: - if err.code != SkyflowErrorCodes.SERVER_ERROR or attempt >= max_retries: - raise err - continue + continue + else: + if isinstance(err, SkyflowError): + raise err + else: + raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, f"Error occurred: {err}", interface=interface) def detokenize(self, records: dict, options: DetokenizeOptions = DetokenizeOptions()): interface = InterfaceName.DETOKENIZE.value diff --git a/skyflow/version.py b/skyflow/version.py index 491b0797..ce8f9cad 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.6' \ No newline at end of file +SDK_VERSION = '1.15.6.dev0+fcfb86f' \ No newline at end of file From ada8b744ad2e322937aa5646647a5e2b73ac4c4c Mon Sep 17 00:00:00 2001 From: skyflow-shravan <121150537+skyflow-shravan@users.noreply.github.com> Date: Tue, 23 Sep 2025 13:16:44 +0530 Subject: [PATCH 18/28] SK-2131 minor fix for remote disconnect error (#204) * SK-2131 minor fix for remote disconnect error * SK-2131 change version --- setup.py | 2 +- skyflow/vault/_client.py | 71 ++++++++++++++++++++++++++-------------- skyflow/version.py | 2 +- 3 files changed, 49 insertions(+), 26 deletions(-) diff --git a/setup.py b/setup.py index ae32d111..6c48fc93 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.6.dev0+fcfb86f' +current_version = '1.15.6' setup( name='skyflow', diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index e21dcbab..96d86232 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -5,6 +5,7 @@ import types import requests import asyncio +from requests.adapters import HTTPAdapter from skyflow.vault._insert import getInsertRequestBody, processResponse, convertResponse from skyflow.vault._update import sendUpdateRequests, createUpdateResponseBody from skyflow.vault._config import Configuration, ConnectionConfig, DeleteOptions, DetokenizeOptions, GetOptions, InsertOptions, UpdateOptions, QueryOptions @@ -36,49 +37,71 @@ def __init__(self, config: Configuration): raise SkyflowError(SkyflowErrorCodes.INVALID_INPUT, SkyflowErrorMessages.TOKEN_PROVIDER_ERROR.value % ( str(type(config.tokenProvider))), interface=interface) + self._create_session() self.vaultID = config.vaultID self.vaultURL = config.vaultURL.rstrip('/') self.tokenProvider = config.tokenProvider self.storedToken = '' log_info(InfoMessages.CLIENT_INITIALIZED.value, interface=interface) + + def _create_session(self): + self.session = requests.Session() + adapter = HTTPAdapter(pool_connections=1, pool_maxsize=25, pool_block=True) + self.session.mount("https://", adapter) + + def __del__(self): + if (self.session is not None): + log_info(InfoMessages.CLOSING_SESSION.value, interface=InterfaceName.CLIENT.value) + self.session.close() + self.session = None + + def _get_session(self): + if (self.session is None): + self._create_session() + return self.session def insert(self, records: dict, options: InsertOptions = InsertOptions()): + max_retries = 1 interface = InterfaceName.INSERT.value log_info(InfoMessages.INSERT_TRIGGERED.value, interface=interface) self._checkConfig(interface) - jsonBody = getInsertRequestBody(records, options) requestURL = self._get_complete_vault_url() - self.storedToken = tokenProviderWrapper( - self.storedToken, self.tokenProvider, interface) - headers = { - "Authorization": "Bearer " + self.storedToken, - "sky-metadata": json.dumps(getMetrics()) - } - max_retries = 3 - # Use for-loop for retry logic, avoid code repetition - for attempt in range(max_retries+1): + + for attempt in range(max_retries + 1): try: - # If jsonBody is a dict, use json=, else use data= - response = requests.post(requestURL, data=jsonBody, headers=headers) + self.storedToken = tokenProviderWrapper( + self.storedToken, self.tokenProvider, interface) + headers = { + "Authorization": "Bearer " + self.storedToken, + "sky-metadata": json.dumps(getMetrics()), + } + response = self._get_session().post( + requestURL, + data=jsonBody, + headers=headers, + ) processedResponse = processResponse(response) result, partial = convertResponse(records, processedResponse, options) if partial: log_error(SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, interface) - raise SkyflowError(SkyflowErrorCodes.PARTIAL_SUCCESS, SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, result, interface=interface) - if 'records' not in result: + elif 'records' not in result: log_error(SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, interface) - raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, result, interface=interface) - log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) + else: + log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) return result - except Exception as err: + except requests.exceptions.ConnectionError as err: if attempt < max_retries: - continue - else: - if isinstance(err, SkyflowError): - raise err - else: - raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, f"Error occurred: {err}", interface=interface) + continue + raise SkyflowError( + SkyflowErrorCodes.SERVER_ERROR, + SkyflowErrorMessages.NETWORK_ERROR.value % str(err), + interface=interface + ) + except SkyflowError as err: + if err.code != SkyflowErrorCodes.SERVER_ERROR or attempt >= max_retries: + raise err + continue def detokenize(self, records: dict, options: DetokenizeOptions = DetokenizeOptions()): interface = InterfaceName.DETOKENIZE.value @@ -292,4 +315,4 @@ def delete(self, records: dict, options: DeleteOptions = DeleteOptions()): else: log_info(InfoMessages.DELETE_DATA_SUCCESS.value, interface) - return result + return result \ No newline at end of file diff --git a/skyflow/version.py b/skyflow/version.py index ce8f9cad..491b0797 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.6.dev0+fcfb86f' \ No newline at end of file +SDK_VERSION = '1.15.6' \ No newline at end of file From b4bd12985c4d2600182f11a5880abed49967bae7 Mon Sep 17 00:00:00 2001 From: skyflow-shravan Date: Tue, 23 Sep 2025 08:19:08 +0000 Subject: [PATCH 19/28] [AUTOMATED] Public Release - 1.16.0 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 6c48fc93..2ea897fc 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.6' +current_version = '1.16.0' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 491b0797..2bac4cd2 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.6' \ No newline at end of file +SDK_VERSION = '1.16.0' \ No newline at end of file From 10350bcc0110a8e6c17e1921fbd74e873b7e4a7e Mon Sep 17 00:00:00 2001 From: skyflow-shravan <121150537+skyflow-shravan@users.noreply.github.com> Date: Wed, 24 Sep 2025 00:12:45 +0530 Subject: [PATCH 20/28] SK-2131 retry for errors in insert (#205) --- skyflow/vault/_client.py | 68 +++++++++++++--------------------------- 1 file changed, 22 insertions(+), 46 deletions(-) diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index 96d86232..f3e02def 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -5,7 +5,6 @@ import types import requests import asyncio -from requests.adapters import HTTPAdapter from skyflow.vault._insert import getInsertRequestBody, processResponse, convertResponse from skyflow.vault._update import sendUpdateRequests, createUpdateResponseBody from skyflow.vault._config import Configuration, ConnectionConfig, DeleteOptions, DetokenizeOptions, GetOptions, InsertOptions, UpdateOptions, QueryOptions @@ -37,71 +36,48 @@ def __init__(self, config: Configuration): raise SkyflowError(SkyflowErrorCodes.INVALID_INPUT, SkyflowErrorMessages.TOKEN_PROVIDER_ERROR.value % ( str(type(config.tokenProvider))), interface=interface) - self._create_session() self.vaultID = config.vaultID self.vaultURL = config.vaultURL.rstrip('/') self.tokenProvider = config.tokenProvider self.storedToken = '' log_info(InfoMessages.CLIENT_INITIALIZED.value, interface=interface) - - def _create_session(self): - self.session = requests.Session() - adapter = HTTPAdapter(pool_connections=1, pool_maxsize=25, pool_block=True) - self.session.mount("https://", adapter) - - def __del__(self): - if (self.session is not None): - log_info(InfoMessages.CLOSING_SESSION.value, interface=InterfaceName.CLIENT.value) - self.session.close() - self.session = None - - def _get_session(self): - if (self.session is None): - self._create_session() - return self.session def insert(self, records: dict, options: InsertOptions = InsertOptions()): - max_retries = 1 interface = InterfaceName.INSERT.value log_info(InfoMessages.INSERT_TRIGGERED.value, interface=interface) self._checkConfig(interface) jsonBody = getInsertRequestBody(records, options) requestURL = self._get_complete_vault_url() - - for attempt in range(max_retries + 1): + self.storedToken = tokenProviderWrapper( + self.storedToken, self.tokenProvider, interface) + headers = { + "Authorization": "Bearer " + self.storedToken, + "sky-metadata": json.dumps(getMetrics()) + } + max_retries = 3 + # Use for-loop for retry logic, avoid code repetition + for attempt in range(max_retries+1): try: - self.storedToken = tokenProviderWrapper( - self.storedToken, self.tokenProvider, interface) - headers = { - "Authorization": "Bearer " + self.storedToken, - "sky-metadata": json.dumps(getMetrics()), - } - response = self._get_session().post( - requestURL, - data=jsonBody, - headers=headers, - ) + # If jsonBody is a dict, use json=, else use data= + response = requests.post(requestURL, data=jsonBody, headers=headers) processedResponse = processResponse(response) result, partial = convertResponse(records, processedResponse, options) if partial: log_error(SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, interface) - elif 'records' not in result: + raise SkyflowError(SkyflowErrorCodes.PARTIAL_SUCCESS, SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, result, interface=interface) + if 'records' not in result: log_error(SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, interface) - else: - log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) + raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, result, interface=interface) + log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) return result - except requests.exceptions.ConnectionError as err: + except Exception as err: if attempt < max_retries: - continue - raise SkyflowError( - SkyflowErrorCodes.SERVER_ERROR, - SkyflowErrorMessages.NETWORK_ERROR.value % str(err), - interface=interface - ) - except SkyflowError as err: - if err.code != SkyflowErrorCodes.SERVER_ERROR or attempt >= max_retries: - raise err - continue + continue + else: + if isinstance(err, SkyflowError): + raise err + else: + raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, f"Error occurred: {err}", interface=interface) def detokenize(self, records: dict, options: DetokenizeOptions = DetokenizeOptions()): interface = InterfaceName.DETOKENIZE.value From abd48ff9694c73e5a310372ba93dbe2967dbab14 Mon Sep 17 00:00:00 2001 From: skyflow-shravan Date: Tue, 23 Sep 2025 18:48:05 +0000 Subject: [PATCH 21/28] [AUTOMATED] Public Release - 1.15.7 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 2ea897fc..fbb55fbb 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.16.0' +current_version = '1.15.7' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 2bac4cd2..9a299b01 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.16.0' \ No newline at end of file +SDK_VERSION = '1.15.7' \ No newline at end of file From 44e611cec71020b8f30c61313403295c6dd5c525 Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Mon, 29 Sep 2025 19:41:55 +0530 Subject: [PATCH 22/28] SK-2325 Fix retry logic when continueOnError is set to true --- skyflow/vault/_client.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/skyflow/vault/_client.py b/skyflow/vault/_client.py index f3e02def..24ba524f 100644 --- a/skyflow/vault/_client.py +++ b/skyflow/vault/_client.py @@ -64,11 +64,10 @@ def insert(self, records: dict, options: InsertOptions = InsertOptions()): result, partial = convertResponse(records, processedResponse, options) if partial: log_error(SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, interface) - raise SkyflowError(SkyflowErrorCodes.PARTIAL_SUCCESS, SkyflowErrorMessages.BATCH_INSERT_PARTIAL_SUCCESS.value, result, interface=interface) - if 'records' not in result: + elif 'records' not in result: log_error(SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, interface) - raise SkyflowError(SkyflowErrorCodes.SERVER_ERROR, SkyflowErrorMessages.BATCH_INSERT_FAILURE.value, result, interface=interface) - log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) + else: + log_info(InfoMessages.INSERT_DATA_SUCCESS.value, interface) return result except Exception as err: if attempt < max_retries: From d6cbc682b7c203db718a5eeab06ffe695d131f87 Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Mon, 29 Sep 2025 15:18:20 +0000 Subject: [PATCH 23/28] [AUTOMATED] Private Release 1.15.7.dev0+cce9db4 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index fbb55fbb..5d6e3251 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.7' +current_version = '1.15.7.dev0+cce9db4' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 9a299b01..733b2c9d 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.7' \ No newline at end of file +SDK_VERSION = '1.15.7.dev0+cce9db4' \ No newline at end of file From b69c3574b4848dd6e3f1b30a6f1a6018cf1dbc25 Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Tue, 30 Sep 2025 17:51:56 +0530 Subject: [PATCH 24/28] SK-2325 Added unit tests for fix --- tests/vault/test_insert_mocked.py | 239 ++++++++++++++++++++++++++++++ 1 file changed, 239 insertions(+) create mode 100644 tests/vault/test_insert_mocked.py diff --git a/tests/vault/test_insert_mocked.py b/tests/vault/test_insert_mocked.py new file mode 100644 index 00000000..20be5f56 --- /dev/null +++ b/tests/vault/test_insert_mocked.py @@ -0,0 +1,239 @@ +''' + Copyright (c) 2022 Skyflow, Inc. +''' +import json +import unittest +from unittest.mock import Mock, patch, ANY +import os +from dotenv import dotenv_values +import requests +from requests.models import Response +from skyflow.errors._skyflow_errors import SkyflowError +from skyflow.vault._client import Client +from skyflow.vault._config import Configuration, InsertOptions, BYOT + +class TestInsertWithMocks(unittest.TestCase): + def setUp(self) -> None: + self.envValues = dotenv_values(".env") + self.dataPath = os.path.join(os.getcwd(), 'tests/vault/data/') + self.valid_token = self.envValues["MOCK_TOKEN"] + self.record = { + "table": "pii_fields", + "fields": { + "cardNumber": "4111-1111-1111-1111", + "cvv": "234" + } + } + self.data = {"records": [self.record, self.record]} + + # Mock API response data + self.mock_success_response = { + "responses": [ + { + "records": [ + { + "skyflow_id": "123", + "tokens": { + "cardNumber": "card_number_token", + "cvv": "cvv_token" + } + } + ] + }, + { + "records": [ + { + "skyflow_id": "456", + "tokens": { + "cardNumber": "card_number_token", + "cvv": "cvv_token" + } + } + ] + }, + ], + "requestId": "test-request-id" + } + + self.mock_error_response = { + "error": { + "grpc_code": 3, + "http_code": 400, + "message": "Insert failed due to error.", + "http_status": "Bad Request" + } + } + + # Create configurations for testing with different token scenarios + self.valid_config = Configuration( + 'test-vault-id', + 'https://test-vault.skyflow.com', + lambda: self.valid_token + ) + + self.invalid_config = Configuration( + 'test-vault-id', + 'https://test-vault.skyflow.com', + lambda: 'invalid-token' + ) + + @patch('requests.post') + def test_successful_insert(self, mock_post): + # Setup mock response + mock_response = Mock(spec=Response) + mock_response.status_code = 200 + mock_response.content = json.dumps(self.mock_success_response).encode('utf-8') + mock_response.headers = {'x-request-id': 'test-request-id'} + mock_post.return_value = mock_response # Create client and perform insert + client = Client(self.valid_config) + options = InsertOptions(tokens=True) + result = client.insert(self.data, options) + + # Verify the result + self.assertIn("records", result) + self.assertEqual(len(result["records"]), 2) + self.assertEqual(result["records"][0]["fields"]["cardNumber"], "card_number_token") + + # Verify the API was called with correct parameters + mock_post.assert_called_once() + called_url = mock_post.call_args[0][0] + self.assertTrue(called_url.endswith("/v1/vaults/test-vault-id")) + + @patch('requests.post') + def test_insert_api_error(self, mock_post): + # Setup mock error response + mock_response = Mock(spec=Response) + mock_response.status_code = 400 + mock_response.content = json.dumps(self.mock_error_response).encode('utf-8') + mock_response.headers = {'x-request-id': 'test-request-id'} + + # Mock raise_for_status to raise HTTPError + def raise_for_status(): + raise requests.exceptions.HTTPError("400 Client Error") + mock_response.raise_for_status = raise_for_status + + mock_post.return_value = mock_response + + # Create client and attempt insert + client = Client(self.valid_config) + options = InsertOptions(tokens=True) + + # This should raise a SkyflowError + with self.assertRaises(SkyflowError) as context: + client.insert(self.data, options) + + # Verify the error details + self.assertEqual(context.exception.code, 400) + self.assertIn("Insert failed due to error", context.exception.message) + + @patch('requests.post') + def test_insert_network_error(self, mock_post): + # Setup mock to simulate network error + mock_post.side_effect = Exception("Network error") + + # Create client and attempt insert + client = Client(self.valid_config) + options = InsertOptions(tokens=True) + + # Assert that the insert raises an error + with self.assertRaises(SkyflowError) as context: + client.insert(self.data, options) + + @patch('requests.post') + def test_insert_with_continue_on_error_partial_sucess(self, mock_post): + # Setup mock response with partial success + partial_response = { + "responses": [ + { + "Body": { + "records": [ + { + "skyflow_id": "123", + "tokens": {"cardNumber": "token1"} + } + ] + }, + "Status": 200 + }, + { + "Body": { + "error": "Unique constraint violation" + }, + "Status": 400 + } + ], + "requestId": "test-request-id" + } + + mock_response = Mock(spec=Response) + mock_response.status_code = 207 + mock_response.content = json.dumps(partial_response).encode('utf-8') + mock_response.headers = {'x-request-id': 'test-request-id'} + mock_post.return_value = mock_response + + # Create client and perform insert with continueOnError + client = Client(self.valid_config) + options = InsertOptions(tokens=True, continueOnError=True) + + # Create test data with two records + test_data = { + "records": [ + self.record, + self.record # Duplicate record that will cause error + ] + } + + result = client.insert(test_data, options) + + # Verify partial success results + self.assertIn("records", result) + self.assertIn("errors", result) + self.assertEqual(len(result["records"]), 1) + self.assertEqual(len(result["errors"]), 1) + + @patch('requests.post') + def test_insert_with_continue_on_error_complete_failure(self, mock_post): + # Setup mock response with complete failure + complete_failure_response = { + "responses": [ + { + "Body": { + "error": "Unique constraint violation" + }, + "Status": 400 + }, + { + "Body": { + "error": "Unique constraint violation" + }, + "Status": 400 + } + ], + "requestId": "test-request-id" + } + + mock_response = Mock(spec=Response) + mock_response.status_code = 207 + mock_response.content = json.dumps(complete_failure_response).encode('utf-8') + mock_response.headers = {'x-request-id': 'test-request-id'} + mock_post.return_value = mock_response + + # Create client and perform insert with continueOnError + client = Client(self.valid_config) + options = InsertOptions(tokens=True, continueOnError=True) + + # Create test data with two records + test_data = { + "records": [ + self.record, + self.record # Duplicate record that will cause error + ] + } + + result = client.insert(test_data, options) + + # Verify complete failure results + self.assertIn("errors", result) + self.assertNotIn("records", result) + self.assertEqual(len(result["errors"]), 2) + From 1c6553f14d3a3149c8c370f7990e74ec51231c81 Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Tue, 30 Sep 2025 12:22:37 +0000 Subject: [PATCH 25/28] [AUTOMATED] Private Release 1.15.7.dev0+b69c357 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 5d6e3251..d1bf86a4 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.7.dev0+cce9db4' +current_version = '1.15.7.dev0+b69c357' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 733b2c9d..790a03a2 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.7.dev0+cce9db4' \ No newline at end of file +SDK_VERSION = '1.15.7.dev0+b69c357' \ No newline at end of file From f3c530ed2b4489215d1cf7e25b83ef1ac6a24666 Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Tue, 30 Sep 2025 18:49:52 +0530 Subject: [PATCH 26/28] SK-2325 Updated new env secret in workflows --- .github/workflows/ci.yml | 1 + .github/workflows/main.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 06aa9a50..c447d526 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,6 +44,7 @@ jobs: echo SKYFLOW_ID1=${{secrets.SKYFLOW_ID1}} >> .env echo SKYFLOW_ID2=${{secrets.SKYFLOW_ID2}} >> .env echo SKYFLOW_ID3=${{secrets.SKYFLOW_ID3}} >> .env + echo MOCK_TOKEN=${{secrets.MOCK_TOKEN}} >> .env - name: 'Run Tests' run: | diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 48472e78..b0bc7278 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -34,6 +34,7 @@ jobs: echo SKYFLOW_ID1=${{secrets.SKYFLOW_ID1}} >> .env echo SKYFLOW_ID2=${{secrets.SKYFLOW_ID2}} >> .env echo SKYFLOW_ID3=${{secrets.SKYFLOW_ID3}} >> .env + echo MOCK_TOKEN=${{secrets.MOCK_TOKEN}} >> .env - name: 'Run Tests' run: | From 35916c21fe7ab8620b6b600193991cfed8621918 Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Tue, 30 Sep 2025 13:20:13 +0000 Subject: [PATCH 27/28] [AUTOMATED] Private Release 1.15.7.dev0+f3c530e --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index d1bf86a4..20c3abae 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.7.dev0+b69c357' +current_version = '1.15.7.dev0+f3c530e' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 790a03a2..9f437f19 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.7.dev0+b69c357' \ No newline at end of file +SDK_VERSION = '1.15.7.dev0+f3c530e' \ No newline at end of file From 706d0d2c0f63d0280a597bc686f583573c6a99e6 Mon Sep 17 00:00:00 2001 From: skyflow-vivek Date: Tue, 30 Sep 2025 13:38:46 +0000 Subject: [PATCH 28/28] [AUTOMATED] Public Release - 1.15.8 --- setup.py | 2 +- skyflow/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 20c3abae..dcfcfc22 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): raise RuntimeError("skyflow requires Python 3.7+") -current_version = '1.15.7.dev0+f3c530e' +current_version = '1.15.8' setup( name='skyflow', diff --git a/skyflow/version.py b/skyflow/version.py index 9f437f19..fdc039b8 100644 --- a/skyflow/version.py +++ b/skyflow/version.py @@ -1 +1 @@ -SDK_VERSION = '1.15.7.dev0+f3c530e' \ No newline at end of file +SDK_VERSION = '1.15.8' \ No newline at end of file