diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index bf0918c..93d8e4f 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -54,13 +54,14 @@ jobs:
     env:
       TUTOR_ROOT: ${{ github.workspace }}/strains/${{ inputs.STRAIN_PATH }}
       TUTOR_PLUGINS_ROOT: ${{ github.workspace }}/strains/${{ inputs.STRAIN_PATH }}/plugins
+      CONFIG_FILE: strains/${{ inputs.STRAIN_PATH }}/config.yml
 
     steps:
       - name: Checkout Picasso repository for utility scripts
         uses: actions/checkout@v4
         with:
           repository: edunext/picasso
-          ref: main
+          ref: mjh/repo-and-syntx-analysis
           path: picasso
 
       - name: Checkout strains repository for build configurations
@@ -73,14 +74,21 @@ jobs:
 
       - name: Install necessary dependencies
         run: |
-          pip install pyyaml
+          pip install pyyaml schema requests
+
+      - name: Validate strain configuration
+        working-directory: ${{ github.workspace }}
+        env:
+          STRAIN_VALIDATION_SCRIPTS_PATH: picasso/.github/workflows/scripts/strain_validation
+        run: |
+          python $STRAIN_VALIDATION_SCRIPTS_PATH/validate_schema.py $CONFIG_FILE
+          python $STRAIN_VALIDATION_SCRIPTS_PATH/validate_repos.py $CONFIG_FILE
 
       - name: Get Tutor Configurations from config.yml and set them as an environment variable
         working-directory: ${{ github.workspace }}
         env:
           REQUIRED_KEYS: TUTOR_VERSION
           OPTIONAL_KEYS: DOCKER_REGISTRY
-          CONFIG_FILE: strains/${{ inputs.STRAIN_PATH }}/config.yml
           SCRIPT_PATH: picasso/.github/workflows/scripts/get_tutor_config.py
         run: |
           ENV_VARS=$(python $SCRIPT_PATH --config-file $CONFIG_FILE --required-keys $REQUIRED_KEYS --optional-keys $OPTIONAL_KEYS)
diff --git a/.github/workflows/scripts/strain_validation/validate_repos.py b/.github/workflows/scripts/strain_validation/validate_repos.py
new file mode 100644
index 0000000..0e5eb6b
--- /dev/null
+++ b/.github/workflows/scripts/strain_validation/validate_repos.py
@@ -0,0 +1,190 @@
+"""
+Validate repository URLs and configurations in the strain config file.
+"""
+from __future__ import annotations
+
+import argparse
+import logging
+import re
+from collections.abc import Sequence
+from io import TextIOWrapper
+from typing import Any, Callable, Dict, List
+from urllib.parse import urlsplit, urlunsplit
+
+import requests
+import yaml
+
+LOG = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+GIT_SUFFIX = ".git"
+# Matches the URL of a ``git+http(s)://`` requirement up to the first
+# whitespace or ``#`` (where the ``#egg=...`` fragment starts).
+GIT_REQUIREMENT_REGEX = re.compile(r"git\+(https?://[^\s#]+)")
+
+
+def strip_git_suffix(value: str) -> str:
+    """
+    Return ``value`` without a single trailing ``.git`` suffix.
+
+    ``str.rstrip(".git")`` is not used because it strips any trailing run of
+    the characters ".", "g", "i" and "t" (e.g. ``digit.git`` becomes ``d``).
+    """
+    if value.endswith(GIT_SUFFIX):
+        return value[:-len(GIT_SUFFIX)]
+    return value
+
+
+def build_browse_url(repository: str, ref: str = "") -> str:
+    """
+    Build the web URL used to check that ``ref`` exists in ``repository``.
+
+    Args:
+        repository (str): Repository URL, with or without the ``.git`` suffix.
+        ref (str): Branch, tag or commit. May be empty.
+
+    Returns:
+        str: ``<repository>/tree/<ref>``, or ``<repository>`` if ref is empty.
+    """
+    base_url = strip_git_suffix(repository.strip().rstrip("/"))
+    return f"{base_url}/tree/{ref}" if ref else base_url
+
+
+def pip_requirement_to_url(requirement: str) -> str | None:
+    """
+    Convert a ``git+http(s)`` pip requirement into the URL to validate.
+
+    Only an ``@`` in the URL path is treated as the ref separator, so
+    credentials in the host part (``user@host``) are left untouched.
+
+    Returns:
+        str | None: The URL to check, or None if the requirement does not
+        reference a ``git+http(s)`` repository.
+    """
+    match = GIT_REQUIREMENT_REGEX.search(requirement)
+    if not match:
+        return None
+    parts = urlsplit(match.group(1))
+    repo_path, _, ref = parts.path.partition("@")
+    repository = urlunsplit((parts.scheme, parts.netloc, repo_path, "", ""))
+    return build_browse_url(repository, ref)
+
+
+def validate_repo_url(url: str) -> bool:
+    """
+    Validate a repository URL by making a GET request.
+
+    Returns:
+        bool: True if the request succeeds; False if it raises a
+        ``requests`` exception, including an HTTP error status.
+    """
+    try:
+        response = requests.get(url, timeout=5)
+        response.raise_for_status()
+    except requests.exceptions.RequestException:
+        LOG.exception("Exception while checking repo URL: %s", url)
+        return False
+    return True
+
+
+def check_edx_platform_repo(data: Dict[str, Any]) -> bool:
+    """
+    Check the EDX_PLATFORM_REPOSITORY URL in the YAML data.
+
+    The check is skipped when EDX_PLATFORM_REPOSITORY is not set. When
+    EDX_PLATFORM_VERSION is not set, only the repository URL is checked.
+    """
+    repository = str(data.get("EDX_PLATFORM_REPOSITORY") or "")
+    if not repository:
+        LOG.info("EDX_PLATFORM_REPOSITORY is not set; skipping its validation.")
+        return True
+    version = str(data.get("EDX_PLATFORM_VERSION") or "")
+    url = build_browse_url(repository, version)
+    if not validate_repo_url(url):
+        LOG.error("Failed to validate edx_platform_repository URL: %s", url)
+        return False
+    return True
+
+
+def check_openedx_extra_pip_req_repos(data: Dict[str, Any]) -> bool:
+    """
+    Check additional pip requirement repos in the YAML data.
+
+    Every ``git+http(s)`` entry of OPENEDX_EXTRA_PIP_REQUIREMENTS is checked,
+    so all broken URLs are reported in one run. Other entries are ignored.
+    """
+    all_valid = True
+    for requirement in data.get("OPENEDX_EXTRA_PIP_REQUIREMENTS") or []:
+        url = pip_requirement_to_url(str(requirement))
+        if url is not None and not validate_repo_url(url):
+            LOG.error("Failed to validate OPENEDX_EXTRA_PIP_REQUIREMENTS URL: %s", url)
+            all_valid = False
+    return all_valid
+
+
+def validate_data(data: Dict[str, Any], checks: List[Callable[[Dict[str, Any]], bool]]) -> bool:
+    """
+    Run all provided validation checks on the YAML data.
+
+    Every check is executed, even after a failure, so that all problems are
+    logged in a single run.
+    """
+    results = [check(data) for check in checks]
+    return all(results)
+
+
+def validate_strain_file(strain_file: TextIOWrapper) -> bool:
+    """
+    Load a strain configuration file and validate its repository URLs.
+    """
+    try:
+        data = yaml.safe_load(strain_file)
+    except yaml.YAMLError:
+        LOG.exception("Error loading YAML data.")
+        return False
+
+    if not isinstance(data, dict):
+        # An empty file loads as None; a list or scalar is not a valid strain.
+        LOG.error("%s must contain a YAML mapping.", strain_file.name)
+        return False
+
+    checks = [
+        check_edx_platform_repo,
+        check_openedx_extra_pip_req_repos,
+    ]
+    return validate_data(data, checks)
+
+
+def main(argv: Sequence[str] | None = None) -> int:
+    """
+    Entry point for validating repository URLs in strain configuration files.
+
+    This function parses command-line arguments to load each YAML file,
+    performs validation checks on specific repository URLs, and logs results.
+    If any validation fails, an error code is returned.
+
+    Args:
+        argv (Sequence[str] | None): Optional sequence of command-line arguments.
+            If None, arguments will be taken from sys.argv.
+
+    Returns:
+        int: 0 if all URLs are validated successfully, 1 if any validation fails.
+    """
+    parser = argparse.ArgumentParser(description="Validate repository URLs in strain config file.")
+    parser.add_argument("file", type=argparse.FileType("r"), nargs="+")
+    args = parser.parse_args(argv)
+
+    # Every file given is validated (no short-circuit) and closed after use.
+    results = []
+    for strain_file in args.file:
+        with strain_file:
+            results.append(validate_strain_file(strain_file))
+    if not all(results):
+        return 1
+
+    LOG.info("All repository URLs validated successfully.")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/.github/workflows/scripts/strain_validation/validate_schema.py b/.github/workflows/scripts/strain_validation/validate_schema.py
new file mode 100644
index 0000000..f2bbf8b
--- /dev/null
+++ b/.github/workflows/scripts/strain_validation/validate_schema.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python
+"""
+Validate syntax in a YAML file based on specific rules.
+"""
+
+from __future__ import annotations
+
+import argparse
+import logging
+from collections.abc import Sequence
+from io import TextIOWrapper
+from typing import Any, Dict
+
+import yaml
+from schema import Schema, And, Optional, Use, SchemaError, Regex
+
+LOG = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+TUTOR_VERSION_REGEX = r"^v\d+\.\d+\.\d+$"
+EMPTY_LIST_ERROR_MSG = "{} must be a non-empty list of non-empty strings"
+
+
+def perform_extra_validations(data: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Perform additional validations on the strain configuration data.
+
+    Checks for custom conditions not covered by the schema:
+
+    - Ensure the configuration is a mapping (an empty YAML file loads as None).
+    - Ensure either PICASSO_THEMES_NAME or PICASSO_DEFAULT_SITE_THEME is defined
+      if PICASSO_THEMES exists.
+
+    Args:
+        data (Dict[str, Any]): The YAML data dictionary.
+
+    Returns:
+        Dict[str, Any]: Validated data dictionary.
+
+    Raises:
+        SchemaError: If required conditions are not met.
+    """
+    if not isinstance(data, dict):
+        raise SchemaError("Strain configuration must be a mapping of settings.")
+    if "PICASSO_THEMES" in data and not ("PICASSO_THEMES_NAME" in data or "PICASSO_DEFAULT_SITE_THEME" in data):
+        raise SchemaError("PICASSO_THEMES_NAME or PICASSO_DEFAULT_SITE_THEME must be defined.")
+    return data
+
+
+def all_non_empty_strings(items: list) -> bool:
+    """Return True if every item of the list is a non-empty string."""
+    return all(isinstance(item, str) and item for item in items)
+
+
+# Shape shared by the PICASSO_*_DPKG entries and by each PICASSO_THEMES item.
+REPOSITORY_SCHEMA = {
+    "name": And(str, len),
+    "repo": And(str, len),
+    "version": And(str, len),
+}
+
+# The extra validations and the key rules are combined with And. Passing the
+# dictionary as the second positional argument of Schema would bind it to the
+# ``error`` parameter, and its rules would never be applied. The extra
+# validations come first because they inspect PICASSO_DEFAULT_SITE_THEME,
+# which the key rules do not declare.
+STRAIN_SCHEMA = Schema(
+    And(
+        Use(perform_extra_validations),
+        Schema(
+            {
+                "TUTOR_VERSION": And(
+                    str,
+                    Regex(TUTOR_VERSION_REGEX, error="TUTOR_VERSION must be in the format vX.Y.Z (e.g., v5.3.0)")
+                ),
+                Optional(Regex(r"^PICASSO_.+_DPKG$")): REPOSITORY_SCHEMA,
+                Optional("PICASSO_THEMES"): And(list, len, [REPOSITORY_SCHEMA]),
+                Optional("PICASSO_THEMES_NAME"): And(
+                    list,
+                    len,
+                    all_non_empty_strings,
+                    error=EMPTY_LIST_ERROR_MSG.format("PICASSO_THEMES_NAME"),
+                ),
+                Optional("PICASSO_THEME_DIRS"): And(
+                    list,
+                    len,
+                    all_non_empty_strings,
+                    error=EMPTY_LIST_ERROR_MSG.format("PICASSO_THEME_DIRS"),
+                ),
+            },
+            ignore_extra_keys=True,
+        ),
+    )
+)
+
+
+def validate_with_warnings(data: Dict[str, Any]) -> bool:
+    """
+    Validate the data against the strain schema and log warnings for missing optional keys.
+
+    Args:
+        data (Dict[str, Any]): The loaded YAML data to validate.
+
+    Returns:
+        bool: True if validation is successful; otherwise, False.
+
+    Logs warnings for missing optional keys such as PICASSO_THEMES and PICASSO_THEMES_NAME.
+    """
+    try:
+        STRAIN_SCHEMA.validate(data)
+    except SchemaError as e:
+        LOG.error("Schema validation failed: %s", e)
+        return False
+
+    if not data.get("PICASSO_THEMES"):
+        LOG.warning("No PICASSO_THEMES key found; themes will not be enabled.")
+        if not data.get("PICASSO_THEMES_NAME"):
+            LOG.warning("No PICASSO_THEMES_NAME key found; default themes will be used.")
+    LOG.info("Strain syntax and structure validation completed successfully.")
+    return True
+
+
+def validate_yaml_file(yaml_file: TextIOWrapper) -> bool:
+    """
+    Load and validate YAML file structure against the defined schema.
+
+    Args:
+        yaml_file (TextIOWrapper): Opened YAML file for reading.
+
+    Returns:
+        bool: True if YAML content is valid; otherwise, False.
+
+    Logs syntax errors in the YAML structure.
+    """
+    try:
+        config_yml = yaml.safe_load(yaml_file)
+    except yaml.YAMLError as yaml_error:
+        LOG.error("YAML syntax error: %s", yaml_error)
+        return False
+    return validate_with_warnings(config_yml)
+
+
+def main(argv: Sequence[str] | None = None) -> int:
+    """
+    Execute syntax checks on configuration files for strains.
+
+    Args:
+        argv (Sequence[str] | None): Command-line arguments.
+
+    Returns:
+        int: 0 if every configuration file is valid; 1 otherwise.
+    """
+    parser = argparse.ArgumentParser(description="Validate YAML file syntax and strain schema.")
+    parser.add_argument("file", type=argparse.FileType("r"), nargs="+", help="YAML file to validate.")
+    args = parser.parse_args(argv)
+
+    # Every file given is validated (no short-circuit) and closed after use.
+    results = []
+    for strain_file in args.file:
+        with strain_file:
+            results.append(validate_yaml_file(strain_file))
+    return 0 if all(results) else 1
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())