From 5a6a8cd9588187f0ebcaaf58316905655d8b60d6 Mon Sep 17 00:00:00 2001 From: Ren Silva Date: Thu, 27 Nov 2025 04:53:05 +0000 Subject: [PATCH 1/2] created help-document analysis script --- .devcontainer/devcontainer.json | 9 +- scripts/ai/.env.example | 14 + scripts/ai/.gitignore | 20 ++ scripts/ai/README.md | 209 ++++++++++++ scripts/ai/evaluate-help.py | 170 ++++++++++ scripts/ai/help_evaluator.py | 308 ++++++++++++++++++ .../document-quality-scoring-framework.md | 216 ++++++++++++ scripts/ai/prompts/extractor.md | 43 +++ scripts/ai/prompts/simple-evaluator.md | 92 ++++++ scripts/ai/requirements.txt | 5 + 10 files changed, 1082 insertions(+), 4 deletions(-) create mode 100644 scripts/ai/.env.example create mode 100644 scripts/ai/.gitignore create mode 100644 scripts/ai/README.md create mode 100644 scripts/ai/evaluate-help.py create mode 100644 scripts/ai/help_evaluator.py create mode 100644 scripts/ai/prompts/document-quality-scoring-framework.md create mode 100644 scripts/ai/prompts/extractor.md create mode 100644 scripts/ai/prompts/simple-evaluator.md create mode 100644 scripts/ai/requirements.txt diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 82903b8ee52..7dea29cc189 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,14 +1,15 @@ { "name": "Azure CLI Dev Container", "build": { - "dockerfile": "Dockerfile" + "dockerfile": "Dockerfile", + "context": ".." }, "features": { "ghcr.io/devcontainers/features/github-cli:1": {} }, - "workspaceFolder": "/workspaces", - "onCreateCommand": "uv venv .venv --seed", - "postCreateCommand": "REPO_NAME=$(basename $GITHUB_REPOSITORY) && cat $REPO_NAME/.devcontainer/init.sh >> ~/.bashrc && mkdir .vscode && cp $REPO_NAME/.devcontainer/mcp.json .vscode/", + // "workspaceFolder": "/workspaces", + "onCreateCommand": "uv venv .venv --seed --clear", + "postCreateCommand": "cat .devcontainer/init.sh >> ~/.bashrc && mkdir -p .vscode && cp .devcontainer/mcp.json .vscode/", "hostRequirements": { "cpus": 16, "memory": "64gb", diff --git a/scripts/ai/.env.example b/scripts/ai/.env.example new file mode 100644 index 00000000000..8c16354bded --- /dev/null +++ b/scripts/ai/.env.example @@ -0,0 +1,14 @@ +# Azure OpenAI Configuration +# Copy this file to .env and fill in your actual values + +# Your Azure OpenAI API key +AZURE_OPENAI_API_KEY=your_api_key_here + +# Azure OpenAI API version (e.g., 2024-02-15-preview) +AZURE_OPENAI_API_VERSION=2024-02-15-preview + +# Your Azure OpenAI endpoint URL +AZURE_OPENAI_ENDPOINT=https://your-resource-name.openai.azure.com/ + +# The name of your deployed model +AZURE_OPENAI_DEPLOYMENT_NAME=gpt-4 diff --git a/scripts/ai/.gitignore b/scripts/ai/.gitignore new file mode 100644 index 00000000000..0fe8dcc0fc3 --- /dev/null +++ b/scripts/ai/.gitignore @@ -0,0 +1,20 @@ +# Ignore analysis output directory +analysis/ + +# Ignore environment configuration file +.env + +# Ignore Python cache +__pycache__/ +*.pyc +*.pyo +*.pyd + +# Ignore Jupyter notebooks and checkpoints +*.ipynb +.ipynb_checkpoints/ + +# Ignore virtual environments +venv/ +.venv/ +env/ diff --git a/scripts/ai/README.md b/scripts/ai/README.md new file mode 100644 index 00000000000..6f6ae273f46 --- /dev/null +++ b/scripts/ai/README.md @@ -0,0 +1,209 @@ +# Azure CLI Help Documentation Evaluator + +This tool evaluates the quality of Azure CLI help documentation using Azure OpenAI. It extracts help content from Python files and evaluates them using two different evaluation frameworks. 
+ +## Overview + +The evaluator performs three main steps: +1. **Extract**: Extracts help documentation from `*_help.py` files +2. **Evaluate**: Runs two evaluators: + - **Simple Evaluator**: Quick quality assessment + - **Document Quality Scoring Framework (DQSF)**: Comprehensive Microsoft Learn quality standards assessment +3. **Report**: Generates detailed markdown reports with both evaluations + +## Prerequisites + +- Python 3.10 or higher +- Azure OpenAI access with API credentials + +## Installation + +1. Install required dependencies: +```bash +pip install -r requirements.txt +``` + +2. Create a `.env` file in the `scripts/ai/` directory with your Azure OpenAI credentials: + +```bash +# Azure OpenAI Configuration +AZURE_OPENAI_API_KEY=your_api_key_here +AZURE_OPENAI_API_VERSION=2025-04-01-preview +AZURE_OPENAI_ENDPOINT=https://your-resource-name.openai.azure.com/ +AZURE_OPENAI_DEPLOYMENT_NAME=gpt-4o-mini +``` + +### Getting Azure OpenAI Credentials + +- `AZURE_OPENAI_API_KEY`: Your Azure OpenAI API key from Azure Portal +- `AZURE_OPENAI_API_VERSION`: API version (e.g., `2025-04-01-preview`) +- `AZURE_OPENAI_ENDPOINT`: Your Azure OpenAI resource endpoint URL +- `AZURE_OPENAI_DEPLOYMENT_NAME`: The name of your deployed model (e.g., `gpt-4o-mini`, `gpt-4`) + +## Usage + +### Evaluate a Single Help File + +```bash +python evaluate-help.py -i ../../src/azure-cli/azure/cli/command_modules/search/_help.py +``` + +### Evaluate All Help Files in a Directory + +```bash +python evaluate-help.py -i ../../src/azure-cli/azure/cli/command_modules/ +``` + +### Specify Custom Output Directory + +```bash +python evaluate-help.py -i ../../src/azure-cli/azure/cli/command_modules/ -o ./custom-analysis +``` + +## Command Line Options + +- `-i, --input` (required): Path to help file or directory containing help files +- `-o, --output` (optional): Output directory for analysis results (default: `./analysis`) + +## Output + +The tool generates markdown files in the output directory with the following structure: + +``` +analysis/ + ├── modulename_YYYYMMDD-HHMMSS.md + ├── ... +``` + +Each analysis file contains: +- **Metadata**: Date, source file, model used, token usage +- **Original Source Code**: Collapsible section with the raw Python code +- **Extracted Help Content**: The extracted documentation +- **Simple Quality Evaluation**: Quick assessment results +- **DQSF Evaluation**: Comprehensive quality scores (out of 100 points) + +## Evaluation Frameworks + +### Simple Evaluator +Provides a quick quality assessment across: +- Clarity and Readability +- Completeness +- Accuracy +- Structure and Organization +- Examples and Practical Usage +- Accessibility + +### Document Quality Scoring Framework (DQSF) +A comprehensive framework based on Microsoft Learn standards, evaluating five categories (20 points each): +1. **Module Description**: Overview and context +2. **Command Description**: Behavior and prerequisites +3. **Examples**: Runnable, up-to-date examples +4. **Parameter Descriptions**: Clear, detailed parameter documentation +5. 
**Parameter Properties/Sets**: Complete parameter specifications + +Each category is scored across six dimensions: +- Practical & example-rich +- Consistent with style guide +- Detailed & technically complete +- Current and up-to-date +- Easy to navigate +- Clear parameter descriptions + +## Architecture + +The tool consists of two main components: + +### `help_evaluator.py` +The `HelpEvaluator` class handles: +- Azure OpenAI client initialization +- Prompt template management +- Help content extraction +- Running both evaluators +- Generating analysis reports + +### `evaluate-help.py` +The main script that: +- Parses command-line arguments +- Finds help files (supports single file or directory) +- Manages the evaluation workflow +- Provides progress feedback with spinner +- Generates summary statistics + +## Prompts + +Evaluation prompts are stored in the `prompts/` directory: +- `extractor.md`: Extracts help content and module name +- `simple-evaluator.md`: Simple quality evaluation criteria +- `document-quality-scoring-framework.md`: DQSF evaluation criteria + +You can customize these prompts to adjust the evaluation criteria. + +## Token Usage + +The tool tracks token usage for each operation: +- Extraction tokens +- Simple evaluation tokens +- DQSF evaluation tokens +- Total tokens per file + +This helps estimate costs and optimize evaluations. + +## Example Output + +``` +Searching for help files in: ../../src/azure-cli/azure/cli/command_modules/search/ +Found 1 help file(s) + +Initializing HelpEvaluator... +Output directory: ./analysis +Loaded prompts: ['extractor', 'simple-evaluator', 'document-quality-scoring-framework'] + +================================================================================ +Starting evaluation +================================================================================ + +[1/1] Processing: ../../src/azure-cli/azure/cli/command_modules/search/_help.py + ⠋ Working... + ✓ Analysis saved to: search_20251127-123045.md + Total tokens used: 4821 + +================================================================================ +Evaluation Complete +================================================================================ + +Processed: 1/1 files +Total tokens used: 4,821 + +Analysis files saved to: ./analysis/ + +Results summary: + - search: 4821 tokens → search_20251127-123045.md +``` + +## Troubleshooting + +### Missing Environment Variables +If you get an error about missing API credentials, ensure your `.env` file is in the `scripts/ai/` directory and contains all required variables. + +### API Rate Limits +If you encounter rate limit errors, consider: +- Adding delays between evaluations +- Using a model with higher rate limits +- Processing files in smaller batches + +### Token Limits +If evaluations fail due to token limits: +- Use a model with larger context windows +- Increase `max_tokens` parameter in the code +- Split large help files into smaller sections + +## Contributing + +To modify evaluation criteria: +1. Edit the appropriate prompt file in `prompts/` +2. Test with a sample help file +3. Adjust scoring weights or add new dimensions as needed + +## License + +This tool is part of the Azure CLI project and follows the same license terms. diff --git a/scripts/ai/evaluate-help.py b/scripts/ai/evaluate-help.py new file mode 100644 index 00000000000..571e39dbf30 --- /dev/null +++ b/scripts/ai/evaluate-help.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +""" +Evaluate Azure CLI help documentation quality. 
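+
+Typical usage:
+    python evaluate-help.py -i <help-file-or-directory> [-o <output-dir>]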
+ +This script finds all help files in a given path and evaluates them using +the HelpEvaluator class with Azure OpenAI. +""" + +import argparse +import sys +import threading +import time +from pathlib import Path +from help_evaluator import HelpEvaluator + + +class Spinner: + """Simple spinner for long-running operations.""" + + def __init__(self, message="Processing"): + self.message = message + self.spinning = False + self.spinner_thread = None + + def spin(self): + """Spin animation.""" + spinner_chars = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'] + idx = 0 + while self.spinning: + print(f"\r {spinner_chars[idx]} {self.message}...", end="", flush=True) + idx = (idx + 1) % len(spinner_chars) + time.sleep(0.1) + print("\r" + " " * (len(self.message) + 10) + "\r", end="", flush=True) + + def start(self): + """Start the spinner.""" + self.spinning = True + self.spinner_thread = threading.Thread(target=self.spin) + self.spinner_thread.daemon = True + self.spinner_thread.start() + + def stop(self): + """Stop the spinner.""" + self.spinning = False + if self.spinner_thread: + self.spinner_thread.join() + + +def find_help_files(input_path): + """ + Find all help files in the given path. + + Args: + input_path: Path to search for help files + + Returns: + List of Path objects for help files + """ + path = Path(input_path) + + if not path.exists(): + print(f"Error: Path '{input_path}' does not exist") + sys.exit(1) + + help_files = [] + + if path.is_file(): + # Single file + if path.name.endswith('_help.py'): + help_files.append(path) + else: + print(f"Warning: File '{path.name}' does not match help file pattern (*_help.py)") + else: + # Directory - search recursively + help_files = list(path.rglob("*_help.py")) + + return help_files + + +def main(): + """Main function to run help evaluation.""" + parser = argparse.ArgumentParser( + description="Evaluate Azure CLI help documentation quality using Azure OpenAI", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Evaluate a single help file + python evaluate-help.py -i ../../src/azure-cli/azure/cli/command_modules/search/_help.py + + # Evaluate all help files in a directory + python evaluate-help.py -i ../../src/azure-cli/azure/cli/command_modules/ + + # Specify custom output directory + python evaluate-help.py -i ../../src/azure-cli/azure/cli/command_modules/ -o ./custom-analysis + """ + ) + + parser.add_argument( + "-i", "--input", + dest="input_path", + required=True, + help="Path to help file or directory containing help files" + ) + + parser.add_argument( + "-o", "--output", + default="./analysis", + help="Output directory for analysis results (default: ./analysis)" + ) + + args = parser.parse_args() + + # Find help files + print(f"Searching for help files in: {args.input_path}") + help_files = find_help_files(args.input_path) + + if not help_files: + print(f"No help files found in '{args.input_path}'") + sys.exit(1) + + print(f"Found {len(help_files)} help file(s)") + + # Initialize evaluator + print(f"\nInitializing HelpEvaluator...") + print(f"Output directory: {args.output}") + evaluator = HelpEvaluator(output_dir=args.output) + + # Process each help file + print("\n" + "="*80) + print("Starting evaluation") + print("="*80) + + results = [] + total_tokens_all = 0 + + for i, help_file in enumerate(help_files, 1): + print(f"\n[{i}/{len(help_files)}] Processing: {help_file}") + + spinner = Spinner("Working") + try: + spinner.start() + result = evaluator.evaluate_file(help_file, show_progress=False) + 
spinner.stop() + + print(f" ✓ Analysis saved to: {result['output_path'].name}") + print(f" Total tokens used: {result['total_tokens']}") + + results.append(result) + total_tokens_all += result['total_tokens'] + except Exception as e: + spinner.stop() + print(f" ✗ Error: {e}") + continue + + # Summary + print("\n" + "="*80) + print("Evaluation Complete") + print("="*80) + print(f"\nProcessed: {len(results)}/{len(help_files)} files") + print(f"Total tokens used: {total_tokens_all:,}") + + if results: + print(f"\nAnalysis files saved to: {args.output}/") + print("\nResults summary:") + for result in results: + print(f" - {result['module_name']}: {result['total_tokens']} tokens → {result['output_path'].name}") + + +if __name__ == "__main__": + main() diff --git a/scripts/ai/help_evaluator.py b/scripts/ai/help_evaluator.py new file mode 100644 index 00000000000..a82d8db3f8c --- /dev/null +++ b/scripts/ai/help_evaluator.py @@ -0,0 +1,308 @@ +""" +HelpEvaluator class for evaluating Azure CLI help documentation quality. +""" + +import os +from pathlib import Path +from datetime import datetime +from openai import AzureOpenAI +from dotenv import load_dotenv + + +class HelpEvaluator: + """ + A class to evaluate Azure CLI help documentation using Azure OpenAI. + """ + + def __init__(self, output_dir="analysis"): + """ + Initialize the HelpEvaluator. + + Args: + output_dir: Directory where analysis results will be saved (default: "analysis") + """ + # Load environment variables + load_dotenv() + + # Initialize Azure OpenAI client + self.client = AzureOpenAI( + api_key=os.getenv("AZURE_OPENAI_API_KEY"), + api_version=os.getenv("AZURE_OPENAI_API_VERSION"), + azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT") + ) + + # Set output directory + self.output_dir = Path(output_dir) + self.output_dir.mkdir(exist_ok=True) + + # Cache for prompts + self.prompts = {} + + # System messages for different prompt types + self.system_messages = { + "simple-evaluator": "You are an expert technical writer and documentation evaluator.", + "document-quality-scoring-framework": "You are an expert technical documentation evaluator specializing in Azure CLI and Azure PowerShell reference documentation.", + "extractor": "You are an expert at analyzing and extracting documentation from code files." + } + + # Load all prompts + self._load_all_prompts() + + def _load_all_prompts(self): + """Load all prompt templates from the prompts folder.""" + prompts_dir = Path(__file__).parent / "prompts" + if prompts_dir.exists(): + for prompt_file in prompts_dir.glob("*.md"): + prompt_name = prompt_file.stem + with open(prompt_file, 'r', encoding='utf-8') as f: + self.prompts[prompt_name] = f.read() + print(f"Loaded prompts: {list(self.prompts.keys())}") + + def _load_prompt_template(self, prompt_name): + """ + Load a prompt template from the cache. + + Args: + prompt_name: Name of the prompt file (without .md extension) + + Returns: + The prompt template as a string + """ + if prompt_name not in self.prompts: + raise ValueError(f"Prompt '{prompt_name}' not found. Available prompts: {list(self.prompts.keys())}") + return self.prompts[prompt_name] + + def llm_chat(self, content, prompt_name, temperature=0.3, max_tokens=4000): + """ + Generic function to chat with Azure OpenAI using a specific prompt. 
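+
+        The "{HELP_DOCUMENT}" placeholder in the prompt template is replaced with
+        the supplied content before the request is sent to the deployed model.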
+ + Args: + content: The content to process (e.g., help document, extracted text) + prompt_name: Name of the prompt to use + temperature: Temperature parameter for the model (default: 0.3) + max_tokens: Maximum tokens for the response (default: 4000) + + Returns: + Dictionary containing the results + """ + # Load the prompt template + prompt_template = self._load_prompt_template(prompt_name) + + # Format the prompt with the content + full_prompt = prompt_template.replace("{HELP_DOCUMENT}", content) + + # Get system message + system_message = self.system_messages.get(prompt_name, "You are a helpful AI assistant.") + + # Call Azure OpenAI + response = self.client.chat.completions.create( + model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"), + messages=[ + {"role": "system", "content": system_message}, + {"role": "user", "content": full_prompt} + ], + temperature=temperature, + max_tokens=max_tokens + ) + + # Extract the result + result_content = response.choices[0].message.content + + return { + "prompt_used": prompt_name, + "result": result_content, + "model": response.model, + "usage": { + "prompt_tokens": response.usage.prompt_tokens, + "completion_tokens": response.usage.completion_tokens, + "total_tokens": response.usage.total_tokens + } + } + + def extract_help_content(self, file_path): + """ + Extract help content from a help file. + + Args: + file_path: Path to the help file + + Returns: + Tuple of (module_name, extracted_help, raw_content, extraction_result) + """ + # Read the file + with open(file_path, 'r', encoding='utf-8') as f: + raw_content = f.read() + + # Extract help content using the extractor prompt + extraction_result = self.llm_chat(raw_content, prompt_name="extractor") + + # Parse the extraction result to separate module name and help content + extraction_text = extraction_result['result'] + if "MODULE_NAME:" in extraction_text and "---" in extraction_text: + parts = extraction_text.split("---", 1) + module_name_line = parts[0].strip() + module_name = module_name_line.replace("MODULE_NAME:", "").strip() + extracted_help = parts[1].strip() + else: + # Fallback: extract from file path + module_name = Path(file_path).stem.replace("_help", "") + if module_name.startswith("_"): + module_name = module_name[1:] + extracted_help = extraction_text + + return module_name, extracted_help, raw_content, extraction_result + + def evaluate_help(self, extracted_help, show_progress=True): + """ + Evaluate help content using both evaluators. + + Args: + extracted_help: The extracted help content + show_progress: Whether to print progress messages (default: True) + + Returns: + Tuple of (simple_evaluation_result, dqsf_evaluation_result) + """ + if show_progress: + print(" Running Simple Evaluator...") + simple_evaluation_result = self.llm_chat(extracted_help, prompt_name="simple-evaluator") + + if show_progress: + print(" Running Document Quality Scoring Framework Evaluator...") + dqsf_evaluation_result = self.llm_chat(extracted_help, prompt_name="document-quality-scoring-framework") + + return simple_evaluation_result, dqsf_evaluation_result + + def save_analysis(self, module_name, file_path, raw_content, extracted_help, + extraction_result, simple_evaluation_result, dqsf_evaluation_result): + """ + Save the analysis results to a markdown file. 
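+
+        The report is written to "<module_name>_<YYYYMMDD-HHMMSS>.md" in the
+        configured output directory.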
+
+        Args:
+            module_name: Name of the module
+            file_path: Path to the original help file
+            raw_content: Raw content of the help file
+            extracted_help: Extracted help content
+            extraction_result: Result from extraction
+            simple_evaluation_result: Result from simple evaluator
+            dqsf_evaluation_result: Result from DQSF evaluator
+
+        Returns:
+            Tuple of (output_path, total_tokens)
+        """
+        # Create timestamp
+        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+
+        # Create filename
+        output_filename = f"{module_name}_{timestamp}.md"
+        output_path = self.output_dir / output_filename
+
+        # Calculate total token usage
+        total_tokens = (extraction_result['usage']['total_tokens'] +
+                        simple_evaluation_result['usage']['total_tokens'] +
+                        dqsf_evaluation_result['usage']['total_tokens'])
+
+        # Prepare the content to save
+        content = f"""# Help Document Analysis: {module_name}
+
+**Date**: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
+**Source File**: {file_path}
+**Model**: {simple_evaluation_result['model']}
+
+## Token Usage
+- Extraction: {extraction_result['usage']['total_tokens']} tokens
+- Simple Evaluation: {simple_evaluation_result['usage']['total_tokens']} tokens
+- DQSF Evaluation: {dqsf_evaluation_result['usage']['total_tokens']} tokens
+- **Total**: {total_tokens} tokens
+
+---
+
+## Original Source Code
+
+<details>
+<summary>Click to expand original code</summary>
+
+```python
+{raw_content}
+```
+
+</details>
+
+---
+
+## Extracted Help Content
+
+<details>
+<summary>Click to expand extracted help content</summary>
+
+{extracted_help}
+
+</details>
+ +--- + +## Simple Quality Evaluation + +{simple_evaluation_result['result']} + +--- + +## Document Quality Scoring Framework (DQSF) Evaluation + +{dqsf_evaluation_result['result']} +""" + + # Save to file + with open(output_path, 'w', encoding='utf-8') as f: + f.write(content) + + return output_path, total_tokens + + def evaluate_file(self, file_path, show_progress=True): + """ + Complete evaluation workflow for a single help file. + + Args: + file_path: Path to the help file + show_progress: Whether to print progress messages (default: True) + + Returns: + Dictionary with evaluation results and metadata + """ + if show_progress: + print(f"\nProcessing: {file_path}") + + # Step 1: Extract help content + if show_progress: + print(" Extracting help content...") + module_name, extracted_help, raw_content, extraction_result = self.extract_help_content(file_path) + if show_progress: + print(f" Extracted module: {module_name}") + + # Step 2: Evaluate help quality + if show_progress: + print(" Evaluating help quality...") + simple_evaluation_result, dqsf_evaluation_result = self.evaluate_help(extracted_help, show_progress=show_progress) + + # Step 3: Save analysis + if show_progress: + print(" Saving analysis...") + output_path, total_tokens = self.save_analysis( + module_name, file_path, raw_content, extracted_help, + extraction_result, simple_evaluation_result, dqsf_evaluation_result + ) + + if show_progress: + print(f" ✓ Analysis saved to: {output_path}") + print(f" Total tokens used: {total_tokens}") + + return { + "module_name": module_name, + "file_path": file_path, + "output_path": output_path, + "total_tokens": total_tokens, + "extraction_tokens": extraction_result['usage']['total_tokens'], + "simple_eval_tokens": simple_evaluation_result['usage']['total_tokens'], + "dqsf_eval_tokens": dqsf_evaluation_result['usage']['total_tokens'] + } diff --git a/scripts/ai/prompts/document-quality-scoring-framework.md b/scripts/ai/prompts/document-quality-scoring-framework.md new file mode 100644 index 00000000000..6ca7cdb3cea --- /dev/null +++ b/scripts/ai/prompts/document-quality-scoring-framework.md @@ -0,0 +1,216 @@ +# Document Quality Scoring Framework for Microsoft Learn + +You are an expert technical documentation evaluator specializing in Azure CLI and Azure PowerShell reference documentation. Your task is to evaluate documentation quality using the standardized Document Quality Scoring Framework (DQSF) developed for Microsoft Learn. + +## Objective + +Establish a clear and standardized scoring to evaluate the quality of Azure CLI and Azure PowerShell reference documentation on Microsoft Learn. The framework enables consistent assessment across key quality dimensions, provides actionable insights for improvement, and supports AI-driven recommendations. + +## Quality Categories + +The framework evaluates documentation across six key quality dimensions: + +1. **Practical & example-rich**: Real-world usage, runnable examples, edge cases +2. **Consistent**: Terminology and tone aligned with Learn style guide +3. **Detailed & technically complete**: Includes return types, constraints, dependencies +4. **Current**: Matches latest product version and syntax +5. **Easy to navigate**: Logical grouping, headings, anchors, cross-links +6. 
**Clear & meaningful parameter descriptions**: Avoid tautologies, include enumerations and defaults + +## Scoring System + +Each category below is scored out of **20 points total**, distributed across the six dimensions with the following weights: + +| Dimension | Weight Range | +|-----------|--------------| +| Practical & example-rich | 3-6 points | +| Consistent | 3 points | +| Detailed & technically complete | 4-6 points | +| Current | 3-4 points | +| Easy to navigate | 2-3 points | +| Clear & meaningful parameter descriptions | 2-4 points | + +## Categories to Evaluate + +### 1. Module Description (20 points) +**Purpose**: Accurate, concise overview of module purpose with context and links to related modules. + +**Dimension Weights**: +- Practical & example-rich: 4 points +- Consistent: 3 points +- Detailed & technically complete: 5 points +- Current: 3 points +- Easy to navigate: 3 points +- Clear & meaningful parameter descriptions: 2 points + +**What to look for**: +- Does it provide clear context about the module's purpose? +- Are related modules linked? +- Is the description accurate and up-to-date? + +--- + +### 2. Command Description (20 points) +**Purpose**: Explains command behavior, prerequisites, and impact without ambiguity or tautologies. + +**Dimension Weights**: +- Practical & example-rich: 4 points +- Consistent: 3 points +- Detailed & technically complete: 5 points +- Current: 3 points +- Easy to navigate: 3 points +- Clear & meaningful parameter descriptions: 2 points + +**What to look for**: +- Does it clearly explain what the command does? +- Are prerequisites mentioned? +- Are potential impacts or side effects documented? +- Avoid descriptions that merely restate the command name + +--- + +### 3. Examples (20 points) +**Purpose**: Runnable, up-to-date examples covering basic and advanced scenarios with descriptive titles and explanations. + +**Dimension Weights**: +- Practical & example-rich: 6 points +- Consistent: 3 points +- Detailed & technically complete: 4 points +- Current: 4 points +- Easy to navigate: 3 points + +**What to look for**: +- Are examples actually present? +- Are they runnable and complete? +- Do they cover common use cases? +- Are they up-to-date with current syntax? +- Do they include clear descriptions/titles? + +--- + +### 4. Parameter Descriptions (20 points) +**Purpose**: Clear semantics, units, ranges, defaults, enumerations without tautological definitions. + +**Dimension Weights**: +- Practical & example-rich: 3 points +- Consistent: 3 points +- Detailed & technically complete: 5 points +- Current: 3 points +- Easy to navigate: 2 points +- Clear & meaningful parameter descriptions: 4 points + +**What to look for**: +- Do descriptions explain what the parameter does (not just repeat its name)? +- Are valid values/enumerations listed? +- Are defaults specified? +- Are units, ranges, and constraints documented? +- Are formats properly specified (e.g., JSON structure)? + +--- + +### 5. Parameter Properties / Parameter Sets (20 points) +**Purpose**: Complete listing of properties, constraints, defaults, and relationships with clearly defined parameter sets. + +**Dimension Weights**: +- Practical & example-rich: 3 points +- Consistent: 3 points +- Detailed & technically complete: 6 points +- Current: 3 points +- Easy to navigate: 2 points +- Clear & meaningful parameter descriptions: 3 points + +**What to look for**: +- Are parameter sets clearly defined? +- Are required vs. optional parameters marked? 
+- Are property hierarchies and relationships documented? +- Are constraints and validation rules specified? + +--- + +## Your Task + +Evaluate the following help document using the DQSF framework. + +## Help Document to Evaluate + +``` +{HELP_DOCUMENT} +``` + +## Response Format + +Provide your evaluation in the following structure: + +### Overall Summary +[Provide a 2-3 sentence summary of the document's overall quality] + +### Category Scores + +#### 1. Module Description: [X/20] +- Practical & example-rich: [X/4] +- Consistent: [X/3] +- Detailed & technically complete: [X/5] +- Current: [X/3] +- Easy to navigate: [X/3] +- Clear & meaningful parameter descriptions: [X/2] + +**Strengths**: [List key strengths] +**Issues**: [List specific issues] +**Recommendations**: [Actionable improvements] + +#### 2. Command Description: [X/20] +- Practical & example-rich: [X/4] +- Consistent: [X/3] +- Detailed & technically complete: [X/5] +- Current: [X/3] +- Easy to navigate: [X/3] +- Clear & meaningful parameter descriptions: [X/2] + +**Strengths**: [List key strengths] +**Issues**: [List specific issues] +**Recommendations**: [Actionable improvements] + +#### 3. Examples: [X/20] +- Practical & example-rich: [X/6] +- Consistent: [X/3] +- Detailed & technically complete: [X/4] +- Current: [X/4] +- Easy to navigate: [X/3] + +**Strengths**: [List key strengths] +**Issues**: [List specific issues] +**Recommendations**: [Actionable improvements] + +#### 4. Parameter Descriptions: [X/20] +- Practical & example-rich: [X/3] +- Consistent: [X/3] +- Detailed & technically complete: [X/5] +- Current: [X/3] +- Easy to navigate: [X/2] +- Clear & meaningful parameter descriptions: [X/4] + +**Strengths**: [List key strengths] +**Issues**: [List specific issues] +**Recommendations**: [Actionable improvements] + +#### 5. Parameter Properties / Parameter Sets: [X/20] +- Practical & example-rich: [X/3] +- Consistent: [X/3] +- Detailed & technically complete: [X/6] +- Current: [X/3] +- Easy to navigate: [X/2] +- Clear & meaningful parameter descriptions: [X/3] + +**Strengths**: [List key strengths] +**Issues**: [List specific issues] +**Recommendations**: [Actionable improvements] + +--- + +### Final Score: [X/100] + +### Priority Improvements +[List the top 3-5 most critical improvements ranked by impact] + +### AI-Driven Recommendations +[Provide specific, actionable recommendations that could be implemented to improve the documentation quality] diff --git a/scripts/ai/prompts/extractor.md b/scripts/ai/prompts/extractor.md new file mode 100644 index 00000000000..c9512457766 --- /dev/null +++ b/scripts/ai/prompts/extractor.md @@ -0,0 +1,43 @@ +# Help Content Extraction Prompt + +You are an expert at analyzing and extracting documentation from code files. Your task is to extract the help documentation from the provided content, which may be in Python code or JSON format. + +## Your Task + +The text you are reading contains either Python code or JSON code. Your job is to: + +1. **Identify the format**: Determine if the content is Python code or JSON +2. **Extract module name**: Identify the Azure CLI module name (e.g., 'search', 'storage', 'network') +3. **Extract help content**: Extract all help documentation from the top-level document +4. 
**Clean and format**: Return the results in a structured format + +## What to Extract + +- For Python files: Extract docstrings, help text, command descriptions, parameter descriptions, and examples +- For JSON files: Extract description fields, help text, documentation strings, and usage examples +- Focus on the **top-level document** - the main help content, not implementation details + +## What NOT to Include + +- Implementation code (functions, classes, logic) +- Import statements +- Internal comments that are not user-facing documentation +- Configuration details that are not part of help text + +## Content to Analyze + +``` +{HELP_DOCUMENT} +``` + +## Response Format + +You MUST respond with EXACTLY this structure: + +MODULE_NAME: [the module name here] + +--- + +[The extracted help documentation in clean markdown format, preserving the structure and organization of the help content] + +IMPORTANT: Start your response with "MODULE_NAME: " followed by the module name on the first line, then "---" on a line by itself, then the help content. diff --git a/scripts/ai/prompts/simple-evaluator.md b/scripts/ai/prompts/simple-evaluator.md new file mode 100644 index 00000000000..1d524488e94 --- /dev/null +++ b/scripts/ai/prompts/simple-evaluator.md @@ -0,0 +1,92 @@ +# Help Document Quality Evaluation Prompt + +You are an expert technical writer and documentation evaluator. Your task is to evaluate the quality of a help document based on the following criteria: + +## Evaluation Criteria + +### 1. Clarity and Readability (Score: 1-10) +- Is the language clear and easy to understand? +- Are technical terms properly explained? +- Is the document well-organized with logical flow? +- Are sentences concise and free of unnecessary jargon? + +### 2. Completeness (Score: 1-10) +- Does the document cover all necessary topics? +- Are all parameters, options, and features explained? +- Are prerequisites and requirements clearly stated? +- Are edge cases and common scenarios addressed? + +### 3. Accuracy (Score: 1-10) +- Is the technical information correct? +- Are examples valid and executable? +- Are commands and syntax properly formatted? +- Are there any misleading or outdated statements? + +### 4. Structure and Organization (Score: 1-10) +- Is the document well-structured with clear headings? +- Is there a logical hierarchy of information? +- Are related topics grouped appropriately? +- Is navigation easy (table of contents, links)? + +### 5. Examples and Practical Usage (Score: 1-10) +- Are there sufficient examples? +- Do examples cover common use cases? +- Are examples clear and easy to follow? +- Do examples demonstrate best practices? + +### 6. Accessibility (Score: 1-10) +- Is the document accessible to the target audience? +- Is the tone appropriate (not too technical or too simple)? +- Are there multiple ways to understand concepts (examples, diagrams, explanations)? +- Is the document friendly to beginners while still useful for advanced users? + +## Your Task + +Please evaluate the following help document and provide: + +1. **Individual Scores**: Rate each criterion above on a scale of 1-10 +2. **Overall Score**: Provide an overall quality score (1-10) +3. **Strengths**: List 3-5 key strengths of the document +4. **Areas for Improvement**: List 3-5 specific areas that need improvement +5. **Recommendations**: Provide actionable recommendations to improve the document quality +6. 
**Summary**: A brief 2-3 sentence summary of the document's overall quality + +## Help Document to Evaluate + +``` +{HELP_DOCUMENT} +``` + +## Response Format + +Please structure your response as follows: + +### Scores +- Clarity and Readability: [score]/10 +- Completeness: [score]/10 +- Accuracy: [score]/10 +- Structure and Organization: [score]/10 +- Examples and Practical Usage: [score]/10 +- Accessibility: [score]/10 +- **Overall Score: [score]/10** + +### Strengths +1. [strength 1] +2. [strength 2] +3. [strength 3] +... + +### Areas for Improvement +1. [area 1] +2. [area 2] +3. [area 3] +... + +### Recommendations +1. [recommendation 1] +2. [recommendation 2] +3. [recommendation 3] +... + +### Summary +[Your 2-3 sentence summary here] diff --git a/scripts/ai/requirements.txt b/scripts/ai/requirements.txt new file mode 100644 index 00000000000..0d46b402379 --- /dev/null +++ b/scripts/ai/requirements.txt @@ -0,0 +1,5 @@ +# Requirements for Azure CLI Help Documentation Evaluator +# Install with: pip install -r requirements.txt + +openai>=2.8.1,<3.0.0 +python-dotenv>=1.2.1,<2.0.0 From e9f81176dd199a05fb9931f8ba36e8b4ee4aa6fe Mon Sep 17 00:00:00 2001 From: Ren Silva Date: Tue, 2 Dec 2025 03:50:01 +0000 Subject: [PATCH 2/2] added license headers to help analysers --- scripts/ai/evaluate-help.py | 8 +++++++- scripts/ai/help_evaluator.py | 7 +++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/scripts/ai/evaluate-help.py b/scripts/ai/evaluate-help.py index 571e39dbf30..1eee31b64be 100644 --- a/scripts/ai/evaluate-help.py +++ b/scripts/ai/evaluate-help.py @@ -1,4 +1,10 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python + +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + """ Evaluate Azure CLI help documentation quality. diff --git a/scripts/ai/help_evaluator.py b/scripts/ai/help_evaluator.py index a82d8db3f8c..65df581f602 100644 --- a/scripts/ai/help_evaluator.py +++ b/scripts/ai/help_evaluator.py @@ -1,3 +1,10 @@ +#!/usr/bin/env python + +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + """ HelpEvaluator class for evaluating Azure CLI help documentation quality. """