# Copyright (c) Microsoft. All rights reserved. """Check code blocks in Markdown files for syntax errors.""" import argparse from enum import Enum import glob import logging import tempfile import subprocess # nosec from pygments import highlight # type: ignore from pygments.formatters import TerminalFormatter from pygments.lexers import PythonLexer logger = logging.getLogger(__name__) logger.addHandler(logging.StreamHandler()) logger.setLevel(logging.INFO) class Colors(str, Enum): CEND = "\33[0m" CRED = "\33[31m" CREDBG = "\33[41m" CGREEN = "\33[32m" CGREENBG = "\33[42m" CVIOLET = "\33[35m" CGREY = "\33[90m" def with_color(text: str, color: Colors) -> str: """Prints a string with the specified color.""" return f"{color.value}{text}{Colors.CEND.value}" def expand_file_patterns(patterns: list[str], skip_glob: bool = False) -> list[str]: """Expand glob patterns to actual file paths.""" all_files: list[str] = [] for pattern in patterns: if skip_glob: # When skip_glob is True, treat patterns as literal file paths # Only include if it's a markdown file if pattern.endswith('.md'): matches = glob.glob(pattern, recursive=False) all_files.extend(matches) else: # Handle both relative and absolute paths with glob expansion matches = glob.glob(pattern, recursive=True) all_files.extend(matches) return sorted(set(all_files)) # Remove duplicates and sort def extract_python_code_blocks(markdown_file_path: str) -> list[tuple[str, int]]: """Extract Python code blocks from a Markdown file.""" with open(markdown_file_path, encoding="utf-8") as file: lines = file.readlines() code_blocks: list[tuple[str, int]] = [] in_code_block = False current_block: list[str] = [] for i, line in enumerate(lines): if line.strip().startswith("```python"): in_code_block = True current_block = [] elif line.strip().startswith("```"): in_code_block = False code_blocks.append(("\n".join(current_block), i - len(current_block) + 1)) elif in_code_block: current_block.append(line) return code_blocks def check_code_blocks(markdown_file_paths: list[str], exclude_patterns: list[str] | None = None) -> None: """Check Python code blocks in a Markdown file for syntax errors.""" files_with_errors: list[str] = [] exclude_patterns = exclude_patterns or [] for markdown_file_path in markdown_file_paths: # Skip files that match any exclude pattern if any(pattern in markdown_file_path for pattern in exclude_patterns): logger.info(f"Skipping {markdown_file_path} (matches exclude pattern)") continue code_blocks = extract_python_code_blocks(markdown_file_path) had_errors = False for code_block, line_no in code_blocks: markdown_file_path_with_line_no = f"{markdown_file_path}:{line_no}" logger.info("Checking a code block in %s...", markdown_file_path_with_line_no) # Skip blocks that don't import agent_framework modules or import lab modules if (all( all(import_code not in code_block for import_code in [f"import {module}", f"from {module}"]) for module in ["agent_framework"] ) or "agent_framework.lab" in code_block): logger.info(f' {with_color("OK[ignored]", Colors.CGREENBG)}') continue with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as temp_file: temp_file.write(code_block.encode("utf-8")) temp_file.flush() # Run pyright on the temporary file using subprocess.run result = subprocess.run(["uv", "run", "pyright", temp_file.name], capture_output=True, text=True, cwd=".") # nosec if result.returncode != 0: highlighted_code = highlight(code_block, PythonLexer(), TerminalFormatter()) # type: ignore logger.info( f" {with_color('FAIL', Colors.CREDBG)}\n" f"{with_color('========================================================', Colors.CGREY)}\n" f"{with_color('Error', Colors.CRED)}: Pyright found issues in {with_color(markdown_file_path_with_line_no, Colors.CVIOLET)}:\n" f"{with_color('--------------------------------------------------------', Colors.CGREY)}\n" f"{highlighted_code}\n" f"{with_color('--------------------------------------------------------', Colors.CGREY)}\n" "\n" f"{with_color('pyright output:', Colors.CVIOLET)}\n" f"{with_color(result.stdout, Colors.CRED)}" f"{with_color('========================================================', Colors.CGREY)}\n" ) had_errors = True else: logger.info(f" {with_color('OK', Colors.CGREENBG)}") if had_errors: files_with_errors.append(markdown_file_path) if files_with_errors: raise RuntimeError("Syntax errors found in the following files:\n" + "\n".join(files_with_errors)) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Check code blocks in Markdown files for syntax errors.") # Argument is a list of markdown files containing glob patterns parser.add_argument("markdown_files", nargs="+", help="Markdown files to check (supports glob patterns).") parser.add_argument("--exclude", action="append", help="Exclude files containing this pattern.") parser.add_argument("--no-glob", action="store_true", help="Treat file arguments as literal paths (no glob expansion).") args = parser.parse_args() # Expand glob patterns to actual file paths (or skip if --no-glob) expanded_files = expand_file_patterns(args.markdown_files, skip_glob=args.no_glob) check_code_blocks(expanded_files, args.exclude)