Files
agent-framework/python/check_md_code_blocks.py
Ren Finlayson 539852f81c
Some checks are pending
CodeQL / Analyze (csharp) (push) Waiting to run
CodeQL / Analyze (python) (push) Waiting to run
dotnet-build-and-test / paths-filter (push) Waiting to run
dotnet-build-and-test / dotnet-build-and-test (Debug, windows-latest, net9.0) (push) Blocked by required conditions
dotnet-build-and-test / dotnet-build-and-test (Release, integration, true, ubuntu-latest, net10.0) (push) Blocked by required conditions
dotnet-build-and-test / dotnet-build-and-test (Release, integration, true, windows-latest, net472) (push) Blocked by required conditions
dotnet-build-and-test / dotnet-build-and-test (Release, ubuntu-latest, net8.0) (push) Blocked by required conditions
dotnet-build-and-test / dotnet-build-and-test-check (push) Blocked by required conditions
test
2026-01-24 03:05:12 +11:00

142 lines
6.1 KiB
Python

# Copyright (c) Microsoft. All rights reserved.
"""Check code blocks in Markdown files for syntax errors."""
import argparse
import glob
import logging
import os
import subprocess  # nosec
import tempfile
from enum import Enum

from pygments import highlight  # type: ignore
from pygments.formatters import TerminalFormatter
from pygments.lexers import PythonLexer
# Module-level logger used for all progress and result messages. A StreamHandler
# (which writes to stderr by default) is attached and the level is set to INFO
# here, so messages are emitted without any logging configuration by the caller.
logger = logging.getLogger(__name__)
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.INFO)
class Colors(str, Enum):
    """ANSI escape sequences used to colorize the checker's terminal output."""

    # Reset: ends any color started by one of the codes below.
    CEND = "\33[0m"

    # Red foreground / red background.
    CRED = "\33[31m"
    CREDBG = "\33[41m"

    # Green foreground / green background.
    CGREEN = "\33[32m"
    CGREENBG = "\33[42m"

    # Remaining foreground colors.
    CVIOLET = "\33[35m"
    CGREY = "\33[90m"
def with_color(text: str, color: Colors) -> str:
    """Return ``text`` preceded by the given color code and followed by the reset code."""
    start = color.value
    reset = Colors.CEND.value
    return "{}{}{}".format(start, text, reset)
def expand_file_patterns(patterns: list[str], skip_glob: bool = False) -> list[str]:
    """Resolve file arguments to a sorted, de-duplicated list of existing paths.

    Args:
        patterns: File paths or glob patterns.
        skip_glob: When True, every entry is treated as a literal path: it is
            kept only if it ends with ``.md`` and exists on disk. When False,
            every entry is expanded with ``glob.glob(..., recursive=True)``.

    Returns:
        The matching paths, sorted, with duplicates removed.
    """
    found: set[str] = set()
    for pattern in patterns:
        if not skip_glob:
            # Handles both relative and absolute paths; "**" matches recursively.
            found.update(glob.glob(pattern, recursive=True))
        elif pattern.endswith(".md"):
            # Escape glob metacharacters so a literal name such as "[draft].md"
            # is looked up as-is instead of being read as a character class
            # (which would match a *different* file, or nothing at all).
            found.update(glob.glob(glob.escape(pattern), recursive=False))
    return sorted(found)
def extract_python_code_blocks(markdown_file_path: str) -> list[tuple[str, int]]:
    """Extract the fenced Python code blocks from a Markdown file.

    A block starts at a line whose stripped text begins with ```` ```python ````
    and ends at the next line whose stripped text begins with ```` ``` ````.
    Fences of other languages are ignored. A Python block that is still open at
    the end of the file is not returned.

    Args:
        markdown_file_path: Path of the Markdown file, read as UTF-8.

    Returns:
        One ``(code, line_no)`` tuple per Python block, in file order. ``code``
        is the block's text exactly as it appears in the file, and ``line_no``
        is the 1-based line number of the block's first code line.
    """
    with open(markdown_file_path, encoding="utf-8") as file:
        lines = file.readlines()

    code_blocks: list[tuple[str, int]] = []
    in_code_block = False
    current_block: list[str] = []

    for index, line in enumerate(lines):
        stripped = line.strip()
        if stripped.startswith("```python"):
            in_code_block = True
            current_block = []
        elif stripped.startswith("```"):
            # Only a fence that closes an open Python block yields a result;
            # opening/closing fences of other languages must not re-emit the
            # previous block or an empty one.
            if in_code_block:
                # Lines already carry their own newline, so concatenate them
                # directly rather than inserting a second one between lines.
                code_blocks.append(("".join(current_block), index - len(current_block) + 1))
            in_code_block = False
        elif in_code_block:
            current_block.append(line)

    return code_blocks
def check_code_blocks(markdown_file_paths: list[str], exclude_patterns: list[str] | None = None) -> None:
"""Check Python code blocks in a Markdown file for syntax errors."""
files_with_errors: list[str] = []
exclude_patterns = exclude_patterns or []
for markdown_file_path in markdown_file_paths:
# Skip files that match any exclude pattern
if any(pattern in markdown_file_path for pattern in exclude_patterns):
logger.info(f"Skipping {markdown_file_path} (matches exclude pattern)")
continue
code_blocks = extract_python_code_blocks(markdown_file_path)
had_errors = False
for code_block, line_no in code_blocks:
markdown_file_path_with_line_no = f"{markdown_file_path}:{line_no}"
logger.info("Checking a code block in %s...", markdown_file_path_with_line_no)
# Skip blocks that don't import agent_framework modules or import lab modules
if (all(
all(import_code not in code_block for import_code in [f"import {module}", f"from {module}"])
for module in ["agent_framework"]
) or "agent_framework.lab" in code_block):
logger.info(f' {with_color("OK[ignored]", Colors.CGREENBG)}')
continue
with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as temp_file:
temp_file.write(code_block.encode("utf-8"))
temp_file.flush()
# Run pyright on the temporary file using subprocess.run
result = subprocess.run(["uv", "run", "pyright", temp_file.name], capture_output=True, text=True, cwd=".") # nosec
if result.returncode != 0:
highlighted_code = highlight(code_block, PythonLexer(), TerminalFormatter()) # type: ignore
logger.info(
f" {with_color('FAIL', Colors.CREDBG)}\n"
f"{with_color('========================================================', Colors.CGREY)}\n"
f"{with_color('Error', Colors.CRED)}: Pyright found issues in {with_color(markdown_file_path_with_line_no, Colors.CVIOLET)}:\n"
f"{with_color('--------------------------------------------------------', Colors.CGREY)}\n"
f"{highlighted_code}\n"
f"{with_color('--------------------------------------------------------', Colors.CGREY)}\n"
"\n"
f"{with_color('pyright output:', Colors.CVIOLET)}\n"
f"{with_color(result.stdout, Colors.CRED)}"
f"{with_color('========================================================', Colors.CGREY)}\n"
)
had_errors = True
else:
logger.info(f" {with_color('OK', Colors.CGREENBG)}")
if had_errors:
files_with_errors.append(markdown_file_path)
if files_with_errors:
raise RuntimeError("Syntax errors found in the following files:\n" + "\n".join(files_with_errors))
if __name__ == "__main__":
    # Command-line interface: one or more file arguments plus optional filters.
    cli = argparse.ArgumentParser(description="Check code blocks in Markdown files for syntax errors.")
    cli.add_argument("markdown_files", nargs="+", help="Markdown files to check (supports glob patterns).")
    cli.add_argument("--exclude", action="append", help="Exclude files containing this pattern.")
    cli.add_argument(
        "--no-glob",
        action="store_true",
        help="Treat file arguments as literal paths (no glob expansion).",
    )
    options = cli.parse_args()

    # Resolve the arguments to concrete file paths (globbing unless --no-glob
    # was given), then check every resolved file.
    check_code_blocks(
        expand_file_patterns(options.markdown_files, skip_glob=options.no_glob),
        options.exclude,
    )