# Source code for dash_evals.runner.scorers.dart_analyze

"""Dart static analysis scorer.

Reusable scorer that runs ``dart analyze`` on auto-discovered project roots
and scores based on output.
"""

import os

from inspect_ai.scorer import CORRECT, INCORRECT, Score, Scorer, Target, accuracy, scorer
from inspect_ai.solver import TaskState
from inspect_ai.util import sandbox


# Separators the analyzer may print between the severity and the rest of an
# issue line. NOTE(review): "-" is believed to replace "•" when ANSI output is
# disabled (e.g. output captured from a non-terminal) -- confirm against the
# Dart SDK version installed in the sandbox.
_SEVERITY_BULLETS = ("•", "-")


def _count_issues(output: str, severity: str) -> int:
    """Count analyzer issue lines in ``output`` reported at ``severity``.

    A line counts when, ignoring case and leading whitespace, it starts with
    the severity name followed by one of the known separators. Anchoring at
    the start of the line keeps phrases such as "error -" inside an issue
    *message* from being mistaken for an error-level issue.
    """
    prefixes = tuple(f"{severity} {bullet}" for bullet in _SEVERITY_BULLETS)
    return sum(
        line.lstrip().startswith(prefixes) for line in output.lower().splitlines()
    )


@scorer(metrics=[accuracy()])
def dart_analyze_scorer(strict: bool = False, project_dir: str | None = None) -> Scorer:
    """Score based on dart static analysis results.

    Scoping behavior (in priority order):

    1. If ``project_dir`` argument is set, analyze only that subdirectory.
    2. If ``state.metadata["project_dir"]`` exists, use that.
    3. Fall back to auto-discovering all ``pubspec.yaml`` files (hidden
       directories are skipped); if none are found, the workspace root is
       analyzed directly.

    Scores:

    - CORRECT if ``dart analyze`` exits with 0 and reports no errors in any
      project (and no warnings if ``strict=True``).
    - INCORRECT if the workspace is missing from ``state.metadata``, if any
      project has errors, or if ``dart analyze`` exits non-zero.

    Args:
        strict: If True, also fail on warnings. Default False (only errors
            fail). Strict mode runs the analyzer with ``--fatal-infos``, so
            per the ``dart analyze`` CLI documentation info-level issues also
            produce a non-zero exit code and are scored as failures.
        project_dir: Optional subdirectory to scope analysis to. Relative to
            the workspace root.

    Returns:
        A Scorer that evaluates Dart code quality via static analysis. The
        returned ``Score`` carries the combined analyzer output and the list
        of analyzed projects in its metadata (plus ``info_count`` on success).
    """

    async def score(state: TaskState, target: Target) -> Score:
        sb = sandbox()
        workspace = state.metadata.get("workspace")
        if not workspace:
            return Score(
                value=INCORRECT,
                explanation="No workspace found - setup may have failed",
            )

        # Determine target project directory(ies).
        scope = project_dir or state.metadata.get("project_dir")
        if scope:
            # Scoped to a specific project subdirectory.
            project_dirs = [scope]
        else:
            # Discover all Dart/Flutter projects by finding pubspec.yaml files.
            find_result = await sb.exec(
                ["find", ".", "-name", "pubspec.yaml", "-not", "-path", "*/.*"],
                cwd=workspace,
                timeout=30,
            )
            pubspec_paths = [
                p.strip() for p in (find_result.stdout or "").splitlines() if p.strip()
            ]
            if not pubspec_paths:
                # Fallback: try analyzing the workspace root directly.
                pubspec_paths = ["."]
            # Derive (de-duplicated, ordered) project directories from the paths.
            project_dirs = sorted({os.path.dirname(p) or "." for p in pubspec_paths})

        # `dart analyze` treats warnings as fatal by default, which would make
        # every warning fail the exit-code check below. Non-strict mode must
        # only fail on errors, so warnings are made non-fatal there; strict
        # mode keeps the original `--fatal-infos` behaviour.
        args = ["dart", "analyze", ".", "--fatal-infos" if strict else "--no-fatal-warnings"]

        # Run dart analyze in each project directory.
        all_outputs: list[str] = []
        has_errors = False
        has_warnings = False
        info_count = 0
        for proj_dir in project_dirs:
            result = await sb.exec(args, cwd=os.path.join(workspace, proj_dir), timeout=60)
            output = (result.stdout or "") + (result.stderr or "")

            # Tag output with the project directory for clarity.
            all_outputs.append(f"[{proj_dir}] {output.strip()}")

            # A non-zero exit also covers the analyzer failing to run at all.
            if result.returncode != 0 or _count_issues(output, "error"):
                has_errors = True
            if _count_issues(output, "warning"):
                has_warnings = True
            info_count += _count_issues(output, "info")

        combined = "\n\n".join(all_outputs)

        if has_errors:
            return Score(
                value=INCORRECT,
                # Explanation is truncated; full output is kept in metadata.
                explanation=f"Static analysis failed:\n{combined[:2000]}",
                metadata={
                    "analyze_output": combined,
                    "projects_analyzed": project_dirs,
                },
            )

        if strict and has_warnings:
            return Score(
                value=INCORRECT,
                explanation=f"Static analysis has warnings (strict mode):\n{combined[:2000]}",
                metadata={
                    "analyze_output": combined,
                    "projects_analyzed": project_dirs,
                },
            )

        return Score(
            value=CORRECT,
            explanation=f"Static analysis passed across {len(project_dirs)} project(s) "
            f"({info_count} info-level issues)",
            metadata={
                "analyze_output": combined,
                "info_count": info_count,
                "projects_analyzed": project_dirs,
            },
        )

    return score