Source code for dash_evals.runner.scorers.dart_analyze

"""Dart static analysis scorer.

Reusable scorer that runs ``dart analyze`` on auto-discovered project roots
and scores based on output.
"""

import os

from inspect_ai.scorer import CORRECT, INCORRECT, Score, Scorer, Target, accuracy, scorer
from inspect_ai.solver import TaskState
from inspect_ai.util import sandbox



[docs]
@scorer(metrics=[accuracy()])
def dart_analyze_scorer(strict: bool = False, project_dir: str | None = None) -> Scorer:
    """
    Score based on dart static analysis results.

    Scoping behavior (in priority order):

    1. If ``project_dir`` argument is set, analyze only that subdirectory.
    2. If ``state.metadata["project_dir"]`` exists, use that.
    3. Fall back to auto-discovering all ``pubspec.yaml`` files (hidden
       directories are skipped); if none are found, the workspace root
       itself is analyzed.

    Scores:
    - CORRECT if no errors in any project (and no warnings if strict=True)
    - INCORRECT if any project has errors, if ``dart analyze`` itself fails
      to run cleanly (non-zero exit without a recognisable warning-only
      cause), or if ``state.metadata["workspace"]`` is missing.

    Info-level issues never fail the score; they are only counted and
    reported in the explanation/metadata of a passing score.

    Args:
        strict: If True, also fail on warnings. Default False (only errors fail).
        project_dir: Optional subdirectory to scope analysis to.
                     Relative to the workspace root.

    Returns:
        A Scorer that evaluates Dart code quality via static analysis.
    """

    # ``dart analyze`` treats warnings as fatal by default (non-zero exit).
    # Make the exit code follow the documented contract explicitly: warnings
    # only affect the exit status in strict mode.
    warnings_flag = "--fatal-warnings" if strict else "--no-fatal-warnings"

    async def score(state: TaskState, target: Target) -> Score:
        sb = sandbox()
        workspace = state.metadata.get("workspace")

        if not workspace:
            return Score(
                value=INCORRECT,
                explanation="No workspace found - setup may have failed",
            )

        # Determine target project directory(ies): explicit argument first,
        # then per-sample metadata, otherwise auto-discovery.
        scope = project_dir or state.metadata.get("project_dir")

        if scope:
            # Scoped to a specific project subdirectory
            project_dirs = [scope]
        else:
            # Discover all Dart/Flutter projects by finding pubspec.yaml files
            find_result = await sb.exec(
                ["find", ".", "-name", "pubspec.yaml", "-not", "-path", "*/.*"],
                cwd=workspace,
                timeout=30,
            )

            pubspec_paths = [
                p.strip() for p in (find_result.stdout or "").splitlines() if p.strip()
            ]

            if not pubspec_paths:
                # Fallback: try analyzing workspace root directly
                pubspec_paths = ["."]

            # Derive project directories from pubspec.yaml paths
            # (de-duplicated and sorted for deterministic output).
            project_dirs = sorted({os.path.dirname(p) or "." for p in pubspec_paths})

        # Run dart analyze in each project directory
        all_outputs: list[str] = []
        has_errors = False
        has_warnings = False

        for proj_dir in project_dirs:
            project_cwd = os.path.join(workspace, proj_dir)

            result = await sb.exec(
                ["dart", "analyze", ".", warnings_flag],
                cwd=project_cwd,
                timeout=60,
            )

            output = (result.stdout or "") + (result.stderr or "")

            # Tag output with the project directory for clarity
            all_outputs.append(f"[{proj_dir}] {output.strip()}")

            lowered = output.lower()
            found_error = "error •" in lowered
            found_warning = "warning •" in lowered

            if found_warning:
                has_warnings = True

            if found_error:
                has_errors = True
            elif result.returncode != 0 and not (strict and found_warning):
                # Non-zero exit that is not explained by strict-mode warnings:
                # the analyzer failed (or reported a problem in a format we do
                # not recognise), so treat it as an error rather than passing.
                has_errors = True

        combined = "\n\n".join(all_outputs)

        if has_errors:
            return Score(
                value=INCORRECT,
                explanation=f"Static analysis failed:\n{combined[:2000]}",
                metadata={
                    "analyze_output": combined,
                    "projects_analyzed": project_dirs,
                },
            )

        if strict and has_warnings:
            return Score(
                value=INCORRECT,
                explanation=f"Static analysis has warnings (strict mode):\n{combined[:2000]}",
                metadata={
                    "analyze_output": combined,
                    "projects_analyzed": project_dirs,
                },
            )

        # Count info-level issues across all projects
        info_count = combined.lower().count("info •")

        return Score(
            value=CORRECT,
            explanation=f"Static analysis passed across {len(project_dirs)} project(s) "
            f"({info_count} info-level issues)",
            metadata={
                "analyze_output": combined,
                "info_count": info_count,
                "projects_analyzed": project_dirs,
            },
        )

    return score