Source code for dash_evals.runner.tasks.analyze_codebase
"""
Analyze Codebase Task
Evaluates LLM ability to explore and answer questions about an existing codebase.
The model gets read-only access to workspace files via bash commands,
but is instructed not to modify any files.
"""
from textwrap import dedent
from typing import cast
from inspect_ai import Task, task
from inspect_ai.agent import react
from inspect_ai.dataset import Dataset
from inspect_ai.model import ChatMessageSystem
from inspect_ai.scorer import model_graded_fact
from inspect_ai.solver import Generate, Solver, TaskState, solver
from inspect_ai.tool import bash
from dash_evals.runner.scorers import export_workspace
from dash_evals.runner.solvers import setup_workspace
from .task_helpers import (
append_context_injection,
build_task_metadata,
get_skill_tool,
)
DEFAULT_ANALYZE_SYSTEM_MESSAGE = dedent("""\
You are an expert code reviewer analyzing a codebase.
Your task is to:
1. Explore the codebase at {workspace} using the available tools
2. Understand the project structure, dependencies, and architecture
3. Answer the user's question based on what you find in the code
Important guidelines:
- Use bash commands (cat, find, grep, ls, head, tail, etc.) to browse files
- Do NOT edit or modify any files
- Base your answer on actual code you find, not assumptions
- Reference specific files and line numbers when relevant
- When done, call submit() with your complete answer
""")
@solver
def _add_workspace_system_message(template: str) -> Solver:
"""Add system message with workspace path substituted from metadata."""
async def solve(state: TaskState, generate: Generate) -> TaskState:
workspace = state.metadata.get("workspace", "/workspace")
message = template.format(workspace=workspace)
state.messages.insert(0, ChatMessageSystem(content=message))
return state
return solve
def _build_solver_chain(config: dict, system_message: str) -> list:
"""Build the solver chain for analyze codebase tasks."""
solver_chain = []
solver_chain.append(_add_workspace_system_message(system_message))
append_context_injection(solver_chain, config)
tools = [
bash(timeout=120),
]
skill_tool = get_skill_tool(config)
if skill_tool:
tools.append(skill_tool)
solver_chain.append(
cast(
Solver,
react(
name="code_analyzer",
description="Expert code reviewer who explores and analyzes codebases.",
tools=tools,
),
)
)
return solver_chain
[docs]
@task
def analyze_codebase(dataset: Dataset, config: dict) -> Task:
"""
Task for evaluating LLM ability to explore and answer questions about a codebase.
Args:
dataset: Inspect dataset loaded from JSONL.
config: Task manifest entry with variant, system_message, etc.
"""
system_message = config.get("system_message") or DEFAULT_ANALYZE_SYSTEM_MESSAGE
solver_chain = _build_solver_chain(config, system_message)
scorers: list = [model_graded_fact()]
if config.get("save_examples"):
scorers.append(export_workspace())
return Task(
name=config["task_name"],
dataset=dataset,
setup=[setup_workspace()],
solver=solver_chain,
scorer=scorers,
time_limit=300,
metadata=build_task_metadata(config),
)