# Source code for dash_evals.runner.tasks.question_answer

"""QA tasks for evaluating model Q&A capabilities."""

from textwrap import dedent

from inspect_ai import Task, task
from inspect_ai.dataset import Dataset
from inspect_ai.scorer import model_graded_fact
from inspect_ai.solver import chain_of_thought

from ..solvers import add_system_message
from .task_helpers import (
    append_context_injection,
    append_model_interaction,
    build_task_metadata,
)

# Fallback system prompt: question_answer uses it whenever the task config
# has no "system_message" entry or the entry is falsy.
DEFAULT_QA_SYSTEM_MESSAGE = dedent(
    """
    You are a helpful and knowledgeable coding assistant.
    Answer questions clearly and accurately, providing examples when helpful.
    """
)


def _build_qa_solver(system_msg: str, config: dict):
    """
    Assemble the ordered list of solver steps for a QA task.

    Steps are added in this order: the system message, context injection,
    a chain_of_thought step, and finally the model interaction.

    Args:
        system_msg: Text passed to add_system_message for the first step.
        config: Task configuration forwarded to the task_helpers functions.

    Returns:
        The list holding the solver steps.
    """
    steps = []
    steps.append(add_system_message(system_msg))

    # The helpers are handed the list itself and their return values are not
    # used here, so they are presumably extending it in place (see
    # task_helpers to confirm).
    append_context_injection(steps, config)
    steps += [chain_of_thought()]
    append_model_interaction(steps, config)

    return steps


@task
def question_answer(dataset: Dataset, config: dict) -> Task:
    """
    Generic QA task for evaluating model Q&A capabilities.

    Args:
        dataset: Inspect dataset loaded from JSONL.
        config: Task manifest entry with variant, system_message, etc.
            Must contain "task_name". A missing or falsy "system_message"
            falls back to DEFAULT_QA_SYSTEM_MESSAGE.

    Returns:
        A Task named after config["task_name"] that uses the QA solver chain,
        the model_graded_fact scorer, a time_limit of 300, and metadata
        produced by build_task_metadata(config).

    Raises:
        KeyError: If config has no "task_name" entry.
    """
    # `or` instead of a .get() default so that falsy values (None, "") also
    # fall back to the default prompt.
    system_msg = config.get("system_message") or DEFAULT_QA_SYSTEM_MESSAGE
    solver = _build_qa_solver(system_msg, config)

    return Task(
        name=config["task_name"],
        dataset=dataset,
        solver=solver,
        scorer=model_graded_fact(),
        time_limit=300,
        metadata=build_task_metadata(config),
    )