Source code for dash_evals.runner.scorers.flutter_scoring

"""Scoring utilities for Flutter code evaluation."""

from typing import Tuple

from dash_evals.runner.scorers.flutter_output_parser import AnalyzerResult, TestResult


def calculate_analyzer_score(result: AnalyzerResult) -> Tuple[float, str]:
    """
    Calculate score from analyzer results.

    Errors and warnings each count as one issue; info-level findings count
    as half an issue. The weighted total selects one of three score tiers:

    * ``0``      -> ``1.0``
    * ``<= 3``   -> ``0.7``
    * otherwise  -> ``0.3``

    The explanation reports all three counts, because info-level findings
    alone are enough to lower the score.

    Args:
        result: Parsed analyzer output. Only ``error_count``,
            ``warning_count`` and ``info_count`` are read.

    Returns:
        Tuple of (score, explanation)

    Examples:
        >>> from dash_evals.runner.scorers.flutter_output_parser import AnalyzerResult
        >>> result = AnalyzerResult(0, 0, 0, "")
        >>> score, explanation = calculate_analyzer_score(result)
        >>> score
        1.0
    """
    # Infos are weighted at half the severity of an error or warning.
    weighted_issues = (
        result.error_count + result.warning_count + (result.info_count * 0.5)
    )

    if weighted_issues == 0:
        return 1.0, "✓ No analyzer issues"

    # Include infos in the breakdown: they contribute to the weighted total,
    # so omitting them can produce "0 errors, 0 warnings" next to a
    # penalised score.
    breakdown = (
        f"{result.error_count} errors, "
        f"{result.warning_count} warnings, "
        f"{result.info_count} infos"
    )

    if weighted_issues <= 3:
        return 0.7, f"⚠ Minor issues: {breakdown}"
    return 0.3, f"✗ Multiple issues: {breakdown}"
def calculate_test_score(result: TestResult) -> Tuple[float, str]:
    """
    Calculate score from test results as a percentage of tests passed.

    When ``result.passed`` is truthy the score is ``1.0`` and the counts are
    not consulted. Otherwise the score is
    ``passed_count / (passed_count + failed_count)``, or ``0.0`` when both
    counts are zero.

    Args:
        result: Parsed test output. Reads ``passed`` and, when that is
            falsy, ``passed_count`` and ``failed_count``.

    Returns:
        Tuple of (score, explanation) where score is the fraction of tests
        passed (0.0-1.0)

    Examples:
        >>> from dash_evals.runner.scorers.flutter_output_parser import TestResult
        >>> result = TestResult(passed=True, raw_output="")
        >>> score, explanation = calculate_test_score(result)
        >>> score
        1.0
    """
    # Overall success short-circuits everything else.
    if result.passed:
        return 1.0, "✓ All tests passed"

    n_passed = result.passed_count
    n_run = n_passed + result.failed_count

    # Nothing ran: avoid dividing by zero and report it explicitly.
    if n_run == 0:
        return 0.0, "✗ No tests found or executed"

    fraction = n_passed / n_run

    if fraction == 1.0:
        return 1.0, f"✓ All tests passed ({n_passed}/{n_run})"
    if fraction == 0.0:
        return 0.0, f"✗ All tests failed (0/{n_run})"
    return fraction, f"⚠ {n_passed}/{n_run} tests passed ({fraction:.0%})"
def validate_code_structure(code: str, required_widgets: list) -> Tuple[float, str]:
    """
    Validate that code contains required structural elements.

    Two groups of elements are looked for by plain substring match:

    * baseline scaffolding: ``MyApp``, a ``StatefulWidget`` or
      ``StatelessWidget``, and ``MaterialApp``;
    * every name listed in ``required_widgets``.

    Scoring tiers:

    * ``1.0`` - every required widget is present AND at least two baseline
      elements are present;
    * ``0.7`` - otherwise, the total number of elements found is at least
      ``len(required_widgets)``;
    * ``0.3`` - fewer elements found than there are required widgets.

    The two groups are tracked separately so that baseline matches cannot
    stand in for a missing required widget in the top tier.

    Args:
        code: The generated Dart code
        required_widgets: List of required widget names from metadata

    Returns:
        Tuple of (score, explanation)

    Examples:
        >>> code = "class MyApp extends StatelessWidget { MaterialApp() }"
        >>> validate_code_structure(code, ["TextField"])[0]
        0.7
        >>> validate_code_structure(code + " TextField()", ["TextField"])[0]
        1.0
    """
    # Baseline scaffolding, in the order it is reported in the explanation.
    baseline_found = []
    if "MyApp" in code:
        baseline_found.append("MyApp class")
    if "StatefulWidget" in code or "StatelessWidget" in code:
        baseline_found.append("Widget structure")
    if "MaterialApp" in code:
        baseline_found.append("MaterialApp")

    # Task-specific widgets taken from the metadata.
    widgets_found = [widget for widget in required_widgets if widget in code]

    found = baseline_found + widgets_found
    all_widgets_present = len(widgets_found) == len(required_widgets)

    # Top tier needs all required widgets; a pooled count would let three
    # baseline hits mask a missing widget.
    if all_widgets_present and len(baseline_found) >= 2:
        listing = ", ".join(found)
        return 1.0, f"✓ Contains required elements: {listing}"
    if len(found) >= len(required_widgets):
        return 0.7, "⚠ Missing some elements"
    return 0.3, "✗ Missing required elements"
[docs] def calculate_final_score( analyzer_score: float, test_score: float, structure_score: float, weights: dict | None = None, ) -> float: """ Calculate weighted final score. Args: analyzer_score: Code quality score (0.0-1.0) test_score: Test pass rate (0.0-1.0) structure_score: Code structure score (0.0-1.0) weights: Optional custom weights (default: {"analyzer": 0.3, "test": 0.5, "structure": 0.2}) Returns: Weighted final score (0.0-1.0) Examples: >>> calculate_final_score(1.0, 1.0, 1.0) 1.0 >>> calculate_final_score(0.0, 1.0, 0.0) # Test score is 50% of total 0.5 """ if weights is None: weights = {"analyzer": 0.3, "test": 0.5, "structure": 0.2} return ( analyzer_score * weights["analyzer"] + test_score * weights["test"] + structure_score * weights["structure"] )