Custom Metric¶
Create your own evaluation metric.
Retrieval Metric¶
from dataclasses import dataclass

from autorag_research.evaluation.metrics import (
    BaseRetrievalMetricConfig,
    metric,
    MetricInput,
)


@metric(fields_to_check=["retrieval_gt", "retrieved_ids"])
def hit_rate(metric_input: MetricInput) -> float:
    """Returns 1.0 if any ground truth doc was retrieved, 0.0 otherwise."""
    gt_ids = set()
    for group in metric_input.retrieval_gt:
        gt_ids.update(group)
    retrieved = set(metric_input.retrieved_ids)
    return 1.0 if gt_ids & retrieved else 0.0


@dataclass
class HitRateConfig(BaseRetrievalMetricConfig):
    def get_metric_func(self):
        return hit_rate
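As a quick sanity check you can call the metric directly. This is a minimal sketch that assumes MetricInput accepts keyword arguments for just the fields the metric checks, and that the @metric decorator leaves the function callable with a single input; adjust it to the actual class if it differs.

# Minimal sketch -- field construction and direct invocation are assumptions.
sample = MetricInput(
    retrieval_gt=[["doc-1", "doc-2"], ["doc-9"]],  # groups of ground-truth doc IDs
    retrieved_ids=["doc-3", "doc-2", "doc-7"],     # IDs returned by the retriever
)
print(hit_rate(sample))  # expected: 1.0, since "doc-2" was retrieved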
Generation Metric¶
from dataclasses import dataclass

from autorag_research.evaluation.metrics import (
    BaseGenerationMetricConfig,
    metric_loop,
    MetricInput,
)


@metric_loop(fields_to_check=["generation_gt", "generated_texts"])
def exact_match(metric_inputs: list[MetricInput]) -> list[float]:
    """Returns 1.0 for each input whose generated text exactly matches any ground truth."""
    scores = []
    for inp in metric_inputs:
        generated = inp.generated_texts.strip().lower()
        matches = any(gt.strip().lower() == generated for gt in inp.generation_gt)
        scores.append(1.0 if matches else 0.0)
    return scores


@dataclass
class ExactMatchConfig(BaseGenerationMetricConfig):
    def get_metric_func(self):
        return exact_match
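The loop-style metric can be checked the same way. This sketch assumes MetricInput can be built from keyword arguments, that generated_texts holds a single generated string per input, and that the decorated function is still callable with a list of inputs.

# Minimal sketch -- field construction and direct invocation are assumptions.
samples = [
    MetricInput(generation_gt=["Paris", "paris, France"], generated_texts="Paris"),
    MetricInput(generation_gt=["42"], generated_texts="forty-two"),
]
print(exact_match(samples))  # expected: [1.0, 0.0]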
Add Configuration¶
# configs/metrics/retrieval/hit_rate.yaml
_target_: my_module.HitRateConfig
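The generation metric is registered the same way. The path below mirrors the retrieval example and is an assumption about your config layout; point _target_ at the module where ExactMatchConfig is actually defined.

# configs/metrics/generation/exact_match.yaml  (assumed path, mirroring the retrieval config)
_target_: my_module.ExactMatchConfig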
Use in Experiment¶
# configs/experiment.yaml
metrics:
  retrieval:
    - recall
    - hit_rate      # your metric
  generation:
    - rouge
    - exact_match   # your metric
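Because the config uses a _target_ key, it can also be resolved manually with Hydra's instantiate utility to confirm it loads before running a full experiment. This is a minimal sketch assuming the project follows the Hydra/OmegaConf convention suggested by _target_; the experiment runner normally does this resolution for you.

# Minimal sketch -- assumes Hydra-style resolution of the `_target_` key.
from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.load("configs/metrics/retrieval/hit_rate.yaml")
hit_rate_config = instantiate(cfg)               # builds my_module.HitRateConfig
metric_func = hit_rate_config.get_metric_func()  # -> hit_rate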
Next¶
- Metrics - See existing implementations
- Custom Pipeline - Test algorithms