from typing import Tuple, Dict, Any, List, Optional
import time


# Dummy minimal classes (safe fallback if your imports break)
class Observation:
    """Snapshot handed to the agent: log entries, alerts and a risk score."""

    def __init__(
        self,
        logs: Optional[List[Dict[str, Any]]] = None,
        alerts: Optional[List[Dict[str, Any]]] = None,
        risk_score: float = 0.0,
    ) -> None:
        # Falsy inputs (None or an empty list) are replaced by a fresh list so
        # instances never share a default container.
        self.logs = logs or []
        self.alerts = alerts or []
        self.risk_score = risk_score


class RewardBreakdown:
    """Placeholder for per-component reward details; carries no fields."""


class Reward:
    """Reward for a single step plus the running total for the episode."""

    def __init__(
        self,
        value: float = 0.0,
        breakdown: Optional[RewardBreakdown] = None,
        message: str = "",
        cumulative: float = 0.0,
    ) -> None:
        self.value = value
        self.breakdown = breakdown or RewardBreakdown()
        self.message = message
        self.cumulative = cumulative


class IncidentResponseEnv:
    """Minimal incident-response environment exposing ``reset`` and ``step``.

    Every step yields the same fixed reward and the same static observation;
    the episode is reported as done once the step counter reaches the
    maximum step count.
    """

    # Fixed per-step reward and the constant risk score reported in every
    # observation.
    _STEP_REWARD = 0.01
    _BASELINE_RISK = 0.1

    def __init__(self) -> None:
        self._step_count = 0
        self._max_steps = 50
        self._done = False
        self._cumulative_reward = 0.0

        # Minimal safe dummy data
        self._logs = [{"log_id": "L1", "message": "login success"}]
        self._alerts = [{"alert_id": "A1", "severity": "low"}]

    def _observe(self) -> Observation:
        """Build an observation from copies of the internal logs and alerts."""
        # Copy each entry so a caller mutating the observation cannot alter
        # the data that later reset()/step() calls report.
        return Observation(
            logs=[dict(entry) for entry in self._logs],
            alerts=[dict(entry) for entry in self._alerts],
            risk_score=self._BASELINE_RISK,
        )

    def reset(self) -> Tuple[Observation, Dict[str, Any]]:
        """Reset environment (MANDATORY for OpenEnv).

        Returns:
            A tuple ``(observation, info)`` where ``info`` is an empty dict.
        """
        self._step_count = 0
        self._done = False
        self._cumulative_reward = 0.0
        return self._observe(), {}

    def step(self, action) -> Tuple[Observation, Reward, bool, Dict[str, Any]]:
        """Step function (MANDATORY for OpenEnv).

        Args:
            action: Accepted for interface compatibility; this minimal
                environment does not inspect it.

        Returns:
            A tuple ``(observation, reward, done, info)``. ``done`` is True
            once the number of steps since the last reset reaches the
            maximum step count; ``info`` is an empty dict.
        """
        self._step_count += 1
        done = self._step_count >= self._max_steps
        # Keep the stored flag in sync with what is reported to the caller.
        self._done = done

        self._cumulative_reward += self._STEP_REWARD
        reward = Reward(
            value=self._STEP_REWARD,
            message="step executed",
            cumulative=self._cumulative_reward,
        )
        return self._observe(), reward, done, {}