pacai.core.game
import abc
import argparse
import copy
import logging
import math
import os
import random
import typing

import edq.util.json
import edq.util.time

import pacai.core.action
import pacai.core.agentaction
import pacai.core.agentinfo
import pacai.core.board
import pacai.core.gamestate
import pacai.core.isolation.level
import pacai.core.ui
import pacai.util.alias

DEFAULT_MAX_TURNS: int = -1
DEFAULT_AGENT_START_TIMEOUT: float = 0.0
DEFAULT_AGENT_END_TIMEOUT: float = 0.0
DEFAULT_AGENT_ACTION_TIMEOUT: float = 0.0

DEFAULT_AGENT: str = pacai.util.alias.AGENT_RANDOM.short

class GameInfo(edq.util.json.DictConverter):
    """
    A simple container that holds common information about a game.
    """

    def __init__(self,
            board_source: str,
            agent_infos: dict[int, pacai.core.agentinfo.AgentInfo],
            isolation_level: pacai.core.isolation.level.Level = pacai.core.isolation.level.Level.NONE,
            max_turns: int = DEFAULT_MAX_TURNS,
            agent_start_timeout: float = DEFAULT_AGENT_START_TIMEOUT,
            agent_end_timeout: float = DEFAULT_AGENT_END_TIMEOUT,
            agent_action_timeout: float = DEFAULT_AGENT_ACTION_TIMEOUT,
            seed: int | None = None,
            training: bool = False,
            extra_info: dict[str, typing.Any] | None = None,
            ) -> None:
        """
        Store the settings for a single game.
        A random seed is generated when `seed` is None.
        Raises a ValueError if `agent_infos` is empty.
        """

        if (seed is None):
            seed = random.randint(0, 2**64)

        self.seed: int = seed
        """ The random seed for this game's RNG. """

        self.board_source: str = board_source
        """ Where the board from this game is loaded from. """

        self.agent_infos: dict[int, pacai.core.agentinfo.AgentInfo] = agent_infos
        """ The required information for creating the agents for this game. """

        if (len(self.agent_infos) == 0):
            raise ValueError("No agents provided.")

        self.isolation_level: pacai.core.isolation.level.Level = isolation_level
        """ The isolation level to use for this game. """

        self.max_turns: int = max_turns
        """
        The total number of moves (between all agents) allowed for this game.
        If -1, unlimited moves are allowed.
        """

        self.agent_start_timeout: float = agent_start_timeout
        """
        The maximum number of seconds an agent is allowed when starting a game.
        If <= 0, unlimited time is allowed.
        """

        self.agent_end_timeout: float = agent_end_timeout
        """
        The maximum number of seconds an agent is allowed when ending a game.
        If <= 0, unlimited time is allowed.
        """

        self.agent_action_timeout: float = agent_action_timeout
        """
        The maximum number of seconds an agent is allowed when getting an action.
        If <= 0, unlimited time is allowed.
        """

        self.training: bool = training
        """ Whether this game is meant for training agents. """

        if (extra_info is None):
            extra_info = {}

        self.extra_info: dict[str, typing.Any] = extra_info
        """ Any additional arguments passed to the game. """

    def to_dict(self) -> dict[str, typing.Any]:
        """ Return a dict that represents this object and can be passed to from_dict(). """

        return {
            'seed': self.seed,
            'board_source': self.board_source,
            'agent_infos': {agent_index: info.to_dict() for (agent_index, info) in self.agent_infos.items()},
            'isolation_level': self.isolation_level.value,
            'max_turns': self.max_turns,
            'agent_start_timeout': self.agent_start_timeout,
            'agent_end_timeout': self.agent_end_timeout,
            'agent_action_timeout': self.agent_action_timeout,
            'training': self.training,
            'extra_info': self.extra_info,
        }

    @classmethod
    def from_dict(cls, data: dict[str, typing.Any]) -> typing.Any:
        """
        Build an instance from a dict shaped like the output of to_dict().
        Only 'board_source' and 'agent_infos' are required, all other keys fall back to defaults.
        """

        # Agent indexes are converted back to ints (they may arrive as strings, e.g., from JSON object keys).
        agent_infos = {
            int(agent_index): pacai.core.agentinfo.AgentInfo.from_dict(raw_info)
            for (agent_index, raw_info) in data['agent_infos'].items()
        }

        return cls(
            seed = data.get('seed', None),
            board_source = data['board_source'],
            agent_infos = agent_infos,
            isolation_level = pacai.core.isolation.level.Level(data.get('isolation_level', pacai.core.isolation.level.Level.NONE.value)),
            max_turns = data.get('max_turns', DEFAULT_MAX_TURNS),
            agent_start_timeout = data.get('agent_start_timeout', DEFAULT_AGENT_START_TIMEOUT),
            agent_end_timeout = data.get('agent_end_timeout', DEFAULT_AGENT_END_TIMEOUT),
            agent_action_timeout = data.get('agent_action_timeout', DEFAULT_AGENT_ACTION_TIMEOUT),
            training = data.get('training', False),
            extra_info = data.get('extra_info', None))

class GameResult(edq.util.json.DictConverter):
    """ The result of running a game. """

    def __init__(self,
            game_id: int,
            game_info: GameInfo,
            score: float = 0,
            game_timeout: bool = False,
            timeout_agent_indexes: list[int] | None = None,
            crash_agent_indexes: list[int] | None = None,
            winning_agent_indexes: list[int] | None = None,
            start_time: edq.util.time.Timestamp | None = None,
            end_time: edq.util.time.Timestamp | None = None,
            history: list[pacai.core.agentaction.AgentActionRecord] | None = None,
            agent_complete_records: dict[int, pacai.core.agentaction.AgentActionRecord] | None = None,
            **kwargs: typing.Any) -> None:
        """
        Create a result record for a game.
        Any collection left as None starts out empty,
        and the start time defaults to the current time.
        Additional keyword arguments are accepted and ignored.
        """

        self.game_id: int = game_id
        """ The ID of the game result. """

        self.game_info: GameInfo = game_info
        """ The core information about this game. """

        if (start_time is None):
            start_time = edq.util.time.Timestamp.now()

        self.start_time: edq.util.time.Timestamp = start_time
        """ The time the game started at. """

        self.end_time: edq.util.time.Timestamp | None = end_time
        """ The time the game ended at. """

        if (history is None):
            history = []

        self.history: list[pacai.core.agentaction.AgentActionRecord] = history
        """ The history of actions taken by each agent in this game. """

        if (agent_complete_records is None):
            agent_complete_records = {}

        self.agent_complete_records: dict[int, pacai.core.agentaction.AgentActionRecord] = agent_complete_records
        """
        The record received from an agent when the game finishes.
        For agents that learn, this may include information that the agent learned this game.
        """

        self.score: float = score
        """ The score of the game. """

        self.game_timeout: bool = game_timeout
        """ Indicates that the game has timed out (reached the maximum number of moves). """

        if (timeout_agent_indexes is None):
            timeout_agent_indexes = []

        self.timeout_agent_indexes: list[int] = timeout_agent_indexes
        """ The list of agents that timed out in this game. """

        if (crash_agent_indexes is None):
            crash_agent_indexes = []

        self.crash_agent_indexes: list[int] = crash_agent_indexes
        """ The list of agents that crashed in this game. """

        if (winning_agent_indexes is None):
            winning_agent_indexes = []

        self.winning_agent_indexes: list[int] = winning_agent_indexes
        """
        The agents that are considered the "winner" of this game.
        Games may interpret this value in different ways.
        """

    def to_dict(self) -> dict[str, typing.Any]:
        """ Return a dict that represents this object and can be passed to from_dict(). """

        return {
            'game_id': self.game_id,
            'game_info': self.game_info.to_dict(),
            'start_time': self.start_time,
            'end_time': self.end_time,
            'history': [item.to_dict() for item in self.history],
            'agent_complete_records': {agent_index: record.to_dict() for (agent_index, record) in self.agent_complete_records.items()},
            'score': self.score,
            'game_timeout': self.game_timeout,
            'timeout_agent_indexes': self.timeout_agent_indexes,
            'crash_agent_indexes': self.crash_agent_indexes,
            'winning_agent_indexes': self.winning_agent_indexes,
        }

    @classmethod
    def from_dict(cls, data: dict[str, typing.Any]) -> typing.Any:
        """
        Build an instance from a dict shaped like the output of to_dict().
        Only 'game_id' and 'game_info' are required, all other keys fall back to defaults.
        """

        # Agent indexes are converted back to ints (they may arrive as strings, e.g., from JSON object keys).
        # This mirrors what GameInfo.from_dict() does for agent infos.
        agent_complete_records = {}
        for (agent_index, raw_record) in data.get('agent_complete_records', {}).items():
            agent_complete_records[int(agent_index)] = pacai.core.agentaction.AgentActionRecord.from_dict(raw_record)

        return cls(
            data['game_id'],
            GameInfo.from_dict(data['game_info']),
            start_time = data.get('start_time', None),
            end_time = data.get('end_time', None),
            history = [pacai.core.agentaction.AgentActionRecord.from_dict(item) for item in data.get('history', [])],
            agent_complete_records = agent_complete_records,
            score = data.get('score', 0),
            game_timeout = data.get('game_timeout', False),
            timeout_agent_indexes = data.get('timeout_agent_indexes', None),
            crash_agent_indexes = data.get('crash_agent_indexes', None),
            # A missing list must default to None (which becomes an empty list), never to a non-list value.
            winning_agent_indexes = data.get('winning_agent_indexes', None),
        )

    def get_duration_secs(self) -> float:
        """
        Get the game's duration in seconds.
        Will return positive infinity if the game has no end time
        (it is still going or crashed (in very rare cases)).
        """

        if (self.end_time is None):
            return math.inf

        return self.end_time.sub(self.start_time).to_secs()

class Game(abc.ABC):
    """
    A game that can be run in pacai.
    Games combine the rules, layouts, and agents to run.
    """

    def __init__(self,
            game_info: GameInfo,
            board: pacai.core.board.Board,
            save_path: str | None = None,
            is_replay: bool = False,
            ) -> None:
        self.game_info: GameInfo = game_info
        """ The core information about this game. """

        self._board: pacai.core.board.Board = board
        """ The board this game will be played on. """

        self._save_path: str | None = save_path
        """ Where to save the results of this game. """

        self._is_replay: bool = is_replay
        """
        Indicates that this game is being loaded from a replay.
        Some behavior, like saving the result, will be modified.
        """

    def process_args(self, args: argparse.Namespace) -> None:
        """ Process any special arguments from the command-line. """

    @abc.abstractmethod
    def get_initial_state(self,
            rng: random.Random,
            board: pacai.core.board.Board,
            agent_infos: dict[int, pacai.core.agentinfo.AgentInfo],
            ) -> pacai.core.gamestate.GameState:
        """ Create the initial state for this game. """

    def process_turn(self,
            state: pacai.core.gamestate.GameState,
            action_record: pacai.core.agentaction.AgentActionRecord,
            result: GameResult,
            rng: random.Random,
            ) -> pacai.core.gamestate.GameState:
        """
        Process the given agent action and return an updated game state.
        The returned game state may be a copy or modified version of the passed in game state.
        Timeouts and crashes are recorded in the result and no action is applied for that turn.
        Raises a ValueError if the agent's action is not legal in the given state.
        """

        # The agent has timed out.
        if (action_record.timeout):
            result.timeout_agent_indexes.append(action_record.agent_index)
            state.process_agent_timeout(action_record.agent_index)
            return state

        # The agent has crashed.
        if (action_record.crashed):
            result.crash_agent_indexes.append(action_record.agent_index)
            state.process_agent_crash(action_record.agent_index)
            return state

        action = action_record.get_action()
        if (action not in state.get_legal_actions()):
            raise ValueError(f"Illegal action for agent {action_record.agent_index}: '{action}'.")

        self._call_state_process_turn_full(state, action, rng)

        return state

    def _call_state_process_turn_full(self,
            state: pacai.core.gamestate.GameState,
            action: pacai.core.action.Action,
            rng: random.Random) -> None:
        """ Call on the game state to process a full turn. """

        state.process_turn_full(action, rng)

    def check_end(self, state: pacai.core.gamestate.GameState) -> bool:
        """
        Check to see if the game is over.
        Return True if the game is now over, False otherwise.

        By default, this will just check pacai.core.gamestate.GameState.game_over,
        but child games can override for more complex functionality.
        """

        return state.game_over

    def game_complete(self, state: pacai.core.gamestate.GameState, result: GameResult) -> None:
        """
        Make any last adjustments to the game result after the game is over.
        """

    def run(self, ui: pacai.core.ui.UI) -> GameResult:
        """
        The main "game loop" for all games.
        Returns the result of the game,
        which is also written to the save path (when one is set and this is not a replay).
        """

        logging.debug("Starting a game with seed: %d.", self.game_info.seed)

        # Create a new random number generator just for this game.
        rng = random.Random(self.game_info.seed)

        # Keep track of what happens during this game.
        game_id = rng.randint(0, 2**64)
        result = GameResult(game_id, self.game_info)

        # Initialize the agent isolator.
        isolator = self.game_info.isolation_level.get_isolator()
        isolator.init_agents(self.game_info.agent_infos)

        # Keep track of all the user inputs since the last time an agent moved.
        # Note that we need to keep track for all agents,
        # since the UI will only tell us the inputs since the last call.
        agent_user_inputs: dict[int, list[pacai.core.action.Action]] = {}
        for agent_index in self.game_info.agent_infos:
            agent_user_inputs[agent_index] = []

        # Create the initial game state (and force its seed).
        state = self.get_initial_state(rng, self._board, self.game_info.agent_infos)
        state.seed = game_id
        state.game_start()

        board_highlights: list[pacai.core.board.Highlight] = []

        # Notify agents about the start of the game.
        records = isolator.game_start(rng, state, self.game_info.agent_start_timeout)
        for record in records.values():
            if (record.timeout):
                result.timeout_agent_indexes.append(record.agent_index)
                state.process_agent_timeout(record.agent_index)
            elif (record.crashed):
                result.crash_agent_indexes.append(record.agent_index)
                state.process_agent_crash(record.agent_index)
            else:
                board_highlights += record.get_board_highlights()

        state.agents_game_start(records)

        # Start the UI.
        ui.game_start(state, board_highlights = board_highlights)

        while (not self.check_end(state)):
            logging.trace("Turn %d, agent %d.", state.turn_count, state.agent_index)  # type: ignore[attr-defined]  # pylint: disable=no-member

            # Receive any user inputs from the UI.
            self._receive_user_inputs(agent_user_inputs, ui)

            # Get the next action from the agent.
            action_record = isolator.get_action(state, agent_user_inputs[state.agent_index], self.game_info.agent_action_timeout)

            # Check if we need to clear any user inputs.
            if (action_record.get_clear_inputs()):
                agent_user_inputs[state.agent_index] = []

            # Execute the next action and update the state.
            state = self.process_turn(state, action_record, result, rng)

            # Update the UI.
            ui.update(state, board_highlights = action_record.get_board_highlights())

            # Update the game result and move history.
            result.history.append(action_record)

            # Check for game ending conditions.
            if (self.check_end(state)):
                break

            # Check if this game has run for the maximum number of turns.
            if ((self.game_info.max_turns > 0) and (state.turn_count >= self.game_info.max_turns)):
                state.process_game_timeout()
                result.game_timeout = True
                break

        # Mark the end time of the game.
        result.end_time = edq.util.time.Timestamp.now()

        # Notify the state about the end of the game.
        winners = state.game_complete()
        result.winning_agent_indexes += winners

        result.score = state.score

        # Notify agents about the end of this game.
        result.agent_complete_records = isolator.game_complete(state, self.game_info.agent_end_timeout)

        # Allow the game to make final updates to the result.
        self.game_complete(state, result)

        # Update the UI.
        ui.game_complete(state)

        # Cleanup
        isolator.close()
        ui.close()

        if ((not self._is_replay) and (self._save_path is not None)):
            logging.info("Saving results to '%s'.", self._save_path)
            edq.util.json.dump_path(result, self._save_path)

        return result

    def _receive_user_inputs(self,
            agent_user_inputs: dict[int, list[pacai.core.action.Action]],
            ui: pacai.core.ui.UI,
            ) -> None:
        """ Add the current user inputs to the running list for each agent. """

        new_user_inputs = ui.get_user_inputs()

        # Every agent's pending list is extended in place with the same new inputs.
        for user_inputs in agent_user_inputs.values():
            user_inputs += new_user_inputs

def set_cli_args(parser: argparse.ArgumentParser, default_board: str | None = None) -> argparse.ArgumentParser:
    """
    Set common CLI arguments.
    This is a sibling to init_from_args(), as the arguments set here can be interpreted there.
    The passed in parser is modified and returned.
    """

    parser.add_argument('--board', dest = 'board',
            action = 'store', type = str, default = default_board,
            help = ('Play on this board (default: %(default)s).'
                    + ' This may be the full path to a board, or just a filename.'
                    + ' If just a filename, than the `pacai/resources/boards` directory will be checked (using a ".board" extension.'))

    parser.add_argument('--num-games', dest = 'num_games',
            action = 'store', type = int, default = 1,
            help = 'The number of games to play (default: %(default)s).')

    parser.add_argument('--num-training', dest = 'num_training',
            action = 'store', type = int, default = 0,
            help = 'The number of games to play in training mode before playing `--num-games` real games (default: %(default)s).')

    parser.add_argument('--seed', dest = 'seed',
            action = 'store', type = int, default = None,
            help = 'The random seed for the game (will be randomly generated if not set.')

    parser.add_argument('--max-turns', dest = 'max_turns',
            action = 'store', type = int, default = DEFAULT_MAX_TURNS,
            help = 'The maximum number of turns/moves (total for all agents) allowed in this game (-1 for unlimited) (default: %(default)s).')

    parser.add_argument('--agent-start-timeout', dest = 'agent_start_timeout',
            action = 'store', type = float, default = DEFAULT_AGENT_START_TIMEOUT,
            help = ('The maximum number of seconds each agent is allowed when starting a game (<= 0 for unlimited time) (default: %(default)s).'
                    + ' Note that the "none" isolation level cannot enforce timeouts.'))

    parser.add_argument('--agent-end-timeout', dest = 'agent_end_timeout',
            action = 'store', type = float, default = DEFAULT_AGENT_END_TIMEOUT,
            help = ('The maximum number of seconds each agent is allowed when ending a game (<= 0 for unlimited time) (default: %(default)s).'
                    + ' Note that the "none" isolation level cannot enforce timeouts.'))

    parser.add_argument('--agent-action-timeout', dest = 'agent_action_timeout',
            action = 'store', type = float, default = DEFAULT_AGENT_ACTION_TIMEOUT,
            help = ('The maximum number of seconds each agent is allowed when getting an action (<= 0 for unlimited time) (default: %(default)s).'
                    + ' Note that the "none" isolation level cannot enforce timeouts.'))

    parser.add_argument('--isolation', dest = 'isolation_level', metavar = 'LEVEL',
            action = 'store', type = str, default = pacai.core.isolation.level.Level.NONE.value,
            choices = pacai.core.isolation.level.LEVELS,
            help = ('Set the agent isolation level for this game (default: %(default)s).'
                    + ' Choose one of:'
                    + ' `none` -- Do not make any attempt to isolate the agent code from the game (fastest and least secure),'
                    + ' `process` -- Run the agent code in a separate process'
                    + ' (offers some protection, but still vulnerable to disk or execution exploits),'
                    + ' `tcp` -- Open TCP listeners to communicate with agents (most secure, requires additional work to set up agents).'))

    # The example command matches the description: agent 3 gets `foo`, agent 2 gets `bar`.
    parser.add_argument('--agent-arg', dest = 'raw_agent_args', metavar = 'ARG',
            action = 'append', type = str, default = [],
            help = ('Specify arguments directly to agents (may be used multiple times).'
                    + ' The value for this argument must be formatted as "agent_index::key=value",'
                    + ' for example to set `foo = 9` for agent 3 and `bar = a` for agent 2, we can use:'
                    + ' `--agent-arg 3::foo=9 --agent-arg 2::bar=a`.'))

    parser.add_argument('--remove-agent', dest = 'remove_agent_indexes', metavar = 'AGENT_INDEX',
            action = 'append', type = int, default = [],
            help = 'Remove this agent from the board before starting (may be used multiple times).')

    parser.add_argument('--save-path', dest = 'save_path',
            action = 'store', type = str, default = None,
            help = ('If specified, write the result of this game to the specified location.'
                    + ' This file can be replayed with `--replay-path`.'))

    parser.add_argument('--replay-path', dest = 'replay_path',
            action = 'store', type = str, default = None,
            help = 'If specified, replay the game whose result was saved at the specified path with `--save-path`.')

    return parser

def init_from_args(
        args: argparse.Namespace,
        game_class: typing.Type[Game],
        base_agent_infos: dict[int, pacai.core.agentinfo.AgentInfo] | None = None,
        remove_agent_indexes: list[int] | None = None,
        board_options: dict[str, typing.Any] | None = None,
        ) -> argparse.Namespace:
    """
    Take in args from a parser that was passed to set_cli_args(),
    and initialize the proper components.
    This will create a number of games (and related resources)
    based on `--num-games` + `--num-training`.
    Each of these resources will be placed in their respective list at
    `args._boards`, `args._agent_infos`, or `args._games`.

    Raises a ValueError if no board is specified or if no games are requested.
    """

    if (base_agent_infos is None):
        base_agent_infos = {}

    if (remove_agent_indexes is None):
        remove_agent_indexes = []

    # If this is a replay,
    # then all the core arguments are loaded differently (directly from the file).
    # Use the replay file to override all the current options.
    if (args.replay_path is not None):
        _override_args_with_replay(args, base_agent_infos)
        remove_agent_indexes = []

    if (args.board is None):
        raise ValueError("No board was specified.")

    total_games = args.num_games + args.num_training

    if (total_games <= 0):
        raise ValueError(f"At least one game must be played (--num-games + --num-training), {total_games} was specified.")

    # Establish an RNG to generate seeds for each game using the given seed.
    seed = args.seed
    if (seed is None):
        seed = random.randint(0, 2**64)

    logging.debug("Using source seed for games: %d.", seed)
    rng = random.Random(seed)

    if (board_options is None):
        board_options = {}

    # Either take the board as given, or load it from a path.
    if (isinstance(args.board, pacai.core.board.Board)):
        board = args.board
    else:
        board = pacai.core.board.load_path(args.board, **board_options)

    # Remove specified agents from the board.
    # Build a new list so that the list passed in by the caller is never modified.
    remove_agent_indexes = [*remove_agent_indexes, *args.remove_agent_indexes]
    for remove_agent_index in remove_agent_indexes:
        board.remove_agent(remove_agent_index)

    agent_infos = _parse_agent_infos(board.agent_indexes(), args.raw_agent_args, base_agent_infos, remove_agent_indexes)

    base_save_path = args.save_path

    all_boards = []
    all_agent_infos = []
    all_games = []

    for i in range(total_games):
        game_seed = rng.randint(0, 2**64)

        # Each game gets its own copy of the board and agent infos.
        all_boards.append(board.copy())
        all_agent_infos.append(copy.deepcopy(agent_infos))

        game_info = GameInfo(
            board.source,
            all_agent_infos[-1],
            isolation_level = pacai.core.isolation.level.Level(args.isolation_level),
            max_turns = args.max_turns,
            agent_start_timeout = args.agent_start_timeout,
            agent_end_timeout = args.agent_end_timeout,
            agent_action_timeout = args.agent_action_timeout,
            # Training games are played first.
            training = (i < args.num_training),
            seed = game_seed
        )

        # Suffix the save path if there is more than one game.
        save_path = base_save_path
        if ((save_path is not None) and (total_games > 1)):
            parts = os.path.splitext(save_path)
            save_path = f"{parts[0]}_{i:03d}{parts[1]}"

        game_args = {
            'game_info': game_info,
            'board': all_boards[-1],
            'save_path': save_path,
        }

        game = game_class(**game_args)
        game.process_args(args)

        all_games.append(game)

    setattr(args, '_boards', all_boards)
    setattr(args, '_agent_infos', all_agent_infos)
    setattr(args, '_games', all_games)

    return args

def _override_args_with_replay(args: argparse.Namespace, base_agent_infos: dict[int, pacai.core.agentinfo.AgentInfo]) -> None:
    """
    Override the args with the settings from the replay in the args.
    Both `args` and `base_agent_infos` are modified in place:
    the base agent infos are replaced with scripted agents that repeat the recorded actions.
    """

    logging.info("Loading replay from '%s'.", args.replay_path)
    replay_info = typing.cast(GameResult, edq.util.json.load_object_path(args.replay_path, GameResult))

    # Overrides from the replay info.
    args.board = replay_info.game_info.board_source
    args.seed = replay_info.game_info.seed

    # Special settings for replays.
    args.num_games = 1
    args.num_training = 0
    args.max_turns = len(replay_info.history)

    # Script the moves for each agent based on the replay's history.
    scripted_actions: dict[int, list[pacai.core.action.Action]] = {}
    for item in replay_info.history:
        scripted_actions.setdefault(item.agent_index, []).append(item.get_action())

    base_agent_infos.clear()

    for (agent_index, actions) in scripted_actions.items():
        base_agent_infos[agent_index] = pacai.core.agentinfo.AgentInfo(
            name = pacai.util.alias.AGENT_SCRIPTED.short,
            move_delay = replay_info.game_info.agent_infos[agent_index].move_delay,
            actions = actions,
        )

def _parse_agent_infos(
        agent_indexes: list[int],
        raw_args: list[str],
        base_agent_infos: dict[int, pacai.core.agentinfo.AgentInfo],
        remove_agent_indexes: list[int]) -> dict[int, pacai.core.agentinfo.AgentInfo]:
    """
    Build the agent infos for the given agent indexes.
    Every agent starts as the default agent,
    is then updated with any matching base agent info,
    and is finally updated with any CLI arguments (formatted as "agent_index::key=value").
    Agents listed for removal are dropped from the output.

    Raises a ValueError for an improperly formatted CLI argument or one with an unknown agent index.
    """

    # Initialize with random agents.
    agent_info = {agent_index: pacai.core.agentinfo.AgentInfo(name = DEFAULT_AGENT) for agent_index in sorted(agent_indexes)}

    # Take any args from the base args.
    for (agent_index, base_agent_info) in base_agent_infos.items():
        if (agent_index in agent_info):
            agent_info[agent_index].update(base_agent_info)

    # Update with CLI args.
    for raw_arg in raw_args:
        raw_arg = raw_arg.strip()
        if (len(raw_arg) == 0):
            continue

        parts = raw_arg.split('::', 1)
        if (len(parts) != 2):
            raise ValueError(f"Improperly formatted CLI agent argument: '{raw_arg}'.")

        agent_index = int(parts[0])
        if (agent_index not in agent_info):
            raise ValueError(f"CLI agent argument has an unknown agent index: {agent_index}.")

        raw_pair = parts[1]

        parts = raw_pair.split('=', 1)
        if (len(parts) != 2):
            raise ValueError(f"Improperly formatted CLI agent argument key/value pair: '{raw_pair}'.")

        key = parts[0].strip()
        value = parts[1].strip()

        agent_info[agent_index].set_from_string(key, value)

    # Remove specified agents.
    for remove_agent_index in remove_agent_indexes:
        if (remove_agent_index in agent_info):
            del agent_info[remove_agent_index]

    return agent_info
class GameInfo(edq.util.json.DictConverter):
    """
    A simple container that holds common information about a game.
    """

    def __init__(self,
            board_source: str,
            agent_infos: dict[int, pacai.core.agentinfo.AgentInfo],
            isolation_level: pacai.core.isolation.level.Level = pacai.core.isolation.level.Level.NONE,
            max_turns: int = DEFAULT_MAX_TURNS,
            agent_start_timeout: float = DEFAULT_AGENT_START_TIMEOUT,
            agent_end_timeout: float = DEFAULT_AGENT_END_TIMEOUT,
            agent_action_timeout: float = DEFAULT_AGENT_ACTION_TIMEOUT,
            seed: int | None = None,
            training: bool = False,
            extra_info: dict[str, typing.Any] | None = None,
            ) -> None:
        """
        Store the settings for a single game.
        A random seed is generated when `seed` is None.
        Raises a ValueError if `agent_infos` is empty.
        """

        if (seed is None):
            seed = random.randint(0, 2**64)

        self.seed: int = seed
        """ The random seed for this game's RNG. """

        self.board_source: str = board_source
        """ Where the board from this game is loaded from. """

        self.agent_infos: dict[int, pacai.core.agentinfo.AgentInfo] = agent_infos
        """ The required information for creating the agents for this game. """

        if (len(self.agent_infos) == 0):
            raise ValueError("No agents provided.")

        self.isolation_level: pacai.core.isolation.level.Level = isolation_level
        """ The isolation level to use for this game. """

        self.max_turns: int = max_turns
        """
        The total number of moves (between all agents) allowed for this game.
        If -1, unlimited moves are allowed.
        """

        self.agent_start_timeout: float = agent_start_timeout
        """
        The maximum number of seconds an agent is allowed when starting a game.
        If <= 0, unlimited time is allowed.
        """

        self.agent_end_timeout: float = agent_end_timeout
        """
        The maximum number of seconds an agent is allowed when ending a game.
        If <= 0, unlimited time is allowed.
        """

        self.agent_action_timeout: float = agent_action_timeout
        """
        The maximum number of seconds an agent is allowed when getting an action.
        If <= 0, unlimited time is allowed.
        """

        self.training: bool = training
        """ Whether this game is meant for training agents. """

        if (extra_info is None):
            extra_info = {}

        self.extra_info: dict[str, typing.Any] = extra_info
        """ Any additional arguments passed to the game. """

    def to_dict(self) -> dict[str, typing.Any]:
        """ Return a dict that represents this object and can be passed to from_dict(). """

        return {
            'seed': self.seed,
            'board_source': self.board_source,
            'agent_infos': {agent_index: info.to_dict() for (agent_index, info) in self.agent_infos.items()},
            'isolation_level': self.isolation_level.value,
            'max_turns': self.max_turns,
            'agent_start_timeout': self.agent_start_timeout,
            'agent_end_timeout': self.agent_end_timeout,
            'agent_action_timeout': self.agent_action_timeout,
            'training': self.training,
            'extra_info': self.extra_info,
        }

    @classmethod
    def from_dict(cls, data: dict[str, typing.Any]) -> typing.Any:
        """
        Build an instance from a dict shaped like the output of to_dict().
        Only 'board_source' and 'agent_infos' are required, all other keys fall back to defaults.
        """

        # Agent indexes are converted back to ints (they may arrive as strings, e.g., from JSON object keys).
        agent_infos = {
            int(agent_index): pacai.core.agentinfo.AgentInfo.from_dict(raw_info)
            for (agent_index, raw_info) in data['agent_infos'].items()
        }

        return cls(
            seed = data.get('seed', None),
            board_source = data['board_source'],
            agent_infos = agent_infos,
            isolation_level = pacai.core.isolation.level.Level(data.get('isolation_level', pacai.core.isolation.level.Level.NONE.value)),
            max_turns = data.get('max_turns', DEFAULT_MAX_TURNS),
            agent_start_timeout = data.get('agent_start_timeout', DEFAULT_AGENT_START_TIMEOUT),
            agent_end_timeout = data.get('agent_end_timeout', DEFAULT_AGENT_END_TIMEOUT),
            agent_action_timeout = data.get('agent_action_timeout', DEFAULT_AGENT_ACTION_TIMEOUT),
            training = data.get('training', False),
            extra_info = data.get('extra_info', None))
A simple container that holds common information about a game.
def __init__(self,
        board_source: str,
        agent_infos: dict[int, pacai.core.agentinfo.AgentInfo],
        isolation_level: pacai.core.isolation.level.Level = pacai.core.isolation.level.Level.NONE,
        max_turns: int = DEFAULT_MAX_TURNS,
        agent_start_timeout: float = DEFAULT_AGENT_START_TIMEOUT,
        agent_end_timeout: float = DEFAULT_AGENT_END_TIMEOUT,
        agent_action_timeout: float = DEFAULT_AGENT_ACTION_TIMEOUT,
        seed: int | None = None,
        training: bool = False,
        extra_info: dict[str, typing.Any] | None = None,
        ) -> None:
    """
    Record the settings for one game.
    When no seed is given, a random one is drawn.
    A ValueError is raised when no agents are provided.
    """

    # Only draw a random seed when the caller did not supply one.
    self.seed: int = random.randint(0, 2**64) if (seed is None) else seed
    """ The seed used for this game's RNG. """

    self.board_source: str = board_source
    """ The source that this game's board is loaded from. """

    self.agent_infos: dict[int, pacai.core.agentinfo.AgentInfo] = agent_infos
    """ The information needed to create this game's agents. """

    if (len(agent_infos) == 0):
        raise ValueError("No agents provided.")

    self.isolation_level: pacai.core.isolation.level.Level = isolation_level
    """ The isolation level this game uses. """

    self.max_turns: int = max_turns
    """
    The number of moves (totaled over all agents) this game allows.
    A value of -1 allows unlimited moves.
    """

    self.agent_start_timeout: float = agent_start_timeout
    """
    The number of seconds an agent may take when starting a game.
    Values <= 0 allow unlimited time.
    """

    self.agent_end_timeout: float = agent_end_timeout
    """
    The number of seconds an agent may take when ending a game.
    Values <= 0 allow unlimited time.
    """

    self.agent_action_timeout: float = agent_action_timeout
    """
    The number of seconds an agent may take when getting an action.
    Values <= 0 allow unlimited time.
    """

    self.training: bool = training
    """ True when this game is meant for training agents. """

    self.extra_info: dict[str, typing.Any] = {} if (extra_info is None) else extra_info
    """ Additional arguments passed to the game. """
The required information for creating the agents for this game.
The total number of moves (between all agents) allowed for this game. If -1, unlimited moves are allowed.
The maximum number of seconds an agent is allowed when starting a game. If <= 0, unlimited time is allowed.
The maximum number of seconds an agent is allowed when ending a game. If <= 0, unlimited time is allowed.
The maximum number of seconds an agent is allowed when getting an action. If <= 0, unlimited time is allowed.
def to_dict(self) -> dict[str, typing.Any]:
    """
    Build a plain dict describing this game info.
    Each agent info is converted with its own to_dict(),
    and the isolation level is stored by its value.
    """

    raw_agent_infos = {}
    for (agent_index, agent_info) in self.agent_infos.items():
        raw_agent_infos[agent_index] = agent_info.to_dict()

    data: dict[str, typing.Any] = {
        'seed': self.seed,
        'board_source': self.board_source,
        'agent_infos': raw_agent_infos,
        'isolation_level': self.isolation_level.value,
        'max_turns': self.max_turns,
        'agent_start_timeout': self.agent_start_timeout,
        'agent_end_timeout': self.agent_end_timeout,
        'agent_action_timeout': self.agent_action_timeout,
        'training': self.training,
        'extra_info': self.extra_info,
    }

    return data
Return a dict that can be used to represent this object. If the dict is passed to from_dict(), an identical object should be reconstructed.
@classmethod
def from_dict(cls, data: dict[str, typing.Any]) -> typing.Any:
    """
    Create an instance from a dict shaped like the output of to_dict().
    Only 'board_source' and 'agent_infos' are required,
    every other key falls back to the constructor's default.
    """

    seed = data.get('seed', None)
    board_source = data['board_source']

    # Agent indexes are converted back to ints.
    agent_infos = {}
    for (raw_index, raw_info) in data['agent_infos'].items():
        agent_infos[int(raw_index)] = pacai.core.agentinfo.AgentInfo.from_dict(raw_info)

    raw_level = data.get('isolation_level', pacai.core.isolation.level.Level.NONE.value)
    isolation_level = pacai.core.isolation.level.Level(raw_level)

    return cls(
        board_source = board_source,
        agent_infos = agent_infos,
        isolation_level = isolation_level,
        max_turns = data.get('max_turns', DEFAULT_MAX_TURNS),
        agent_start_timeout = data.get('agent_start_timeout', DEFAULT_AGENT_START_TIMEOUT),
        agent_end_timeout = data.get('agent_end_timeout', DEFAULT_AGENT_END_TIMEOUT),
        agent_action_timeout = data.get('agent_action_timeout', DEFAULT_AGENT_ACTION_TIMEOUT),
        seed = seed,
        training = data.get('training', False),
        extra_info = data.get('extra_info', None))
Return an instance of this subclass created using the given dict. If the dict came from to_dict(), the returned object should be identical to the original.
class GameResult(edq.util.json.DictConverter):
    """ The result of running a game. """

    def __init__(self,
            game_id: int,
            game_info: GameInfo,
            score: float = 0,
            game_timeout: bool = False,
            timeout_agent_indexes: list[int] | None = None,
            crash_agent_indexes: list[int] | None = None,
            winning_agent_indexes: list[int] | None = None,
            start_time: edq.util.time.Timestamp | None = None,
            end_time: edq.util.time.Timestamp | None = None,
            history: list[pacai.core.agentaction.AgentActionRecord] | None = None,
            agent_complete_records: dict[int, pacai.core.agentaction.AgentActionRecord] | None = None,
            **kwargs: typing.Any) -> None:
        """
        Create a result for the given game.
        Any list/dict argument left as None is replaced with a new empty container,
        and a missing start time is replaced with the current time.
        Extra keyword arguments are accepted and ignored.
        """

        self.game_id: int = game_id
        """ The ID of the game result. """

        self.game_info: GameInfo = game_info
        """ The core information about this game. """

        if (start_time is None):
            start_time = edq.util.time.Timestamp.now()

        self.start_time: edq.util.time.Timestamp = start_time
        """ The time the game started at. """

        self.end_time: edq.util.time.Timestamp | None = end_time
        """ The time the game ended at. """

        if (history is None):
            history = []

        self.history: list[pacai.core.agentaction.AgentActionRecord] = history
        """ The history of actions taken by each agent in this game. """

        if (agent_complete_records is None):
            agent_complete_records = {}

        self.agent_complete_records: dict[int, pacai.core.agentaction.AgentActionRecord] = agent_complete_records
        """
        The record received from an agent when the game finishes.
        For agents that learn, this may include information that the agent learned this game.
        """

        self.score: float = score
        """ The score of the game. """

        self.game_timeout: bool = game_timeout
        """ Indicates that the game has timed out (reached the maximum number of moves). """

        if (timeout_agent_indexes is None):
            timeout_agent_indexes = []

        self.timeout_agent_indexes: list[int] = timeout_agent_indexes
        """ The list of agents that timed out in this game. """

        if (crash_agent_indexes is None):
            crash_agent_indexes = []

        self.crash_agent_indexes: list[int] = crash_agent_indexes
        """ The list of agents that crashed in this game. """

        if (winning_agent_indexes is None):
            winning_agent_indexes = []

        self.winning_agent_indexes: list[int] = winning_agent_indexes
        """
        The agents that are considered the "winner" of this game.
        Games may interpret this value in different ways.
        """

    def to_dict(self) -> dict[str, typing.Any]:
        """
        Build a dict describing this result.
        The game info, history items, and agent records are converted with their own to_dict().
        """

        return {
            'game_id': self.game_id,
            'game_info': self.game_info.to_dict(),
            'start_time': self.start_time,
            'end_time': self.end_time,
            'history': [item.to_dict() for item in self.history],
            'agent_complete_records': {agent_index: record.to_dict() for (agent_index, record) in self.agent_complete_records.items()},
            'score': self.score,
            'game_timeout': self.game_timeout,
            'timeout_agent_indexes': self.timeout_agent_indexes,
            'crash_agent_indexes': self.crash_agent_indexes,
            'winning_agent_indexes': self.winning_agent_indexes,
        }

    @classmethod
    def from_dict(cls, data: dict[str, typing.Any]) -> typing.Any:
        """
        Create a result from a dict shaped like the output of to_dict().
        Only 'game_id' and 'game_info' are required,
        missing optional keys fall back to the constructor's defaults.
        """

        agent_complete_records = {}
        for (agent_index, raw_record) in data.get('agent_complete_records', {}).items():
            # Keys come back from JSON as strings, restore the int agent index
            # (this mirrors how GameInfo.from_dict handles 'agent_infos').
            agent_complete_records[int(agent_index)] = pacai.core.agentaction.AgentActionRecord.from_dict(raw_record)

        return cls(
            data['game_id'],
            GameInfo.from_dict(data['game_info']),
            start_time = data.get('start_time', None),
            end_time = data.get('end_time', None),
            history = [pacai.core.agentaction.AgentActionRecord.from_dict(item) for item in data.get('history', [])],
            agent_complete_records = agent_complete_records,
            score = data.get('score', 0),
            game_timeout = data.get('game_timeout', False),
            timeout_agent_indexes = data.get('timeout_agent_indexes', None),
            crash_agent_indexes = data.get('crash_agent_indexes', None),
            # None (not -1) so that a missing key becomes an empty list in the constructor.
            winning_agent_indexes = data.get('winning_agent_indexes', None),
        )

    def get_duration_secs(self) -> float:
        """
        Get the game's duration in seconds.
        Will return positive infinity if the game has no end time
        (it is still going or crashed (in very rare cases)).
        """

        if (self.end_time is None):
            return math.inf

        return self.end_time.sub(self.start_time).to_secs()
The result of running a game.
def __init__(self,
        game_id: int,
        game_info: GameInfo,
        score: float = 0,
        game_timeout: bool = False,
        timeout_agent_indexes: list[int] | None = None,
        crash_agent_indexes: list[int] | None = None,
        winning_agent_indexes: list[int] | None = None,
        start_time: edq.util.time.Timestamp | None = None,
        end_time: edq.util.time.Timestamp | None = None,
        history: list[pacai.core.agentaction.AgentActionRecord] | None = None,
        agent_complete_records: dict[int, pacai.core.agentaction.AgentActionRecord] | None = None,
        **kwargs: typing.Any) -> None:
    """
    Create a result for the given game.
    Container arguments left as None become new empty containers,
    and a missing start time becomes the current time.
    Extra keyword arguments are accepted and ignored.
    """

    self.game_id: int = game_id
    """ The identifier of this game result. """

    self.game_info: GameInfo = game_info
    """ The core settings of the game this result belongs to. """

    self.start_time: edq.util.time.Timestamp = edq.util.time.Timestamp.now() if (start_time is None) else start_time
    """ When the game started. """

    self.end_time: edq.util.time.Timestamp | None = end_time
    """ When the game ended (None if there is no end time). """

    self.history: list[pacai.core.agentaction.AgentActionRecord] = [] if (history is None) else history
    """ The action records of every agent, in the order they were played. """

    self.agent_complete_records: dict[int, pacai.core.agentaction.AgentActionRecord] = (
            {} if (agent_complete_records is None) else agent_complete_records)
    """
    The record each agent returned when the game finished.
    Learning agents may use this to report what they learned during the game.
    """

    self.score: float = score
    """ The game's score. """

    self.game_timeout: bool = game_timeout
    """ True when the game hit the maximum number of moves. """

    self.timeout_agent_indexes: list[int] = [] if (timeout_agent_indexes is None) else timeout_agent_indexes
    """ The agents that timed out during this game. """

    self.crash_agent_indexes: list[int] = [] if (crash_agent_indexes is None) else crash_agent_indexes
    """ The agents that crashed during this game. """

    self.winning_agent_indexes: list[int] = [] if (winning_agent_indexes is None) else winning_agent_indexes
    """
    The agents considered to have won this game.
    Each game may give this its own meaning.
    """
The history of actions taken by each agent in this game.
The record received from an agent when the game finishes. For agents that learn, this may include information that the agent learned this game.
The agents that are considered the "winner" of this game. Games may interpret this value in different ways.
def to_dict(self) -> dict[str, typing.Any]:
    """
    Build a dict describing this result.
    The game info, every history item, and every agent record
    are converted with their own to_dict().
    """

    raw_game_info = self.game_info.to_dict()

    raw_history = []
    for action_record in self.history:
        raw_history.append(action_record.to_dict())

    raw_complete_records = {}
    for (agent_index, complete_record) in self.agent_complete_records.items():
        raw_complete_records[agent_index] = complete_record.to_dict()

    data: dict[str, typing.Any] = {
        'game_id': self.game_id,
        'game_info': raw_game_info,
        'start_time': self.start_time,
        'end_time': self.end_time,
        'history': raw_history,
        'agent_complete_records': raw_complete_records,
        'score': self.score,
        'game_timeout': self.game_timeout,
        'timeout_agent_indexes': self.timeout_agent_indexes,
        'crash_agent_indexes': self.crash_agent_indexes,
        'winning_agent_indexes': self.winning_agent_indexes,
    }

    return data
Return a dict that can be used to represent this object. If the dict is passed to from_dict(), an identical object should be reconstructed.
@classmethod
def from_dict(cls, data: dict[str, typing.Any]) -> typing.Any:
    """
    Create a result from a dict shaped like the output of to_dict().
    Only 'game_id' and 'game_info' are required,
    missing optional keys fall back to the constructor's defaults.
    """

    agent_complete_records = {}
    for (agent_index, raw_record) in data.get('agent_complete_records', {}).items():
        # Keys come back from JSON as strings, restore the int agent index
        # (this mirrors how GameInfo.from_dict handles 'agent_infos').
        agent_complete_records[int(agent_index)] = pacai.core.agentaction.AgentActionRecord.from_dict(raw_record)

    return cls(
        data['game_id'],
        GameInfo.from_dict(data['game_info']),
        start_time = data.get('start_time', None),
        end_time = data.get('end_time', None),
        history = [pacai.core.agentaction.AgentActionRecord.from_dict(item) for item in data.get('history', [])],
        agent_complete_records = agent_complete_records,
        score = data.get('score', 0),
        game_timeout = data.get('game_timeout', False),
        timeout_agent_indexes = data.get('timeout_agent_indexes', None),
        crash_agent_indexes = data.get('crash_agent_indexes', None),
        # None (not -1) so that a missing key becomes an empty list in the constructor.
        winning_agent_indexes = data.get('winning_agent_indexes', None),
    )
Return an instance of this subclass created using the given dict. If the dict came from to_dict(), the returned object should be identical to the original.
def get_duration_secs(self) -> float:
    """
    Compute how long the game lasted, in seconds.
    When there is no end time (the game is still running, or in very rare cases crashed),
    positive infinity is returned.
    """

    end_time = self.end_time
    if (end_time is not None):
        elapsed = end_time.sub(self.start_time)
        return elapsed.to_secs()

    return math.inf
Get the game's duration in seconds. Will return positive infinity if the game has no end time (it is still going or crashed (in very rare cases)).
class Game(abc.ABC):
    """
    A game that can be run in pacai.
    Games combine the rules, layouts, and agents to run.
    """

    def __init__(self,
            game_info: GameInfo,
            board: pacai.core.board.Board,
            save_path: str | None = None,
            is_replay: bool = False,
            ) -> None:
        self.game_info: GameInfo = game_info
        """ The core information about this game. """

        self._board: pacai.core.board.Board = board
        """ The board this game will be played on. """

        self._save_path: str | None = save_path
        """ Where to save the results of this game. """

        self._is_replay: bool = is_replay
        """
        Indicates that this game is being loaded from a replay.
        Some behavior, like saving the result, will be modified.
        """

    def process_args(self, args: argparse.Namespace) -> None:
        """
        Process any special arguments from the command-line.
        The default implementation does nothing.
        """

    @abc.abstractmethod
    def get_initial_state(self,
            rng: random.Random,
            board: pacai.core.board.Board,
            agent_infos: dict[int, pacai.core.agentinfo.AgentInfo],
            ) -> pacai.core.gamestate.GameState:
        """ Create the initial state for this game. """

    def process_turn(self,
            state: pacai.core.gamestate.GameState,
            action_record: pacai.core.agentaction.AgentActionRecord,
            result: GameResult,
            rng: random.Random,
            ) -> pacai.core.gamestate.GameState:
        """
        Process the given agent action and return an updated game state.
        The returned game state may be a copy or modified version of the passed in game state.

        Timeouts and crashes are recorded in the result and reported to the state.
        A ValueError is raised if the agent's action is not legal in the given state.
        """

        # The agent has timed out.
        if (action_record.timeout):
            result.timeout_agent_indexes.append(action_record.agent_index)
            state.process_agent_timeout(action_record.agent_index)
            return state

        # The agent has crashed.
        if (action_record.crashed):
            result.crash_agent_indexes.append(action_record.agent_index)
            state.process_agent_crash(action_record.agent_index)
            return state

        action = action_record.get_action()
        if (action not in state.get_legal_actions()):
            raise ValueError(f"Illegal action for agent {action_record.agent_index}: '{action}'.")

        self._call_state_process_turn_full(state, action, rng)

        return state

    def _call_state_process_turn_full(self,
            state: pacai.core.gamestate.GameState,
            action: pacai.core.action.Action,
            rng: random.Random) -> None:
        """ Call on the game state to process a full turn. """

        state.process_turn_full(action, rng)

    def check_end(self, state: pacai.core.gamestate.GameState) -> bool:
        """
        Check to see if the game is over.
        Return True if the game is now over, False otherwise.

        By default, this will just check pacai.core.gamestate.GameState.game_over,
        but child games can override for more complex functionality.
        """

        return state.game_over

    def game_complete(self, state: pacai.core.gamestate.GameState, result: GameResult) -> None:
        """
        Make any last adjustments to the game result after the game is over.
        The default implementation does nothing.
        """

    def run(self, ui: pacai.core.ui.UI) -> GameResult:
        """
        The main "game loop" for all games.

        The agent isolator is always closed before this method exits, even if the game raises.
        The UI is also closed if it was asked to start.
        The result is saved when a save path is set and this game is not a replay.
        """

        logging.debug("Starting a game with seed: %d.", self.game_info.seed)

        # Create a new random number generator just for this game.
        rng = random.Random(self.game_info.seed)

        # Keep track of what happens during this game.
        game_id = rng.randint(0, 2**64)
        result = GameResult(game_id, self.game_info)

        isolator = self.game_info.isolation_level.get_isolator()

        # Only close the UI in the cleanup if we asked it to start.
        ui_started = False

        try:
            # Initialize the agent isolator.
            isolator.init_agents(self.game_info.agent_infos)

            # Keep track of all the user inputs since the last time an agent moved.
            # Note that we need to keep track for all agents,
            # since the UI will only tell us the inputs since the last call.
            agent_user_inputs: dict[int, list[pacai.core.action.Action]] = {}
            for agent_index in self.game_info.agent_infos:
                agent_user_inputs[agent_index] = []

            # Create the initial game state (and force its seed).
            state = self.get_initial_state(rng, self._board, self.game_info.agent_infos)
            state.seed = game_id
            state.game_start()

            board_highlights: list[pacai.core.board.Highlight] = []

            # Notify agents about the start of the game.
            records = isolator.game_start(rng, state, self.game_info.agent_start_timeout)
            for record in records.values():
                if (record.timeout):
                    result.timeout_agent_indexes.append(record.agent_index)
                    state.process_agent_timeout(record.agent_index)
                elif (record.crashed):
                    result.crash_agent_indexes.append(record.agent_index)
                    state.process_agent_crash(record.agent_index)
                else:
                    board_highlights += record.get_board_highlights()

            state.agents_game_start(records)

            # Start the UI.
            ui_started = True
            ui.game_start(state, board_highlights = board_highlights)

            while (not self.check_end(state)):
                logging.trace("Turn %d, agent %d.", state.turn_count, state.agent_index)  # type: ignore[attr-defined] # pylint: disable=no-member

                # Receive any user inputs from the UI.
                self._receive_user_inputs(agent_user_inputs, ui)

                # Get the next action from the agent.
                action_record = isolator.get_action(state, agent_user_inputs[state.agent_index], self.game_info.agent_action_timeout)

                # Check if we need to clear any user inputs.
                if (action_record.get_clear_inputs()):
                    agent_user_inputs[state.agent_index] = []

                # Execute the next action and update the state.
                state = self.process_turn(state, action_record, result, rng)

                # Update the UI.
                ui.update(state, board_highlights = action_record.get_board_highlights())

                # Update the game result and move history.
                result.history.append(action_record)

                # Check for game ending conditions.
                if (self.check_end(state)):
                    break

                # Check if this game has run for the maximum number of turns.
                if ((self.game_info.max_turns > 0) and (state.turn_count >= self.game_info.max_turns)):
                    state.process_game_timeout()
                    result.game_timeout = True
                    break

            # Mark the end time of the game.
            result.end_time = edq.util.time.Timestamp.now()

            # Notify the state about the end of the game.
            winners = state.game_complete()
            result.winning_agent_indexes += winners

            result.score = state.score

            # Notify agents about the end of this game.
            result.agent_complete_records = isolator.game_complete(state, self.game_info.agent_end_timeout)

            # Allow the game to make final updates to the result.
            self.game_complete(state, result)

            # Update the UI.
            ui.game_complete(state)
        finally:
            # Cleanup (runs on both the success and the failure path).
            try:
                isolator.close()
            finally:
                if (ui_started):
                    ui.close()

        if ((not self._is_replay) and (self._save_path is not None)):
            logging.info("Saving results to '%s'.", self._save_path)
            edq.util.json.dump_path(result, self._save_path)

        return result

    def _receive_user_inputs(self,
            agent_user_inputs: dict[int, list[pacai.core.action.Action]],
            ui: pacai.core.ui.UI,
            ) -> None:
        """ Add the current user inputs to the running list for each agent. """

        new_user_inputs = ui.get_user_inputs()

        for user_inputs in agent_user_inputs.values():
            user_inputs += new_user_inputs
A game that can be run in pacai. Games combine the rules, layouts, and agents to run.
def process_args(self, args: argparse.Namespace) -> None:
    """
    Process any special arguments from the command-line.
    The default implementation does nothing.
    """
Process any special arguments from the command-line.
@abc.abstractmethod
def get_initial_state(self,
        rng: random.Random,
        board: pacai.core.board.Board,
        agent_infos: dict[int, pacai.core.agentinfo.AgentInfo],
        ) -> pacai.core.gamestate.GameState:
    """
    Create the initial state for this game.
    This method is abstract, so child games must implement it.
    """
Create the initial state for this game.
def process_turn(self,
        state: pacai.core.gamestate.GameState,
        action_record: pacai.core.agentaction.AgentActionRecord,
        result: GameResult,
        rng: random.Random,
        ) -> pacai.core.gamestate.GameState:
    """
    Apply an agent's action record to the game state and return the resulting state.
    The state that comes back may be the same (modified) object or a copy.

    A timeout or crash is noted in the result and reported to the state instead of playing an action.
    A ValueError is raised when the chosen action is not legal.
    """

    if (action_record.timeout):
        # The agent ran out of time.
        result.timeout_agent_indexes.append(action_record.agent_index)
        state.process_agent_timeout(action_record.agent_index)
    elif (action_record.crashed):
        # The agent crashed.
        result.crash_agent_indexes.append(action_record.agent_index)
        state.process_agent_crash(action_record.agent_index)
    else:
        # The agent produced an action, which must be legal before it is played.
        action = action_record.get_action()
        legal_actions = state.get_legal_actions()
        if (action not in legal_actions):
            raise ValueError(f"Illegal action for agent {action_record.agent_index}: '{action}'.")

        self._call_state_process_turn_full(state, action, rng)

    return state
Process the given agent action and return an updated game state. The returned game state may be a copy or modified version of the passed in game state.
def check_end(self, state: pacai.core.gamestate.GameState) -> bool:
    """
    Decide whether the game has finished.
    Returns True when the game is over, and False when it should keep going.

    The default only reads the state's `game_over` flag,
    child games can override this for more involved end conditions.
    """

    is_over = state.game_over
    return is_over
Check to see if the game is over. Return True if the game is now over, False otherwise.
By default, this will just check pacai.core.gamestate.GameState.game_over, but child games can override for more complex functionality.
def game_complete(self, state: pacai.core.gamestate.GameState, result: GameResult) -> None:
    """
    Make any last adjustments to the game result after the game is over.
    The default implementation does nothing.
    """
Make any last adjustments to the game result after the game is over.
def run(self, ui: pacai.core.ui.UI) -> GameResult:
    """
    The main "game loop" for all games.

    The agent isolator is always closed before this method exits, even if the game raises.
    The UI is also closed if it was asked to start.
    The result is saved when a save path is set and this game is not a replay.
    """

    logging.debug("Starting a game with seed: %d.", self.game_info.seed)

    # Create a new random number generator just for this game.
    rng = random.Random(self.game_info.seed)

    # Keep track of what happens during this game.
    game_id = rng.randint(0, 2**64)
    result = GameResult(game_id, self.game_info)

    isolator = self.game_info.isolation_level.get_isolator()

    # Only close the UI in the cleanup if we asked it to start.
    ui_started = False

    try:
        # Initialize the agent isolator.
        isolator.init_agents(self.game_info.agent_infos)

        # Keep track of all the user inputs since the last time an agent moved.
        # Note that we need to keep track for all agents,
        # since the UI will only tell us the inputs since the last call.
        agent_user_inputs: dict[int, list[pacai.core.action.Action]] = {}
        for agent_index in self.game_info.agent_infos:
            agent_user_inputs[agent_index] = []

        # Create the initial game state (and force its seed).
        state = self.get_initial_state(rng, self._board, self.game_info.agent_infos)
        state.seed = game_id
        state.game_start()

        board_highlights: list[pacai.core.board.Highlight] = []

        # Notify agents about the start of the game.
        records = isolator.game_start(rng, state, self.game_info.agent_start_timeout)
        for record in records.values():
            if (record.timeout):
                result.timeout_agent_indexes.append(record.agent_index)
                state.process_agent_timeout(record.agent_index)
            elif (record.crashed):
                result.crash_agent_indexes.append(record.agent_index)
                state.process_agent_crash(record.agent_index)
            else:
                board_highlights += record.get_board_highlights()

        state.agents_game_start(records)

        # Start the UI.
        ui_started = True
        ui.game_start(state, board_highlights = board_highlights)

        while (not self.check_end(state)):
            logging.trace("Turn %d, agent %d.", state.turn_count, state.agent_index)  # type: ignore[attr-defined] # pylint: disable=no-member

            # Receive any user inputs from the UI.
            self._receive_user_inputs(agent_user_inputs, ui)

            # Get the next action from the agent.
            action_record = isolator.get_action(state, agent_user_inputs[state.agent_index], self.game_info.agent_action_timeout)

            # Check if we need to clear any user inputs.
            if (action_record.get_clear_inputs()):
                agent_user_inputs[state.agent_index] = []

            # Execute the next action and update the state.
            state = self.process_turn(state, action_record, result, rng)

            # Update the UI.
            ui.update(state, board_highlights = action_record.get_board_highlights())

            # Update the game result and move history.
            result.history.append(action_record)

            # Check for game ending conditions.
            if (self.check_end(state)):
                break

            # Check if this game has run for the maximum number of turns.
            if ((self.game_info.max_turns > 0) and (state.turn_count >= self.game_info.max_turns)):
                state.process_game_timeout()
                result.game_timeout = True
                break

        # Mark the end time of the game.
        result.end_time = edq.util.time.Timestamp.now()

        # Notify the state about the end of the game.
        winners = state.game_complete()
        result.winning_agent_indexes += winners

        result.score = state.score

        # Notify agents about the end of this game.
        result.agent_complete_records = isolator.game_complete(state, self.game_info.agent_end_timeout)

        # Allow the game to make final updates to the result.
        self.game_complete(state, result)

        # Update the UI.
        ui.game_complete(state)
    finally:
        # Cleanup (runs on both the success and the failure path).
        try:
            isolator.close()
        finally:
            if (ui_started):
                ui.close()

    if ((not self._is_replay) and (self._save_path is not None)):
        logging.info("Saving results to '%s'.", self._save_path)
        edq.util.json.dump_path(result, self._save_path)

    return result
The main "game loop" for all games.
def set_cli_args(parser: argparse.ArgumentParser, default_board: str | None = None) -> argparse.ArgumentParser:
    """
    Set common CLI arguments.
    This is a sibling to init_from_args(), as the arguments set here can be interpreted there.

    `default_board` is used as the default value for `--board`.
    The same parser that was passed in is returned.
    """

    parser.add_argument('--board', dest = 'board',
            action = 'store', type = str, default = default_board,
            help = ('Play on this board (default: %(default)s).'
                    + ' This may be the full path to a board, or just a filename.'
                    + ' If just a filename, then the `pacai/resources/boards` directory will be checked (using a ".board" extension).'))

    parser.add_argument('--num-games', dest = 'num_games',
            action = 'store', type = int, default = 1,
            help = 'The number of games to play (default: %(default)s).')

    parser.add_argument('--num-training', dest = 'num_training',
            action = 'store', type = int, default = 0,
            help = 'The number of games to play in training mode before playing `--num-games` real games (default: %(default)s).')

    parser.add_argument('--seed', dest = 'seed',
            action = 'store', type = int, default = None,
            help = 'The random seed for the game (will be randomly generated if not set).')

    parser.add_argument('--max-turns', dest = 'max_turns',
            action = 'store', type = int, default = DEFAULT_MAX_TURNS,
            help = 'The maximum number of turns/moves (total for all agents) allowed in this game (-1 for unlimited) (default: %(default)s).')

    parser.add_argument('--agent-start-timeout', dest = 'agent_start_timeout',
            action = 'store', type = float, default = DEFAULT_AGENT_START_TIMEOUT,
            help = ('The maximum number of seconds each agent is allowed when starting a game (<= 0 for unlimited time) (default: %(default)s).'
                    + ' Note that the "none" isolation level cannot enforce timeouts.'))

    parser.add_argument('--agent-end-timeout', dest = 'agent_end_timeout',
            action = 'store', type = float, default = DEFAULT_AGENT_END_TIMEOUT,
            help = ('The maximum number of seconds each agent is allowed when ending a game (<= 0 for unlimited time) (default: %(default)s).'
                    + ' Note that the "none" isolation level cannot enforce timeouts.'))

    parser.add_argument('--agent-action-timeout', dest = 'agent_action_timeout',
            action = 'store', type = float, default = DEFAULT_AGENT_ACTION_TIMEOUT,
            help = ('The maximum number of seconds each agent is allowed when getting an action (<= 0 for unlimited time) (default: %(default)s).'
                    + ' Note that the "none" isolation level cannot enforce timeouts.'))

    parser.add_argument('--isolation', dest = 'isolation_level', metavar = 'LEVEL',
            action = 'store', type = str, default = pacai.core.isolation.level.Level.NONE.value,
            choices = pacai.core.isolation.level.LEVELS,
            help = ('Set the agent isolation level for this game (default: %(default)s).'
                    + ' Choose one of:'
                    + ' `none` -- Do not make any attempt to isolate the agent code from the game (fastest and least secure),'
                    + ' `process` -- Run the agent code in a separate process'
                    + ' (offers some protection, but still vulnerable to disk or execution exploits),'
                    + ' `tcp` -- Open TCP listeners to communicate with agents (most secure, requires additional work to set up agents).'))

    # The example agent indexes in this help text must match the agents named in its prose.
    parser.add_argument('--agent-arg', dest = 'raw_agent_args', metavar = 'ARG',
            action = 'append', type = str, default = [],
            help = ('Specify arguments directly to agents (may be used multiple times).'
                    + ' The value for this argument must be formatted as "agent_index::key=value",'
                    + ' for example to set `foo = 9` for agent 3 and `bar = a` for agent 2, we can use:'
                    + ' `--agent-arg 3::foo=9 --agent-arg 2::bar=a`.'))

    parser.add_argument('--remove-agent', dest = 'remove_agent_indexes', metavar = 'AGENT_INDEX',
            action = 'append', type = int, default = [],
            help = 'Remove this agent from the board before starting (may be used multiple times).')

    parser.add_argument('--save-path', dest = 'save_path',
            action = 'store', type = str, default = None,
            help = ('If specified, write the result of this game to the specified location.'
                    + ' This file can be replayed with `--replay-path`.'))

    parser.add_argument('--replay-path', dest = 'replay_path',
            action = 'store', type = str, default = None,
            help = 'If specified, replay the game whose result was saved at the specified path with `--save-path`.')

    return parser
Set common CLI arguments. This is a sibling to init_from_args(), as the arguments set here can be interpreted there.
def init_from_args(
        args: argparse.Namespace,
        game_class: typing.Type[Game],
        base_agent_infos: dict[int, pacai.core.agentinfo.AgentInfo] | None = None,
        remove_agent_indexes: list[int] | None = None,
        board_options: dict[str, typing.Any] | None = None,
        ) -> argparse.Namespace:
    """
    Take in args from a parser that was passed to set_cli_args(),
    and initialize the proper components.
    This will create a number of games (and related resources)
    based on `--num-games` + `--num-training`.
    Each of these resources will be placed in their respective list at
    `args._boards`, `args._agent_infos`, or `args._games`.

    The first `--num-training` games are marked as training games.
    The passed in `remove_agent_indexes` list is not modified.
    A ValueError is raised if no board is specified or if the total number of games is not positive.
    The same args object that was passed in is returned.
    """

    if (base_agent_infos is None):
        base_agent_infos = {}

    if (remove_agent_indexes is None):
        remove_agent_indexes = []

    # If this is a replay,
    # then all the core arguments are loaded differently (directly from the file).
    # Use the replay file to override all the current options.
    if (args.replay_path is not None):
        _override_args_with_replay(args, base_agent_infos)
        remove_agent_indexes = []

    if (args.board is None):
        raise ValueError("No board was specified.")

    total_games = args.num_games + args.num_training

    if (total_games <= 0):
        raise ValueError(f"At least one game must be played (--num-games + --num-training), {total_games} was specified.")

    # Establish an RNG to generate seeds for each game using the given seed.
    seed = args.seed
    if (seed is None):
        seed = random.randint(0, 2**64)

    logging.debug("Using source seed for games: %d.", seed)
    rng = random.Random(seed)

    if (board_options is None):
        board_options = {}

    # Either take the board as given, or load it from a path.
    if (isinstance(args.board, pacai.core.board.Board)):
        board = args.board
    else:
        board = pacai.core.board.load_path(args.board, **board_options)

    # Remove specified agents from the board.
    # Build a new list instead of extending in place so the caller's list is left untouched.
    remove_agent_indexes = list(remove_agent_indexes) + list(args.remove_agent_indexes)
    for remove_agent_index in remove_agent_indexes:
        board.remove_agent(remove_agent_index)

    agent_infos = _parse_agent_infos(board.agent_indexes(), args.raw_agent_args, base_agent_infos, remove_agent_indexes)

    base_save_path = args.save_path

    all_boards = []
    all_agent_infos = []
    all_games = []

    for i in range(total_games):
        game_seed = rng.randint(0, 2**64)

        # Every game gets its own copy of the board and agent infos.
        all_boards.append(board.copy())
        all_agent_infos.append(copy.deepcopy(agent_infos))

        game_info = GameInfo(
            board.source,
            all_agent_infos[-1],
            isolation_level = pacai.core.isolation.level.Level(args.isolation_level),
            max_turns = args.max_turns,
            agent_start_timeout = args.agent_start_timeout,
            agent_end_timeout = args.agent_end_timeout,
            agent_action_timeout = args.agent_action_timeout,
            training = (i < args.num_training),
            seed = game_seed
        )

        # Suffix the save path if there is more than one game.
        save_path = base_save_path
        if ((save_path is not None) and (total_games > 1)):
            parts = os.path.splitext(save_path)
            save_path = f"{parts[0]}_{i:03d}{parts[1]}"

        game_args = {
            'game_info': game_info,
            'board': all_boards[-1],
            'save_path': save_path,
        }

        game = game_class(**game_args)
        game.process_args(args)

        all_games.append(game)

    setattr(args, '_boards', all_boards)
    setattr(args, '_agent_infos', all_agent_infos)
    setattr(args, '_games', all_games)

    return args
Take in args from a parser that was passed to set_cli_args(),
and initialize the proper components.
This will create a number of games (and related resources)
based on `--num-games` + `--num-training`.
Each of these resources will be placed in their respective list at `args._boards`, `args._agent_infos`, or `args._games`.