1#!/usr/bin/env python 2# -*- coding: utf-8 -*- 3""" 4Defines an environment for an agent playing Rock Paper Scissors against the environment. 5""" 6 7from __future__ import division 8from __future__ import print_function 9from __future__ import unicode_literals 10 11import os 12import random 13import sys 14 15# Insert the package's parent directory into the system search path, so that this package can be 16# imported when the aixi.py script is run directly from a release archive. 17PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir)) 18sys.path.insert(0, PROJECT_ROOT) 19 20from pyaixi import environment, util 21 22# Define a enumeration to represent rock-paper-scissors actions, which is the 23# agent performing either a rock, paper, or a scissors move. 24rock_paper_scissors_action_enum = util.enum('aRock', 'aPaper', 'aScissors') 25 26# Define a enumeration to represent rock-paper-scissors observations, which is the 27# opponent performing either a rock, paper, or a scissors move. 28rock_paper_scissors_observation_enum = util.enum('oRock', 'oPaper', 'oScissors') 29 30# Define a enumeration to represent losing, drawing, or winning. 31rock_paper_scissors_reward_enum = util.enum('rLose', 'rDraw', 'rWin') 32 33# Define some shorthand notation for ease of reference. 34aRock = rock_paper_scissors_action_enum.aRock 35aPaper = rock_paper_scissors_action_enum.aPaper 36aScissors = rock_paper_scissors_action_enum.aScissors 37 38oRock = rock_paper_scissors_observation_enum.oRock 39oPaper = rock_paper_scissors_observation_enum.oPaper 40oScissors = rock_paper_scissors_observation_enum.oScissors 41 42rLose = rock_paper_scissors_reward_enum.rLose 43rDraw = rock_paper_scissors_reward_enum.rDraw 44rWin = rock_paper_scissors_reward_enum.rWin 45 46class RockPaperScissors(environment.Environment): 47 """ The agent repeatedly plays Rock-Paper-Scissor against an opponent that has 48 a slight, predictable bias in its strategy. 49 50 If the opponent has won a round by playing rock on the previous cycle, it 51 will always play rock at the next time step; otherwise it will pick an 52 action uniformly at random. 53 54 The agent's observation is the most recently chosen action of the opponent. 55 It receives a reward of `rWin` for a win, `rDraw` for a draw and `rLose` for a loss. 56 57 Domain characteristics: 58 - environment: "rock_paper_scissors" 59 - maximum action: 2 (2 bits) 60 - maximum observation: 2 (2 bits) 61 - maximum reward: 2 (2 bits) 62 """ 63 64 # Instance methods. 65 66 def __init__(self, options = {}): 67 """ Construct the RockPaperScissors environment from the given options. 68 69 - `options` is a dictionary of named options and their values. 70 """ 71 72 # Set up the base environment. 73 environment.Environment.__init__(self, options = options) 74 75 # Define the acceptable action values. 76 self.valid_actions = list(rock_paper_scissors_action_enum.keys()) 77 78 # Define the acceptable observation values. 79 self.valid_observations = list(rock_paper_scissors_observation_enum.keys()) 80 81 # Define the acceptable reward values. 82 self.valid_rewards = list(rock_paper_scissors_reward_enum.keys()) 83 84 # Set an initial percept. 85 # (i.e. not rock, to ensure a random choice in the opponent on the first action.) 86 self.observation = oPaper 87 self.reward = 0 88 # end def 89 90 91 def perform_action(self, action): 92 """ Receives the agent's action and calculates the new environment percept. 93 (Called `performAction` in the C++ version.) 94 """ 95 96 assert self.is_valid_action(action) 97 98 # Save the action. 99 self.action = action 100 101 # Opponent plays rock if it won the last round by playing rock, otherwise 102 # it plays randomly. 103 if (self.observation == aRock) and (self.reward == rLose): 104 self.observation = aRock 105 else: 106 self.observation = util.choice(self.valid_actions) 107 # end if 108 109 # Determine reward. 110 if action == self.observation: 111 # If both the agent and the opponent made the same move, it's a draw. 112 self.reward = rDraw 113 elif action == aRock: 114 # If the opponent made a scissors move, then the agent wins if they played rock. 115 self.reward = rWin if self.observation == oScissors else rLose 116 elif action == aScissors: 117 # If the opponent made a paper move, then the agent wins if they played scissors. 118 self.reward = rWin if self.observation == oPaper else rLose 119 elif action == aPaper: 120 # If the opponent made a rock move, then the agent wins if they played paper. 121 self.reward = rWin if self.observation == oRock else rLose 122 # end if 123 124 # Return the resulting observation and reward. 125 return (self.observation, self.reward) 126 # end def 127 128 def print(self): 129 """ Returns a string indicating the status of the environment. 130 """ 131 132 action_text = {aRock: "rock", aPaper: "paper", aScissors: "scissors"} 133 observation_text = {oRock: "rock", oPaper: "paper", oScissors: "scissors"} 134 reward_text = {rLose: "loses", rDraw: "draws", rWin: "wins"} 135 136 message = "Agent played " + action_text[self.action] + ", " + \ 137 "environment played " + observation_text[self.observation] + "\t" + \ 138 "Agent " + reward_text[self.reward] 139 140 return message 141 # end def 142# end class