1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3"""
Defines an environment in which an agent repeatedly plays Rock-Paper-Scissors against a biased, environment-controlled opponent.
5"""
6
7from __future__ import division
8from __future__ import print_function
9from __future__ import unicode_literals
10
11import os
12import random
13import sys
14
15# Insert the package's parent directory into the system search path, so that this package can be
16# imported when the aixi.py script is run directly from a release archive.
17PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
18sys.path.insert(0, PROJECT_ROOT)
19
20from pyaixi import environment, util
21
# Define an enumeration to represent rock-paper-scissors actions, which is the
# agent performing either a rock, paper, or a scissors move.
rock_paper_scissors_action_enum = util.enum('aRock', 'aPaper', 'aScissors')

# Define an enumeration to represent rock-paper-scissors observations, which is the
# opponent performing either a rock, paper, or a scissors move.
rock_paper_scissors_observation_enum = util.enum('oRock', 'oPaper', 'oScissors')

# Define an enumeration to represent losing, drawing, or winning.
rock_paper_scissors_reward_enum = util.enum('rLose', 'rDraw', 'rWin')

# Define some shorthand notation for ease of reference.
# NOTE(review): the three enumerations are declared in corresponding order, and
# the game logic below compares values across them (e.g. an action against an
# observation), which presumably relies on `util.enum` assigning the same
# underlying value to matching entries (aRock/oRock/rLose, etc.) — confirm
# against the `util.enum` implementation.
aRock     = rock_paper_scissors_action_enum.aRock
aPaper    = rock_paper_scissors_action_enum.aPaper
aScissors = rock_paper_scissors_action_enum.aScissors

oRock     = rock_paper_scissors_observation_enum.oRock
oPaper    = rock_paper_scissors_observation_enum.oPaper
oScissors = rock_paper_scissors_observation_enum.oScissors

rLose     = rock_paper_scissors_reward_enum.rLose
rDraw     = rock_paper_scissors_reward_enum.rDraw
rWin      = rock_paper_scissors_reward_enum.rWin
45
class RockPaperScissors(environment.Environment):
    """ The agent repeatedly plays Rock-Paper-Scissors against an opponent that has
        a slight, predictable bias in its strategy.

        If the opponent has won a round by playing rock on the previous cycle, it
        will always play rock at the next time step; otherwise it will pick an
        action uniformly at random.

        The agent's observation is the most recently chosen action of the opponent.
        It receives a reward of `rWin` for a win, `rDraw` for a draw and `rLose` for a loss.

        Domain characteristics:
         - environment: "rock_paper_scissors"
         - maximum action: 2 (2 bits)
         - maximum observation: 2 (2 bits)
         - maximum reward: 2 (2 bits)
    """

    # Instance methods.

    def __init__(self, options = None):
        """ Construct the RockPaperScissors environment from the given options.

             - `options` is a dictionary of named options and their values.
               (Defaults to no options.)
        """

        # Use None as the default rather than a mutable `{}` default argument,
        # which would be shared between every construction of this class.
        if options is None:
            options = {}
        # end if

        # Set up the base environment.
        environment.Environment.__init__(self, options = options)

        # Define the acceptable action values.
        self.valid_actions = list(rock_paper_scissors_action_enum.keys())

        # Define the acceptable observation values.
        self.valid_observations = list(rock_paper_scissors_observation_enum.keys())

        # Define the acceptable reward values.
        self.valid_rewards = list(rock_paper_scissors_reward_enum.keys())

        # Set an initial percept.
        # (i.e. not rock, to ensure a random choice in the opponent on the first
        #  action: the initial reward of 0 compares equal to `rLose`, so an
        #  initial observation of `oRock` would make the opponent deterministically
        #  open with rock.)
        self.observation = oPaper
        self.reward = 0
    # end def


    def perform_action(self, action):
        """ Receives the agent's action and calculates the new environment percept.
            (Called `performAction` in the C++ version.)

             - `action` is one of `aRock`, `aPaper`, or `aScissors`.

            Returns a tuple `(observation, reward)`: the opponent's move for this
            round, and the agent's reward for the round. Also stores both, and
            the given action, on `self`.
        """

        assert self.is_valid_action(action)

        # Save the action.
        self.action = action

        # The opponent plays rock if it won the last round by playing rock
        # (the agent losing means the opponent won); otherwise it plays randomly.
        # (Fixed to use the observation enumeration consistently here:
        #  `self.observation` holds the opponent's move, so the previous code's
        #  use of `aRock` and `self.valid_actions` only worked because matching
        #  entries of the two enumerations share the same underlying value.)
        if (self.observation == oRock) and (self.reward == rLose):
            self.observation = oRock
        else:
            self.observation = util.choice(self.valid_observations)
        # end if

        # Determine the reward.
        # (The draw comparison below still relies on matching action/observation
        #  enumeration entries sharing the same underlying value.)
        if action == self.observation:
            # If both the agent and the opponent made the same move, it's a draw.
            self.reward = rDraw
        elif action == aRock:
            # If the opponent made a scissors move, then the agent wins if they played rock.
            self.reward = rWin if self.observation == oScissors else rLose
        elif action == aScissors:
            # If the opponent made a paper move, then the agent wins if they played scissors.
            self.reward = rWin if self.observation == oPaper else rLose
        elif action == aPaper:
            # If the opponent made a rock move, then the agent wins if they played paper.
            self.reward = rWin if self.observation == oRock else rLose
        # end if

        # Return the resulting observation and reward.
        return (self.observation, self.reward)
    # end def

    def print(self):
        """ Returns a string indicating the status of the environment:
            the agent's move, the opponent's move, and the round's outcome.

            (Despite the name, this method only builds and returns the message;
             it does not write to standard output.)
        """

        action_text      = {aRock: "rock", aPaper: "paper", aScissors: "scissors"}
        observation_text = {oRock: "rock", oPaper: "paper", oScissors: "scissors"}
        reward_text      = {rLose: "loses", rDraw: "draws", rWin: "wins"}

        message = "Agent played " + action_text[self.action] + ", " + \
                  "environment played " + observation_text[self.observation] + "\t" + \
                  "Agent " + reward_text[self.reward]

        return message
    # end def
# end class