Added docstrings, fixed bugs and added taxi examples

openai · jkterry1 · Jun 26, 2022 · Jun 8, 2022 · Jun 8, 2022 · Jun 11, 2022
commit cd910072066d16744178d96197068ed4c009ea11
diff --git a/gym/envs/toy_text/taxi.py b/gym/envs/toy_text/taxi.py
@@ -56,6 +56,12 @@ class TaxiEnv(Env):
     - 4: pickup passenger
     - 5: drop off passenger
 
+    In some cases, taking these actions will have no effect on the state of the agent.
+    In v0.25.0, ``info["action_mask"]`` contains a numpy.ndarray with one entry for each action, specifying
+    whether that action will change the state.
+    To sample a modifying action, use ``action = env.action_space.sample(info["action_mask"])``,
+    or with a Q-value based algorithm ``action = np.argmax(q_values[obs, np.where(info["action_mask"] == 1)[0]])``.
+
     ### Observations
     There are 500 discrete states since there are 25 taxi positions, 5 possible
     locations of the passenger (including the case when the passenger is in the
@@ -99,7 +105,7 @@ class TaxiEnv(Env):
     ```
 
     ### Version History
-    * v3: Map Correction + Cleaner Domain Description
+    * v3: Map Correction + Cleaner Domain Description, v0.25.0 action masking added to the reset and step information
     * v2: Disallow Taxi start location = goal location, Update Taxi observations in the rollout, Update Taxi reward threshold.
     * v1: Remove (3,2) from locs, add passidx<4 check
     * v0: Initial versions release

diff --git a/gym/spaces/box.py b/gym/spaces/box.py
@@ -3,6 +3,7 @@
 
 import numpy as np
 
+import gym.error
 from gym import logger
 from gym.spaces.space import Space
 from gym.utils import seeding
@@ -146,7 +147,7 @@ def is_bounded(self, manner: str = "both") -> bool:
         else:
             raise ValueError("manner is not in {'below', 'above', 'both'}")
 
-    def sample(self, mask: np.ndarray = None) -> np.ndarray:
+    def sample(self, mask: None = None) -> np.ndarray:
         r"""Generates a single random sample inside the Box.
 
         In creating a sample of the box, each coordinate is sampled (independently) from a distribution
@@ -157,11 +158,16 @@ def sample(self, mask: np.ndarray = None) -> np.ndarray:
         * :math:`(-\infty, b]` : shifted negative exponential distribution
         * :math:`(-\infty, \infty)` : normal distribution
 
+        Args:
+            mask: A mask for sampling values from the Box space, currently unsupported.
+
         Returns:
             A sampled value from the Box
         """
         if mask is not None:
-            return np.zeros(self.shape, self.dtype)
+            raise gym.error.Error(
+                f"Box.sample cannot be provided a mask, actual value: {mask}"
+            )
 
         high = self.high if self.dtype.kind == "f" else self.high.astype("int64") + 1
         sample = np.empty(self.shape)

diff --git a/gym/spaces/dict.py b/gym/spaces/dict.py
@@ -1,7 +1,6 @@
 """Implementation of a space that represents the cartesian product of other spaces as a dictionary."""
 from collections import OrderedDict
 from collections.abc import Mapping, Sequence
-from typing import Dict
 from typing import Dict as TypingDict
 from typing import Optional, Union
 
@@ -138,21 +137,24 @@ def seed(self, seed: Optional[Union[dict, int]] = None) -> list:
 
         return seeds
 
-    def sample(self, mask: Dict[str, np.ndarray] = None) -> dict:
+    def sample(self, mask: Optional[TypingDict[str, np.ndarray]] = None) -> dict:
         """Generates a single random sample from this space.
 
         The sample is an ordered dictionary of independent samples from the constituent spaces.
 
+        Args:
+            mask: An optional mask for each of the subspaces, expects the same keys as the space
+
         Returns:
             A dictionary with the same key and sampled values from :attr:`self.spaces`
         """
         if mask is not None:
             assert isinstance(
                 mask, dict
-            ), f"Expects mask to be a dict, actual type: {type(dict)}"
+            ), f"Expects mask to be a dict, actual type: {type(mask)}"
             assert (
-                mask.keys == self.keys()
-            ), f"Expect mask keys to be same as space keys, mask keys: {mask.keys()}, space keys: {self.keys()}"
+                mask.keys() == self.spaces.keys()
+            ), f"Expect mask keys to be same as space keys, mask keys: {mask.keys()}, space keys: {self.spaces.keys()}"
             return OrderedDict(
                 [(k, space.sample(mask[k])) for k, space in self.spaces.items()]
             )

diff --git a/gym/spaces/discrete.py b/gym/spaces/discrete.py
@@ -40,20 +40,32 @@ def __init__(
         self.start = int(start)
         super().__init__((), np.int64, seed)
 
-    def sample(self, mask: np.ndarray = None) -> int:
+    def sample(self, mask: Optional[np.ndarray] = None) -> int:
         """Generates a single random sample from this space.
 
-        A sample will be chosen uniformly at random.
+        A sample will be chosen uniformly at random, restricted to the values allowed by the mask if one is provided.
+
+        Args:
+            mask: An optional mask indicating whether each action can be selected. Expected shape is (n,). If there are no possible actions, defaults to `space.start`.
 
         Returns:
             A sampled integer from the space
         """
         if mask is not None:
-            assert isinstance(mask, np.ndarray)
-            assert mask.dtype == np.int8
-            assert mask.shape == (self.n,)
-            if np.any(mask):
-                return int(self.start + self.np_random.choice(np.where(mask)))
+            assert isinstance(
+                mask, np.ndarray
+            ), f"The expected type of the mask is np.ndarray, actual type: {type(mask)}"
+            assert (
+                mask.dtype == np.int8
+            ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}"
+            assert mask.shape == (
+                self.n,
+            ), f"The expected shape of the mask is {(self.n,)}, actual shape: {mask.shape}"
+            assert np.all(
+                np.logical_or(mask == 0, mask == 1)
+            ), f"All values of a mask should be 0 or 1, actual values: {mask}"
+            if np.any(mask == 1):
+                return int(self.start + self.np_random.choice(np.where(mask)[0]))
             else:
                 return self.start
 

diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py
@@ -1,10 +1,9 @@
 """Implementation of a space that represents graph information where nodes and edges can be represented with euclidean space."""
 from collections import namedtuple
-from typing import NamedTuple, Optional, Sequence, Union
+from typing import NamedTuple, Optional, Sequence, Tuple, Union
 
 import numpy as np
 
-import gym
 from gym.spaces.box import Box
 from gym.spaces.discrete import Discrete
 from gym.spaces.multi_discrete import MultiDiscrete
@@ -93,23 +92,18 @@ def _generate_sample_space(
                 f"Only Box and Discrete can be accepted as a base_space, got {type(base_space)}, you should not have gotten this error."
             )
 
-    def _sample_sample_space(self, sample_space) -> Optional[np.ndarray]:
-        if sample_space is not None:
-            return sample_space.sample()
-        else:
-            return None
-
-    def sample(self, mask=None) -> NamedTuple:
+    def sample(
+        self, mask: Optional[Tuple[Optional[np.ndarray], Optional[np.ndarray]]] = None
+    ) -> NamedTuple:
         """Generates a single sample graph with num_nodes between 1 and 10 sampled from the Graph.
 
+        Args:
+            mask: An optional tuple for the node space mask and the edge space mask (only valid for Discrete spaces)
+
         Returns:
             A NamedTuple representing a graph with attributes .nodes, .edges, and .edge_links.
         """
-        if mask is not None:
-            raise gym.error.Error(
-                "Action masking for graphs are not supported at this time, please raise an issue on github."
-            )
-
+        node_mask, edge_mask = mask if mask is not None else (None, None)
         num_nodes = self.np_random.integers(low=1, high=10)
 
         # we only have edges when we have at least 2 nodes
@@ -121,8 +115,16 @@ def sample(self, mask=None) -> NamedTuple:
         node_sample_space = self._generate_sample_space(self.node_space, num_nodes)
         edge_sample_space = self._generate_sample_space(self.edge_space, num_edges)
 
-        sampled_nodes = self._sample_sample_space(node_sample_space)
-        sampled_edges = self._sample_sample_space(edge_sample_space)
+        sampled_nodes = (
+            node_sample_space.sample(node_mask)
+            if node_sample_space is not None
+            else None
+        )
+        sampled_edges = (
+            edge_sample_space.sample(edge_mask)
+            if edge_sample_space is not None
+            else None
+        )
 
         sampled_edge_links = None
         if sampled_edges is not None and num_edges > 0:

diff --git a/gym/spaces/multi_binary.py b/gym/spaces/multi_binary.py
@@ -51,18 +51,31 @@ def shape(self) -> Tuple[int, ...]:
         """Has stricter type than gym.Space - never None."""
         return self._shape  # type: ignore
 
-    def sample(self, mask: np.ndarray = None) -> np.ndarray:
+    def sample(self, mask: Optional[np.ndarray] = None) -> np.ndarray:
         """Generates a single random sample from this space.
 
         A sample is drawn by independent, fair coin tosses (one toss per binary variable of the space).
 
+        Args:
+            mask: An optional np.ndarray to mask samples, where mask == 0 will have samples == 0
+
         Returns:
             Sampled values from space
         """
         if mask is not None:
-            assert isinstance(mask, np.ndarray)
-            assert mask.dtype == np.int8
-            assert mask.shape == self.shape
+            assert isinstance(
+                mask, np.ndarray
+            ), f"The expected type of the mask is np.ndarray, actual type: {type(mask)}"
+            assert (
+                mask.dtype == np.int8
+            ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}"
+            assert (
+                mask.shape == self.shape
+            ), f"The expected shape of the mask is {self.shape}, actual shape: {mask.shape}"
+            assert np.all(
+                np.logical_or(mask == 0, mask == 1)
+            ), f"All values of a mask should be 0 or 1, actual values: {mask}"
+
             return mask * self.np_random.integers(0, 2, self.n, self.dtype)
 
         return self.np_random.integers(low=0, high=2, size=self.n, dtype=self.dtype)

diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py
@@ -23,8 +23,17 @@ class MultiDiscrete(Space[np.ndarray]):
     2. Button A:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
     3. Button B:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
 
-    It can be initialized as ``MultiDiscrete([ 5, 2, 2 ])``
+    It can be initialized as ``MultiDiscrete([ 5, 2, 2 ])`` such that a sample might be ``array([3, 1, 0])``
 
+    Although this feature is rarely used, :class:`MultiDiscrete` spaces may also have several axes
+    if ``nvec`` has several axes:
+
+    Example::
+
+        >> d = MultiDiscrete(np.array([[1, 2], [3, 4]]))
+        >> d.sample()
+        array([[0, 0],
+               [2, 3]])
     """
 
     def __init__(
@@ -37,16 +46,6 @@ def __init__(
 
         The argument ``nvec`` will determine the number of values each categorical variable can take.
 
-        Although this feature is rarely used, :class:`MultiDiscrete` spaces may also have several axes
-        if ``nvec`` has several axes:
-
-        Example::
-
-            >> d = MultiDiscrete(np.array([[1, 2], [3, 4]]))
-            >> d.sample()
-            array([[0, 0],
-                   [2, 3]])
-
         Args:
             nvec: vector of counts of each categorical variable. This will usually be a list of integers. However,
                 you may also pass a more complicated numpy array if you'd like the space to have several axes.
@@ -63,14 +62,30 @@ def shape(self) -> Tuple[int, ...]:
         """Has stricter type than :class:`gym.Space` - never None."""
         return self._shape  # type: ignore
 
-    def sample(self, mask: np.ndarray = None) -> np.ndarray:
-        """Generates a single random sample this space."""
-        if mask is not None:
-            assert isinstance(mask, np.ndarray)
-            assert mask.dtype == np.int8
-            assert mask.shape == self.shape
+    def sample(self, mask: Optional[np.ndarray] = None) -> np.ndarray:
+        """Generates a single random sample from this space.
+
+        Args:
+            mask: An optional mask for multi-discrete, expected shape is `space.nvec`. If there are no possible actions, defaults to 0
 
-            multi_mask = [np.where(row) for row in mask]
+        Returns:
+            An np.ndarray of shape `space.shape`
+        """
+        if mask is not None:
+            assert isinstance(
+                mask, np.ndarray
+            ), f"The expected type of the mask is np.ndarray, actual type: {type(mask)}"
+            assert (
+                mask.dtype == np.int8
+            ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}"
+            assert np.all(
+                mask.shape == self.nvec
+            ), f"The expected shape of the mask is {self.nvec}, actual shape: {mask.shape}. We don't support multi-axis nvec currently."
+            assert np.all(
+                np.logical_or(mask == 0, mask == 1)
+            ), f"All values of a mask should be 0 or 1, actual values: {mask}"
+
+            multi_mask = [np.where(row)[0] for row in mask]
             return np.array(
                 [
                     self.np_random.choice(row_mask) if len(row_mask) > 0 else 0

diff --git a/gym/spaces/space.py b/gym/spaces/space.py
@@ -1,6 +1,7 @@
 """Implementation of the `Space` metaclass."""
 
 from typing import (
+    Any,
     Generic,
     Iterable,
     List,
@@ -81,8 +82,17 @@ def shape(self) -> Optional[Tuple[int, ...]]:
         """Return the shape of the space as an immutable property."""
         return self._shape
 
-    def sample(self, mask=None) -> T_cov:
-        """Randomly sample an element of this space. Can be uniform or non-uniform sampling based on boundedness of space."""
+    def sample(self, mask: Optional[Any] = None) -> T_cov:
+        """Randomly sample an element of this space.
+
+        Can be uniform or non-uniform sampling based on boundedness of space.
+
+        Args:
+            mask: A mask used for sampling, see the specific Space subclass for implementation details.
+
+        Returns:
+            A sampled element from the space
+        """
         raise NotImplementedError
 
     def seed(self, seed: Optional[int] = None) -> list:

diff --git a/gym/spaces/tuple.py b/gym/spaces/tuple.py
@@ -72,17 +72,25 @@ def seed(self, seed: Optional[Union[int, List[int]]] = None) -> list:
 
         return seeds
 
-    def sample(self, mask: Tuple[np.ndarray] = None) -> tuple:
+    def sample(self, mask: Optional[Tuple[Optional[np.ndarray]]] = None) -> tuple:
         """Generates a single random sample inside this space.
 
         This method draws independent samples from the subspaces.
 
+        Args:
+            mask: An optional tuple of optional masks for each of the subspace's samples, expects the same number of masks as spaces
+
         Returns:
             Tuple of the subspace's samples
         """
         if mask is not None:
-            assert isinstance(mask, tuple)
-            assert len(mask) == len(self.spaces)
+            assert isinstance(
+                mask, tuple
+            ), f"Expected type of mask is tuple, actual type: {type(mask)}"
+            assert len(mask) == len(
+                self.spaces
+            ), f"Expected length of mask is {len(self.spaces)}, actual length: {len(mask)}"
+
             return tuple(
                 space.sample(mask=sub_mask)
                 for space, sub_mask in zip(self.spaces, mask)