Updates based on Marcus28 and jjshoots for Graph.py

openai · jkterry1 · Jun 26, 2022 · Jun 8, 2022 · Jun 8, 2022 · Jun 11, 2022
commit 4a4b166fd05bf7794f23d73a0f1953253b745989
diff --git a/gym/envs/toy_text/taxi.py b/gym/envs/toy_text/taxi.py
@@ -89,13 +89,15 @@ class TaxiEnv(Env):
 
     ### Info
 
-    ``step`` and ``reset(return_info=True)`` will return an info dictionary that contains "p" and "action_mask".
+    ``step`` and ``reset(return_info=True)`` will return an info dictionary with the keys "p" and "action_mask", which hold
+    the probability that the state is taken and a mask of which actions will result in a change of state (to speed up training).
 
-    As Taxi is a stochastic environment for transitions then the "p" key represents the probability of the
-    transition. However, this value is permanently 1.0 for an unknown reason.
+    As Taxi's initial state is stochastic, the "p" key represents the probability of the
+    transition; however, this value is currently bugged and is always 1.0. This will be fixed soon.
+    As the steps are deterministic, "p" represents the probability of the transition, which is always 1.0.
 
-    For some cases, taking these actions will have no effect on the state of the agent.
-    In v0.25.0, ``info["action_mask"]`` contains a numpy.ndarray for each of the action specifying
+    For some cases, taking an action will have no effect on the state of the agent.
+    In v0.25.0, ``info["action_mask"]`` contains a np.ndarray for each of the actions specifying
     if the action will change the state.
 
     To sample a modifying action, use ``action = env.action_space.sample(info["action_mask"])``

diff --git a/gym/spaces/dict.py b/gym/spaces/dict.py
@@ -1,6 +1,7 @@
 """Implementation of a space that represents the cartesian product of other spaces as a dictionary."""
 from collections import OrderedDict
 from collections.abc import Mapping, Sequence
+from typing import Any
 from typing import Dict as TypingDict
 from typing import Optional, Union
 
@@ -137,7 +138,7 @@ def seed(self, seed: Optional[Union[dict, int]] = None) -> list:
 
         return seeds
 
-    def sample(self, mask: Optional[TypingDict[str, np.ndarray]] = None) -> dict:
+    def sample(self, mask: Optional[TypingDict[str, Any]] = None) -> dict:
         """Generates a single random sample from this space.
 
         The sample is an ordered dictionary of independent samples from the constituent spaces.

diff --git a/gym/spaces/discrete.py b/gym/spaces/discrete.py
@@ -46,8 +46,9 @@ def sample(self, mask: Optional[np.ndarray] = None) -> int:
         A sample will be chosen uniformly at random with the mask if provided
 
         Args:
-            mask: An optional mask for if an action can be selected. Expected shape is (n,).
-                If there are no possible actions, will default to `space.start`.
+            mask: An optional mask for if an action can be selected.
+                Expected `np.ndarray` of shape `(n,)` and dtype `np.int8` where `1` represents valid actions and `0` invalid / infeasible actions.
+                If there are no possible actions (i.e. `np.all(mask == 0)`) then `space.start` will be returned.
 
         Returns:
             A sampled integer from the space

diff --git a/gym/spaces/graph.py b/gym/spaces/graph.py
@@ -1,12 +1,12 @@
 """Implementation of a space that represents graph information where nodes and edges can be represented with euclidean space."""
 from collections import namedtuple
-from typing import NamedTuple, Optional, Sequence, Union
+from typing import NamedTuple, Optional, Sequence, Tuple, Union
 
 import numpy as np
 
 from gym.spaces.box import Box
 from gym.spaces.discrete import Discrete
-from gym.spaces.multi_discrete import MultiDiscrete
+from gym.spaces.multi_discrete import SAMPLE_MASK_TYPE, MultiDiscrete
 from gym.spaces.space import Space
 from gym.utils import seeding
 
@@ -70,58 +70,79 @@ def __init__(
 
     def _generate_sample_space(
         self, base_space: Union[None, Box, Discrete], num: int
-    ) -> Optional[Union[Box, Discrete]]:
-        # the possibility of this space , got {type(base_space)}aving nothing
-        if num == 0:
+    ) -> Optional[Union[Box, MultiDiscrete]]:
+        if num == 0 or base_space is None:
             return None
 
         if isinstance(base_space, Box):
             return Box(
                 low=np.array(max(1, num) * [base_space.low]),
                 high=np.array(max(1, num) * [base_space.high]),
-                shape=(num, *base_space.shape),
+                shape=(num,) + base_space.shape,
                 dtype=base_space.dtype,
-                seed=self._np_random,
+                seed=self.np_random,
             )
         elif isinstance(base_space, Discrete):
-            return MultiDiscrete(nvec=[base_space.n] * num, seed=self._np_random)
-        elif base_space is None:
-            return None
+            return MultiDiscrete(nvec=[base_space.n] * num, seed=self.np_random)
         else:
             raise AssertionError(
-                f"Only Box and Discrete can be accepted as a base_space, got {type(base_space)}, you should not have gotten this error."
+                f"Expects base space to be Box and Discrete, actual space: {type(base_space)}."
             )
 
-    def sample(self, mask: None = None) -> NamedTuple:
+    def sample(
+        self,
+        num_nodes: int,
+        num_edges: Optional[int] = None,
+        mask: Optional[
+            Tuple[Optional[SAMPLE_MASK_TYPE], Optional[SAMPLE_MASK_TYPE]]
+        ] = None,
+    ) -> NamedTuple:
         """Generates a single sample graph with num_nodes between 1 and 10 sampled from the Graph.
 
         Args:
-            mask: As the number of nodes to determined during sample, it is not possible to know the mask beforehand.
+            num_nodes: The number of nodes that will be sampled.
+            num_edges: An optional number of edges; if not provided, a random number between 0 and `num_nodes`^2 is sampled.
+            mask: An optional tuple of optional node and edge masks, which is only possible with Discrete spaces
+                (Box spaces don't support sample masks).
+                If no `num_edges` is provided, then the edge mask is repeated once for each sampled edge.
 
         Returns:
             A NamedTuple representing a graph with attributes .nodes, .edges, and .edge_links.
         """
-        if mask is not None:
-            raise NotImplementedError(
-                "Graph.sample(mask) is not implemented as the number of nodes is determined within the function."
-            )
+        assert (
+            num_nodes > 0
+        ), f"The number of nodes is expected to be greater than 0, actual value: {num_nodes}"
 
-        num_nodes = self.np_random.integers(low=1, high=10)
+        if mask is not None:
+            node_space_mask, edge_space_mask = mask
+        else:
+            node_space_mask, edge_space_mask = None, None
 
         # we only have edges when we have at least 2 nodes
-        num_edges = 0
-        if num_nodes > 1:
-            # maximal number of edges is (n*n) allowing self connections and two-way is allowed
-            num_edges = self.np_random.integers(num_nodes * num_nodes)
+        if num_edges is None:
+            if num_nodes > 1:
+                # maximal number of edges is (n*n) allowing self connections and two-way is allowed
+                num_edges = self.np_random.integers(num_nodes * num_nodes)
+            else:
+                num_edges = 0
+            edge_space_mask = tuple(edge_space_mask for _ in range(num_edges))
+        else:
+            assert (
+                num_edges >= 0
+            ), f"The number of edges is expected to be greater than 0, actual mask: {num_edges}"
 
-        node_sample_space = self._generate_sample_space(self.node_space, num_nodes)
-        edge_sample_space = self._generate_sample_space(self.edge_space, num_edges)
+        sampled_node_space = self._generate_sample_space(self.node_space, num_nodes)
+        sampled_edge_space = self._generate_sample_space(self.edge_space, num_edges)
 
         sampled_nodes = (
-            node_sample_space.sample() if node_sample_space is not None else None
+            sampled_node_space.sample(node_space_mask)
+            if sampled_node_space is not None
+            else None
         )
         sampled_edges = (
-            edge_sample_space.sample() if edge_sample_space is not None else None
+            sampled_edge_space.sample(edge_space_mask)
+            if sampled_edge_space is not None
+            else None
         )
 
         sampled_edge_links = None

diff --git a/gym/spaces/multi_discrete.py b/gym/spaces/multi_discrete.py
@@ -70,7 +70,7 @@ def sample(self, mask: Optional[SAMPLE_MASK_TYPE] = None) -> np.ndarray:
         Args:
             mask: An optional mask for multi-discrete, expects tuples with a `np.ndarray` mask in the position of each
                 action with shape `(n,)` where `n` is the number of actions and `dtype=np.int8`.
-                If there are no possible actions, the default action is 0
+                Only mask values == 1 are possible to sample; if all mask values for an action are 0, then the default action 0 is sampled.
 
         Returns:
             An `np.ndarray` of shape `space.shape`