sergiopaniego HF Staff committed on
Commit
c65b2a4
·
verified ·
1 Parent(s): 79f14e8

Upload folder using huggingface_hub

Browse files
Dockerfile ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # =============================================================================
8
+ # OpenSpiel Environment Dockerfile
9
+ # =============================================================================
10
+ #
11
+ # Uses a pre-built OpenSpiel base image to avoid long build times (~30-60 min).
12
+ # The base image contains compiled OpenSpiel (C++ and Python bindings).
13
+ #
14
+ # DEFAULT (recommended for HuggingFace Spaces):
15
+ # Uses pre-built image from GHCR - no C++ compilation needed
16
+ #
17
+ # BUILD YOUR OWN BASE IMAGE (if you need custom OpenSpiel configuration):
18
+ # 1. Build the base image first (takes ~30-60 min):
19
+ # docker build -t openspiel-base:latest -f server/Dockerfile.openspiel-base .
20
+ # 2. Then build with your local base image:
21
+ # docker build --build-arg OPENSPIEL_BASE_IMAGE=openspiel-base:latest -t openspiel-env .
22
+ #
23
+ # =============================================================================
24
+
25
+ # Default: use pre-built image from GHCR (skips C++ compilation)
26
+ ARG OPENSPIEL_BASE_IMAGE=ghcr.io/meta-pytorch/openenv-openspiel-base:sha-e622c7e
27
+ FROM ${OPENSPIEL_BASE_IMAGE}
28
+
29
+ WORKDIR /app
30
+
31
+ # Install git (needed for pip install from git repos in pyproject.toml)
32
+ RUN apt-get update && apt-get install -y --no-install-recommends git \
33
+ && rm -rf /var/lib/apt/lists/*
34
+
35
+ # Copy environment code (context is the environment directory)
36
+ COPY . /app/env
37
+
38
+ # Install Python dependencies from pyproject.toml
39
+ WORKDIR /app/env
40
+ RUN pip3 install --no-cache-dir .
41
+
42
+ WORKDIR /app
43
+
44
+ # Copy README for web interface documentation
45
+ COPY README.md /app/README.md
46
+
47
+ # Python path configuration
48
+ # - /repo and /repo/build/python: OpenSpiel paths from base image
49
+ # - /app/env: Environment code
50
+ ENV PYTHONPATH=/repo:/repo/build/python:/app/env
51
+
52
+ # OpenSpiel-specific environment variables (can be overridden at runtime)
53
+ ENV OPENSPIEL_GAME=catch
54
+ ENV OPENSPIEL_AGENT_PLAYER=0
55
+ ENV OPENSPIEL_OPPONENT_POLICY=random
56
+
57
+ # Health check
58
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=120s --retries=3 \
59
+ CMD curl -f http://localhost:8000/health || exit 1
60
+
61
+ EXPOSE 8000
62
+
63
+ # Run the FastAPI server
64
+ ENV ENABLE_WEB_INTERFACE=true
65
+ CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000", "--timeout-keep-alive", "120"]
README.md CHANGED
@@ -1,10 +1,381 @@
1
  ---
2
- title: Openspiel Env
3
- emoji: 🦀
4
- colorFrom: pink
5
- colorTo: pink
6
  sdk: docker
7
  pinned: false
 
 
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: OpenSpiel Environment Server
3
+ emoji: 🎮
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: docker
7
  pinned: false
8
+ app_port: 8000
9
+ base_path: /web
10
+ tags:
11
+ - openenv
12
  ---
13
 
14
+ # OpenSpiel Environment
15
+
16
+ Integration of OpenSpiel games with the OpenEnv framework. [OpenSpiel](https://github.com/google-deepmind/open_spiel) is DeepMind's collection of 70+ game environments for RL research.
17
+
18
+ ## Supported Games
19
+
20
+ This environment supports 6 games across different categories:
21
+
22
+ ### Single-Player Games (No Opponent)
23
+ 1. **Catch** - Move horizontally to catch a falling ball
24
+ 2. **Cliff Walking** - Navigate grid without falling off cliff (Sutton & Barto benchmark)
25
+ 3. **2048** - Classic tile-merging puzzle game
26
+ 4. **Blackjack** - Simplified blackjack (HIT/STAND only)
27
+
28
+ ### Multi-Player Games (with Bot Opponent)
29
+ 5. **Tic-Tac-Toe** - Classic 3x3 game
30
+ 6. **Kuhn Poker** - 2-player simplified poker (game theory benchmark)
31
+
32
+ ## Quick Start
33
+
34
+ The simplest way to use the OpenSpiel environment is through the `OpenSpielEnv` class:
35
+
36
+ ```python
37
+ from openspiel_env import OpenSpielEnv, OpenSpielAction
38
+
39
+ try:
40
+ # Create environment from Docker image
41
+ env = OpenSpielEnv.from_docker_image("openspiel-env:latest")
42
+
43
+ # Reset to start a new episode
44
+ result = env.reset()
45
+ print(f"Initial state: {result.observation.info_state}")
46
+ print(f"Legal actions: {result.observation.legal_actions}")
47
+
48
+ # Play until done
49
+ while not result.done:
50
+ action_id = result.observation.legal_actions[0]
51
+ result = env.step(OpenSpielAction(action_id=action_id))
52
+ print(f"Reward: {result.reward}, Done: {result.done}")
53
+
54
+ finally:
55
+ # Always clean up
56
+ env.close()
57
+ ```
58
+
59
+ That's it! The `OpenSpielEnv.from_docker_image()` method handles:
60
+ - Starting the Docker container
61
+ - Waiting for the server to be ready
62
+ - Connecting to the environment
63
+ - Container cleanup when you call `close()`
64
+
65
+ ## Building the Docker Image
66
+
67
+ OpenSpiel requires compilation from C++ source. The Docker build uses a **pre-built base image** by default to avoid long build times.
68
+
69
+ ### Default Build (Recommended)
70
+
71
+ From the **environment directory** (`envs/openspiel_env/`):
72
+
73
+ ```bash
74
+ # Uses pre-built base image from GHCR (fast, ~1-2 min)
75
+ docker build -t openspiel-env:latest -f server/Dockerfile .
76
+ ```
77
+
78
+ This uses the pre-built `ghcr.io/meta-pytorch/openenv-openspiel-base` image which already contains compiled OpenSpiel.
79
+
80
+ ### Building Your Own Base Image (Optional)
81
+
82
+ If you need to customize OpenSpiel or can't access the pre-built image:
83
+
84
+ ```bash
85
+ # Step 1: Build the base image (compiles OpenSpiel, ~30-60 min)
86
+ docker build -t openspiel-base:latest -f server/Dockerfile.openspiel-base .
87
+
88
+ # Step 2: Build the environment using your local base image
89
+ docker build -t openspiel-env:latest \
90
+ --build-arg OPENSPIEL_BASE_IMAGE=openspiel-base:latest \
91
+ -f server/Dockerfile .
92
+ ```
93
+
94
+ ## Deploying to Hugging Face Spaces
95
+
96
+ You can easily deploy your OpenEnv environment to Hugging Face Spaces using the `openenv push` command:
97
+
98
+ ```bash
99
+ # From the environment directory (envs/openspiel_env/)
100
+ openenv push
101
+
102
+ # Or specify options
103
+ openenv push --namespace my-org --private
104
+ ```
105
+
106
+ The `openenv push` command will:
107
+ 1. Validate that the directory is an OpenEnv environment (checks for `openenv.yaml`)
108
+ 2. Prepare a custom build for Hugging Face Docker space (enables web interface)
109
+ 3. Upload to Hugging Face (ensuring you're logged in)
110
+
111
+ ### Prerequisites
112
+
113
+ - Authenticate with Hugging Face: The command will prompt for login if not already authenticated
114
+
115
+ ### Options
116
+
117
+ - `--directory`, `-d`: Directory containing the OpenEnv environment (defaults to current directory)
118
+ - `--repo-id`, `-r`: Repository ID in format 'username/repo-name' (defaults to 'username/env-name' from openenv.yaml)
119
+ - `--base-image`, `-b`: Base Docker image to use (overrides Dockerfile FROM)
120
+ - `--private`: Deploy the space as private (default: public)
121
+
122
+ ### Examples
123
+
124
+ ```bash
125
+ # Push to your personal namespace (defaults to username/env-name from openenv.yaml)
126
+ openenv push
127
+
128
+ # Push to a specific repository
129
+ openenv push --repo-id my-org/openspiel-env
130
+
131
+ # Push as a private space
132
+ openenv push --private
133
+
134
+ # Combine options
135
+ openenv push --repo-id my-org/openspiel-env --private
136
+ ```
137
+
138
+ After deployment, your space will be available at:
139
+ `https://huggingface.co/spaces/<repo-id>`
140
+
141
+ The deployed space includes:
142
+ - **Web Interface** at `/web` - Interactive UI for exploring the environment
143
+ - **API Documentation** at `/docs` - Full OpenAPI/Swagger interface
144
+ - **Health Check** at `/health` - Container health monitoring
145
+
146
+ > **Note**: The default Dockerfile uses a pre-built base image with OpenSpiel already compiled, so deployment is fast and works with standard CPU hardware. If you build your own base image, compilation requires more resources and time.
147
+
148
+ ## Running Specific Games
149
+
150
+ ```bash
151
+ # Catch (default)
152
+ docker run -p 8000:8000 openspiel-env:latest
153
+
154
+ # Tic-Tac-Toe with random opponent
155
+ docker run -p 8000:8000 -e OPENSPIEL_GAME=tic_tac_toe openspiel-env:latest
156
+
157
+ # Kuhn Poker
158
+ docker run -p 8000:8000 -e OPENSPIEL_GAME=kuhn_poker openspiel-env:latest
159
+
160
+ # 2048
161
+ docker run -p 8000:8000 -e OPENSPIEL_GAME=2048 openspiel-env:latest
162
+
163
+ # Blackjack
164
+ docker run -p 8000:8000 -e OPENSPIEL_GAME=blackjack openspiel-env:latest
165
+
166
+ # Cliff Walking
167
+ docker run -p 8000:8000 -e OPENSPIEL_GAME=cliff_walking openspiel-env:latest
168
+ ```
169
+
170
+ ## Environment Details
171
+
172
+ ### Action
173
+ **OpenSpielAction**: Contains the action to take
174
+ - `action_id` (int) - Action ID to execute
175
+ - `game_name` (str) - Game name (default: "catch")
176
+ - `game_params` (Dict) - Optional game parameters
177
+
178
+ ### Observation
179
+ **OpenSpielObservation**: Contains the game state
180
+ - `info_state` (List[float]) - Agent's information state vector
181
+ - `legal_actions` (List[int]) - Legal action IDs
182
+ - `game_phase` (str) - "initial", "playing", or "terminal"
183
+ - `current_player_id` (int) - Current player (-1 for simultaneous)
184
+ - `opponent_last_action` (Optional[int]) - Last opponent action
185
+ - `done` (bool) - Whether the episode has ended
186
+ - `reward` (Optional[float]) - Reward for the last action
187
+
188
+ ### State
189
+ **OpenSpielState**: Server-side state snapshot
190
+ - `episode_id` (str) - Unique identifier for the current episode
191
+ - `step_count` (int) - Number of steps taken
192
+ - `game_name` (str) - Game name
193
+ - `agent_player` (int) - Agent's player ID
194
+ - `opponent_policy` (str) - Opponent policy name
195
+ - `num_players` (int) - Total players
196
+
197
+ ## Configuration
198
+
199
+ ### Environment Variables
200
+
201
+ - `OPENSPIEL_GAME`: Game name (default: "catch")
202
+ - `OPENSPIEL_AGENT_PLAYER`: Player ID for agent (default: 0)
203
+ - `OPENSPIEL_OPPONENT_POLICY`: Opponent policy for multi-player games
204
+ - `random`: Uniform random (default)
205
+ - `first`: Always picks first legal action
206
+ - `last`: Always picks last legal action
207
+
208
+ ### Example: Tic-Tac-Toe with Fixed Opponent
209
+
210
+ ```bash
211
+ docker run -p 8000:8000 \
212
+ -e OPENSPIEL_GAME=tic_tac_toe \
213
+ -e OPENSPIEL_OPPONENT_POLICY=first \
214
+ openspiel-env:latest
215
+ ```
216
+
217
+ ## Advanced Usage
218
+
219
+ ### Connecting to an Existing Server
220
+
221
+ If you already have an OpenSpiel environment server running:
222
+
223
+ ```python
224
+ from openspiel_env import OpenSpielEnv, OpenSpielAction
225
+
226
+ # Connect to existing server
227
+ env = OpenSpielEnv(base_url="http://localhost:8000")
228
+
229
+ # Use as normal
230
+ result = env.reset()
231
+ result = env.step(OpenSpielAction(action_id=result.observation.legal_actions[0]))
232
+
233
+ # Close connection (does NOT stop the server)
234
+ env.close()
235
+ ```
236
+
237
+ ### Connecting to a Hugging Face Space
238
+
239
+ ```python
240
+ from openspiel_env import OpenSpielEnv, OpenSpielAction
241
+
242
+ # Connect to remote Space
243
+ env = OpenSpielEnv(base_url="https://your-username-openspiel.hf.space")
244
+
245
+ result = env.reset()
246
+ print(f"Game: {result.observation.game_phase}")
247
+ print(f"Legal actions: {result.observation.legal_actions}")
248
+
249
+ result = env.step(OpenSpielAction(action_id=result.observation.legal_actions[0]))
250
+ env.close()
251
+ ```
252
+
253
+ ## Game-Specific Information
254
+
255
+ ### 1. Catch
256
+ - **Type**: Single-player
257
+ - **Action Space**: 3 actions (left, stay, right)
258
+ - **Observation**: 5x5 grid flattened (25 dimensions)
259
+ - **Reward**: +1 for catching ball, 0 otherwise
260
+ - **Episode Length**: ~10 steps
261
+
262
+ ### 2. Tic-Tac-Toe
263
+ - **Type**: 2-player turn-based, perfect information
264
+ - **Players**: Agent (X) vs Random Bot (O)
265
+ - **Action Space**: 9 positions
266
+ - **Observation**: 27 dimensions (3x3 board + game state)
267
+ - **Reward**: +1 win, -1 loss, 0 draw/mid-game
268
+
269
+ ### 3. Kuhn Poker
270
+ - **Type**: 2-player turn-based, imperfect information
271
+ - **Players**: Agent vs Random Bot
272
+ - **Action Space**: 2 actions (pass/fold, bet/call)
273
+ - **Observation**: 6 dimensions (card + betting history)
274
+ - **Reward**: Pot winnings (typically -1, 0, +1, +2)
275
+ - **Notes**: THE benchmark for imperfect-information RL
276
+
277
+ ### 4. Cliff Walking
278
+ - **Type**: Single-player grid world
279
+ - **Action Space**: 4 actions (up, down, left, right)
280
+ - **Observation**: Position encoding
281
+ - **Reward**: -1 per step, -100 for falling off cliff
282
+ - **Notes**: Classic RL benchmark from Sutton & Barto
283
+
284
+ ### 5. 2048
285
+ - **Type**: Single-player puzzle
286
+ - **Action Space**: 4 actions (up, down, left, right)
287
+ - **Observation**: 4x4 grid with tile values
288
+ - **Reward**: Points from merging tiles
289
+ - **Notes**: Stochastic tile spawning
290
+
291
+ ### 6. Blackjack
292
+ - **Type**: Single-player vs dealer
293
+ - **Action Space**: 2 actions (HIT, STAND)
294
+ - **Observation**: Player hand + dealer's visible card
295
+ - **Reward**: +1 win, -1 loss, 0 draw
296
+ - **Notes**: Simplified version, no double/split
297
+
298
+ ## Development & Testing
299
+
300
+ ### Direct Environment Testing
301
+
302
+ Test the environment logic directly without starting the HTTP server (requires OpenSpiel installed locally):
303
+
304
+ ```python
305
+ from openspiel_env.server.openspiel_environment import OpenSpielEnvironment
306
+ from openspiel_env.models import OpenSpielAction
307
+
308
+ # Create environment directly
309
+ env = OpenSpielEnvironment(game_name="catch")
310
+
311
+ # Test reset
312
+ obs = env.reset()
313
+ print(f"Info state: {obs.info_state}")
314
+
315
+ # Test step
316
+ obs = env.step(OpenSpielAction(action_id=0))
317
+ print(f"Done: {obs.done}, Reward: {obs.reward}")
318
+ ```
319
+
320
+ ### Running Locally
321
+
322
+ Run the server locally for development (requires OpenSpiel installed):
323
+
324
+ ```bash
325
+ # From the environment directory
326
+ cd envs/openspiel_env
327
+
328
+ # Install dependencies
329
+ uv venv && source .venv/bin/activate
330
+ uv pip install -e .
331
+
332
+ # Start the server
333
+ python -m uvicorn server.app:app --reload
334
+ ```
335
+
336
+ Or using the CLI entry point:
337
+
338
+ ```bash
339
+ uv run --project . server --port 8000
340
+ ```
341
+
342
+ ### Automated Testing (All 6 Games)
343
+
344
+ ```bash
345
+ ./test_docker_all_games.sh
346
+ ```
347
+
348
+ This script will build and test all 6 supported games in Docker.
349
+
350
+ ## Project Structure
351
+
352
+ ```
353
+ openspiel_env/
354
+ ├── __init__.py # Module exports
355
+ ├── README.md # This file
356
+ ├── openenv.yaml # OpenEnv manifest
357
+ ├── pyproject.toml # Project metadata and dependencies
358
+ ├── client.py # OpenSpielEnv client implementation
359
+ ├── models.py # Action, Observation, and State models
360
+ ├── test_docker_all_games.sh # Automated test script
361
+ └── server/
362
+     ├── __init__.py # Server module exports
363
+     ├── openspiel_environment.py # Core OpenSpielEnvironment implementation
364
+     ├── opponent_policies.py # Opponent policies (random, fixed)
365
+     ├── app.py # FastAPI application
366
+     ├── Dockerfile # Environment container (uses pre-built base)
367
+     └── Dockerfile.openspiel-base # Base image with compiled OpenSpiel
368
+ ```
369
+
370
+ ## Limitations
371
+
372
+ - **Simultaneous-move games**: Only agent_player=0 supported
373
+ - **Multi-agent training**: Single agent only (no self-play yet)
374
+ - **Opponent policies**: Random and fixed only (no MCTS yet)
375
+ - **Build time**: Building your own base image takes ~30-60 min (compiles OpenSpiel C++). Using the pre-built image is fast (~1-2 min) and works with standard hardware.
376
+
377
+ ## References
378
+
379
+ - [OpenSpiel Paper (2019)](https://arxiv.org/abs/1908.09453)
380
+ - [OpenSpiel GitHub](https://github.com/google-deepmind/open_spiel)
381
+ - [OpenSpiel Documentation](https://openspiel.readthedocs.io/)
__init__.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ OpenSpiel Environment Integration.
9
+
10
+ This module provides integration between OpenSpiel games and the OpenEnv framework.
11
+ OpenSpiel (https://github.com/google-deepmind/open_spiel) is DeepMind's collection
12
+ of environments and algorithms for research in RL in games.
13
+
14
+ Supported games:
15
+ - Catch (1P)
16
+ - Tic-Tac-Toe (2P)
17
+ - Kuhn Poker (2P, imperfect info)
18
+ - Cliff Walking (1P)
19
+ - 2048 (1P)
20
+ - Blackjack (1P)
21
+ """
22
+
23
+ from .client import OpenSpielEnv
24
+ from .models import OpenSpielAction, OpenSpielObservation, OpenSpielState
25
+
26
+ __all__ = ["OpenSpielEnv", "OpenSpielAction", "OpenSpielObservation", "OpenSpielState"]
client.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ OpenSpielEnv Client.
9
+
10
+ This module provides the client for connecting to an OpenSpiel Environment server
11
+ via WebSocket for persistent sessions.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from typing import Any, Dict, Optional, TYPE_CHECKING
17
+
18
+ from openenv.core.client_types import StepResult
19
+
20
+ from openenv.core.env_client import EnvClient
21
+
22
+ from .models import OpenSpielAction, OpenSpielObservation, OpenSpielState
23
+
24
+ if TYPE_CHECKING:
25
+ from openenv.core.containers.runtime import ContainerProvider
26
+
27
+
28
+ class OpenSpielEnv(EnvClient[OpenSpielAction, OpenSpielObservation, OpenSpielState]):
29
+ """
30
+ Client for OpenSpiel Environment.
31
+
32
+ This client maintains a persistent WebSocket connection to the environment
33
+ server, enabling efficient multi-step interactions with lower latency.
34
+
35
+ Example:
36
+ >>> # Connect to a running server
37
+ >>> with OpenSpielEnv(base_url="http://localhost:8000") as client:
38
+ ... result = client.reset()
39
+ ... print(result.observation.info_state)
40
+ ...
41
+ ... result = client.step(OpenSpielAction(action_id=1, game_name="catch"))
42
+ ... print(result.observation.reward)
43
+
44
+ Example with Docker:
45
+ >>> # Automatically start container and connect
46
+ >>> client = OpenSpielEnv.from_docker_image("openspiel-env:latest")
47
+ >>> try:
48
+ ... result = client.reset()
49
+ ... result = client.step(OpenSpielAction(action_id=0))
50
+ ... finally:
51
+ ... client.close()
52
+ """
53
+
54
+ def _step_payload(self, action: OpenSpielAction) -> Dict[str, Any]:
55
+ """
56
+ Convert OpenSpielAction to JSON payload for step request.
57
+
58
+ Args:
59
+ action: OpenSpielAction instance.
60
+
61
+ Returns:
62
+ Dictionary representation suitable for JSON encoding.
63
+ """
64
+ return {
65
+ "action_id": action.action_id,
66
+ "game_name": action.game_name,
67
+ "game_params": action.game_params,
68
+ }
69
+
70
+ def _parse_result(
71
+ self, payload: Dict[str, Any]
72
+ ) -> StepResult[OpenSpielObservation]:
73
+ """
74
+ Parse server response into StepResult[OpenSpielObservation].
75
+
76
+ Args:
77
+ payload: JSON response from server.
78
+
79
+ Returns:
80
+ StepResult with OpenSpielObservation.
81
+ """
82
+ obs_data = payload.get("observation", {})
83
+
84
+ observation = OpenSpielObservation(
85
+ info_state=obs_data.get("info_state", []),
86
+ legal_actions=obs_data.get("legal_actions", []),
87
+ game_phase=obs_data.get("game_phase", "playing"),
88
+ current_player_id=obs_data.get("current_player_id", 0),
89
+ opponent_last_action=obs_data.get("opponent_last_action"),
90
+ done=payload.get("done", False),
91
+ reward=payload.get("reward"),
92
+ metadata=obs_data.get("metadata", {}),
93
+ )
94
+
95
+ return StepResult(
96
+ observation=observation,
97
+ reward=payload.get("reward"),
98
+ done=payload.get("done", False),
99
+ )
100
+
101
+ def _parse_state(self, payload: Dict[str, Any]) -> OpenSpielState:
102
+ """
103
+ Parse server response into OpenSpielState object.
104
+
105
+ Args:
106
+ payload: JSON response from /state endpoint.
107
+
108
+ Returns:
109
+ OpenSpielState object with environment state information.
110
+ """
111
+ return OpenSpielState(
112
+ episode_id=payload.get("episode_id"),
113
+ step_count=payload.get("step_count", 0),
114
+ game_name=payload.get("game_name", "unknown"),
115
+ agent_player=payload.get("agent_player", 0),
116
+ opponent_policy=payload.get("opponent_policy", "random"),
117
+ game_params=payload.get("game_params", {}),
118
+ num_players=payload.get("num_players", 1),
119
+ )
docker_issue.md ADDED
@@ -0,0 +1 @@
 
 
1
+ # port issue? fix proxy?
models.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Data models for OpenSpiel Environment.
9
+
10
+ This module defines the Action, Observation, and State types for OpenSpiel games.
11
+ """
12
+
13
+ from __future__ import annotations
14
+ from pydantic import Field
15
+
16
+ from typing import Any, Dict, List, Optional
17
+
18
+ from openenv.core.env_server import Action, Observation, State
19
+
20
+
21
+ class OpenSpielAction(Action):
22
+ """
23
+ Action for OpenSpiel environments.
24
+
25
+ Attributes:
26
+ action_id: The integer action ID to take (from legal_actions).
27
+ game_name: Name of the OpenSpiel game (e.g., "catch", "tic_tac_toe").
28
+ game_params: Optional game-specific parameters (e.g., {"rows": 8, "columns": 6}).
29
+ """
30
+ action_id: int
31
+ game_name: str = "catch"
32
+ game_params: Dict[str, Any] = Field(default_factory=dict)
33
+
34
+
35
+ class OpenSpielObservation(Observation):
36
+ """
37
+ Observation from OpenSpiel environment.
38
+
39
+ This represents what the agent sees after taking an action.
40
+ For single-player games, this is straightforward.
41
+ For multi-player games, this is from the perspective of the agent player.
42
+
43
+ Attributes:
44
+ info_state: Information state tensor (list of floats) for the agent.
45
+ This contains all information available to the agent.
46
+ legal_actions: List of legal action IDs the agent can take.
47
+ game_phase: String describing the current phase (e.g., "playing", "terminal").
48
+ current_player_id: ID of the current player (-1 for simultaneous, player ID otherwise).
49
+ opponent_last_action: Last action taken by opponent (if available, None otherwise).
50
+ """
51
+ info_state: List[float]
52
+ legal_actions: List[int]
53
+ game_phase: str = "playing"
54
+ current_player_id: int = 0
55
+ opponent_last_action: Optional[int] = None
56
+
57
+
58
+ class OpenSpielState(State):
59
+ """
60
+ State for OpenSpiel environment.
61
+
62
+ Attributes:
63
+ game_name: Name of the OpenSpiel game.
64
+ agent_player: Which player ID the agent controls (0 by default).
65
+ opponent_policy: Name of the opponent policy ("random", "fixed", etc.).
66
+ game_params: Game-specific parameters.
67
+ num_players: Total number of players in the game.
68
+ """
69
+ game_name: str = "catch"
70
+ agent_player: int = 0
71
+ opponent_policy: str = "random"
72
+ game_params: Dict[str, Any] = Field(default_factory=dict)
73
+ num_players: int = 1
openenv.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ spec_version: 1
2
+ name: openspiel_env
3
+ type: space
4
+ runtime: fastapi
5
+ app: server.app:app
6
+ port: 8000
pyproject.toml ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ [build-system]
8
+ requires = ["setuptools>=45", "wheel"]
9
+ build-backend = "setuptools.build_meta"
10
+
11
+ [project]
12
+ name = "openenv-openspiel-env"
13
+ version = "0.1.0"
14
+ description = "OpenSpiel Environment for OpenEnv - integration with DeepMind's game research framework"
15
+ requires-python = ">=3.10"
16
+ dependencies = [
17
+ # Core OpenEnv dependencies (required for server functionality)
18
+ "openenv-core @ git+https://github.com/meta-pytorch/OpenEnv.git@main",
19
+ "fastapi>=0.115.0",
20
+ "pydantic>=2.0.0",
21
+ "uvicorn>=0.24.0",
22
+ "requests>=2.31.0",
23
+ # Note: OpenSpiel (pyspiel) is built from source in the Docker image
24
+ # and is not available as a pip package. The Docker build compiles it
25
+ # from https://github.com/google-deepmind/open_spiel
26
+ ]
27
+
28
+ [project.optional-dependencies]
29
+ dev = [
30
+ "pytest>=8.0.0",
31
+ "pytest-cov>=4.0.0",
32
+ ]
33
+
34
+ [project.scripts]
35
+ # Server entry point
36
+ server = "openspiel_env.server.app:main"
37
+
38
+ [tool.setuptools]
39
+ include-package-data = true
40
+ packages = ["openspiel_env", "openspiel_env.server"]
41
+ package-dir = { "openspiel_env" = ".", "openspiel_env.server" = "server" }
server/Dockerfile.openspiel-base ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # Pre-built OpenSpiel base image
8
+ # This image contains OpenSpiel compiled and ready to use
9
+ # Built from: docker build -t openspiel-base:latest -f envs/openspiel_env/server/Dockerfile.openspiel-base .
10
+ # In GitHub Actions, this is overridden to use the GHCR base image
11
+ ARG BASE_IMAGE=openenv-base:latest
12
+ FROM ${BASE_IMAGE}
13
+
14
+ # Avoid interactive prompts during build
15
+ ENV DEBIAN_FRONTEND=noninteractive
16
+ ENV TZ=UTC
17
+
18
+ # Install build dependencies (curl already installed by openenv-base)
19
+ RUN apt-get update && apt-get install -y --no-install-recommends \
20
+ build-essential \
21
+ clang \
22
+ cmake \
23
+ git \
24
+ sudo \
25
+ && rm -rf /var/lib/apt/lists/*
26
+
27
+ # Set up OpenSpiel build directory
28
+ RUN mkdir /repo
29
+ WORKDIR /repo
30
+
31
+ # Clone OpenSpiel
32
+ RUN git clone https://github.com/google-deepmind/open_spiel.git .
33
+
34
+ # Run OpenSpiel's installation script (downloads C++ dependencies)
35
+ RUN ./install.sh
36
+
37
+ # Install Python dependencies
38
+ # First upgrade pip and setuptools, then install other packages
39
+ RUN pip3 install --no-cache-dir --upgrade pip setuptools wheel
40
+ RUN pip3 install --no-cache-dir --upgrade pbr testresources importlib_metadata
41
+ RUN pip3 install --no-cache-dir --upgrade -r requirements.txt cmake
42
+
43
+ # Build OpenSpiel with Python 3.11
44
+ # Use the exact same Python executable as the base image
45
+ # Disable gin_rummy to speed up build (complex game, not needed for basic usage)
46
+ RUN mkdir -p build
47
+ WORKDIR /repo/build
48
+ RUN cmake -DPython3_EXECUTABLE=/usr/local/bin/python3 \
49
+ -DCMAKE_CXX_COMPILER=$(which clang++) \
50
+ -DOPEN_SPIEL_BUILD_WITH_GIN_RUMMY=OFF \
51
+ ../open_spiel
52
+ RUN make -j$(nproc) pyspiel
53
+
54
+ # Install OpenSpiel Python requirements
55
+ WORKDIR /repo
56
+ RUN pip3 install --no-cache-dir --upgrade -r requirements.txt
57
+
58
+ # Set Python path for OpenSpiel
59
+ ENV PYTHONPATH=/repo:/repo/build/python:${PYTHONPATH}
60
+
61
+ # Test OpenSpiel import to verify ABI compatibility
62
+ RUN python3 -c "import pyspiel; print('OpenSpiel import successful')" || echo "OpenSpiel import failed"
63
+
64
+ # Clean up build dependencies to reduce image size
65
+ RUN apt-get remove -y build-essential clang cmake git sudo || true && \
66
+ apt-get autoremove -y && \
67
+ apt-get clean && \
68
+ rm -rf /var/lib/apt/lists/*
69
+
70
+ # Set working directory back to /app (standard for openenv-base)
71
+ WORKDIR /app
server/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Server-side implementation for OpenSpiel environments."""
server/app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ FastAPI application for the OpenSpiel Environment.
9
+
10
+ This module creates an HTTP server that exposes OpenSpiel games
11
+ over HTTP and WebSocket endpoints, compatible with EnvClient.
12
+
13
+ Usage:
14
+ # Development (with auto-reload):
15
+ uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
16
+
17
+ # Production:
18
+ uvicorn server.app:app --host 0.0.0.0 --port 8000 --workers 4
19
+
20
+ # Or run directly:
21
+ uv run --project . server
22
+
23
+ Environment variables:
24
+ OPENSPIEL_GAME: Game name to serve (default: "catch")
25
+ OPENSPIEL_AGENT_PLAYER: Agent player ID (default: 0)
26
+ OPENSPIEL_OPPONENT_POLICY: Opponent policy (default: "random")
27
+ """
28
+
29
+ import os
30
+
31
+ # Support both in-repo and standalone imports
32
+ try:
33
+ # In-repo imports (when running from OpenEnv repository)
34
+ from openenv.core.env_server.http_server import create_app
35
+ from ..models import OpenSpielAction, OpenSpielObservation
36
+ from .openspiel_environment import OpenSpielEnvironment
37
+ except ImportError:
38
+ # Standalone imports (when environment is standalone with openenv from pip)
39
+ from openenv.core.env_server.http_server import create_app
40
+ from models import OpenSpielAction, OpenSpielObservation
41
+ from server.openspiel_environment import OpenSpielEnvironment
42
+
43
+ # Get game configuration from environment variables
44
+ game_name = os.getenv("OPENSPIEL_GAME", "catch")
45
+ agent_player = int(os.getenv("OPENSPIEL_AGENT_PLAYER", "0"))
46
+ opponent_policy = os.getenv("OPENSPIEL_OPPONENT_POLICY", "random")
47
+
48
+
49
+ # Factory function to create OpenSpielEnvironment instances
50
+ def create_openspiel_environment():
51
+ """Factory function that creates OpenSpielEnvironment with config."""
52
+ return OpenSpielEnvironment(
53
+ game_name=game_name,
54
+ agent_player=agent_player,
55
+ opponent_policy=opponent_policy,
56
+ )
57
+
58
+
59
+ # Create the FastAPI app with web interface and README integration
60
+ # Pass the factory function instead of an instance for WebSocket session support
61
+ app = create_app(
62
+ create_openspiel_environment,
63
+ OpenSpielAction,
64
+ OpenSpielObservation,
65
+ env_name="openspiel_env",
66
+ )
67
+
68
+
69
+ def main(host: str = "0.0.0.0", port: int = 8000):
70
+ """
71
+ Entry point for direct execution via uv run or python -m.
72
+
73
+ This function enables running the server without Docker:
74
+ uv run --project . server
75
+ uv run --project . server --port 8001
76
+ python -m openspiel_env.server.app
77
+
78
+ Args:
79
+ host: Host address to bind to (default: "0.0.0.0")
80
+ port: Port number to listen on (default: 8000)
81
+ """
82
+ import uvicorn
83
+
84
+ uvicorn.run(app, host=host, port=port)
85
+
86
+
87
+ if __name__ == "__main__":
88
+ main()
server/build_docker.sh ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Script to build the OpenSpiel environment Docker image
# Usage: ./build_docker.sh [tag]
#
# The image is tagged openspiel-env:<tag> (tag defaults to "latest"). It is
# built from the Dockerfile located next to this script, with the directory
# four levels above this script as the build context.
#
# Note: Requires envtorch-base:latest to be built first.
# See: src/core/containers/images/README.md
# NOTE(review): confirm this prerequisite still matches the base image named
# in the Dockerfile's FROM line.

set -e

TAG="${1:-latest}"
IMAGE_NAME="openspiel-env:${TAG}"

echo "🐳 Building OpenSpiel Environment Docker Image"
echo "================================================"
echo "Image: $IMAGE_NAME"
echo ""

# Get script directory
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Navigate to OpenEnv root (4 levels up from server/)
OPENENV_ROOT="$(cd "$SCRIPT_DIR/../../../.." && pwd)"

echo "📁 OpenEnv root: $OPENENV_ROOT"
echo ""

# Build OpenSpiel environment image.
# Docker pulls the base image referenced by the Dockerfile if it is not
# available locally.
echo "⏳ Building (this may take 5-10 minutes due to OpenSpiel compilation)..."

# Run the build inside the `if` condition: with `set -e` active, a bare
# failing `docker build` would terminate the script before any later `$?`
# check, so the failure branch would never run.
if docker build \
  -f "$SCRIPT_DIR/Dockerfile" \
  -t "$IMAGE_NAME" \
  "$OPENENV_ROOT"; then
  echo ""
  echo "✅ Build successful!"
  echo ""
  echo "🚀 Run with different games:"
  echo ""
  echo "  # Catch (default)"
  echo "  docker run -p 8000:8000 $IMAGE_NAME"
  echo ""
  echo "  # Tic-Tac-Toe"
  echo "  docker run -p 8000:8000 -e OPENSPIEL_GAME=tic_tac_toe $IMAGE_NAME"
  echo ""
  echo "  # Kuhn Poker"
  echo "  docker run -p 8000:8000 -e OPENSPIEL_GAME=kuhn_poker $IMAGE_NAME"
  echo ""
  echo "  # Cliff Walking"
  echo "  docker run -p 8000:8000 -e OPENSPIEL_GAME=cliff_walking $IMAGE_NAME"
  echo ""
  echo "  # 2048"
  echo "  docker run -p 8000:8000 -e OPENSPIEL_GAME=2048 $IMAGE_NAME"
  echo ""
  echo "  # Blackjack"
  echo "  docker run -p 8000:8000 -e OPENSPIEL_GAME=blackjack $IMAGE_NAME"
  echo ""
else
  # Diagnostics go to stderr; exit non-zero so callers can detect the failure.
  echo "" >&2
  echo "❌ Build failed!" >&2
  exit 1
fi
server/openspiel_environment.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ OpenSpiel Environment Server Implementation.
9
+
10
+ This module wraps OpenSpiel's rl_environment.Environment and exposes it
11
+ via the OpenEnv Environment interface.
12
+ """
13
+
14
+ import uuid
15
+ from typing import Any, Dict
16
+
17
+ # Support both in-repo and standalone imports
18
+ try:
19
+ # In-repo imports (when running from OpenEnv repository)
20
+ from openenv.core.env_server.interfaces import Environment
21
+ from ..models import OpenSpielAction, OpenSpielObservation, OpenSpielState
22
+ from .opponent_policies import get_opponent_policy, OpponentPolicy
23
+ except ImportError:
24
+ # Standalone imports (when environment is standalone with openenv from pip)
25
+ from openenv.core.env_server.interfaces import Environment
26
+ from models import OpenSpielAction, OpenSpielObservation, OpenSpielState
27
+ from server.opponent_policies import get_opponent_policy, OpponentPolicy
28
+
29
+ # Import OpenSpiel
30
+ try:
31
+ from open_spiel.python import rl_environment
32
+ import pyspiel
33
+ except ImportError as e:
34
+ raise ImportError(
35
+ "OpenSpiel is not installed. "
36
+ "Please install it following instructions at: "
37
+ "https://github.com/google-deepmind/open_spiel"
38
+ ) from e
39
+
40
+
41
class OpenSpielEnvironment(Environment):
    """
    OpenSpiel Environment wrapper for OpenEnv.

    This environment wraps OpenSpiel games and provides a single-agent interface.
    For multi-player games, the agent controls one player while opponent(s) use
    a fixed policy (e.g., random).

    Supported games:
    - Single-player: catch, cliff_walking, 2048, blackjack
    - Multi-player: tic_tac_toe, kuhn_poker

    Args:
        game_name: Name of the OpenSpiel game (e.g., "catch", "tic_tac_toe").
        agent_player: Which player ID the agent controls (default 0).
        opponent_policy: Policy for opponent players ("random", "first", etc.).
        game_params: Optional game-specific parameters.

    Raises:
        ValueError: If the game cannot be created, or if ``agent_player`` is
            not a valid player index for the game.

    Example:
        >>> env = OpenSpielEnvironment("catch")
        >>> obs = env.reset()
        >>> print(obs.info_state)  # Agent's observation
        >>> obs = env.step(OpenSpielAction(action_id=1))
        >>> print(obs.reward)
    """

    def __init__(
        self,
        game_name: str = "catch",
        agent_player: int = 0,
        opponent_policy: str = "random",
        game_params: Dict[str, Any] | None = None,
    ):
        """Initialize OpenSpiel environment."""
        super().__init__()

        self.game_name = game_name
        self.agent_player = agent_player
        self.game_params = game_params or {}

        # Create OpenSpiel environment
        try:
            self._ospiel_env = rl_environment.Environment(
                game_name, **self.game_params
            )
        except Exception as e:
            raise ValueError(
                f"Failed to create OpenSpiel game '{game_name}': {e}"
            ) from e

        self.num_players = self._ospiel_env.num_players
        self.is_turn_based = self._ospiel_env.is_turn_based

        # Validate agent_player. A negative index must be rejected explicitly:
        # it would otherwise silently index per-player lists from the end.
        if agent_player < 0:
            raise ValueError(f"agent_player={agent_player} must be >= 0")
        if agent_player >= self.num_players:
            raise ValueError(
                f"agent_player={agent_player} >= num_players={self.num_players}"
            )

        # Set up opponent policy for multi-player games
        self.opponent_policy_fn: OpponentPolicy | None = None
        if self.num_players > 1:
            self.opponent_policy_fn = get_opponent_policy(opponent_policy)

        # Initialize state
        self._state = OpenSpielState(
            game_name=game_name,
            agent_player=agent_player,
            opponent_policy=opponent_policy,
            game_params=self.game_params,
            num_players=self.num_players,
        )

        # Track last opponent action for learning
        self._last_opponent_action: int | None = None

        # Most recent TimeStep returned by reset()/step(). Simultaneous-move
        # games need it to look up the opponents' legal actions before the
        # joint action is submitted.
        self._last_time_step: Any = None

    def reset(self) -> OpenSpielObservation:
        """
        Reset the environment and return initial observation.

        For turn-based multi-player games, this will autoplay opponent turns
        until it's the agent's turn (or terminal state).

        Returns:
            Initial observation for the agent.
        """
        # Reset OpenSpiel environment
        time_step = self._ospiel_env.reset()

        # Reset state tracking
        self._state.episode_id = str(uuid.uuid4())
        self._state.step_count = 0
        self._last_opponent_action = None

        # Autoplay opponent turns until agent's turn
        time_step = self._auto_play_opponents(time_step)
        self._last_time_step = time_step

        # Convert to OpenEnv observation
        return self._make_observation(time_step)

    def step(self, action: OpenSpielAction) -> OpenSpielObservation:  # type: ignore[override]
        """
        Execute agent's action and return resulting observation.

        For multi-player games, this will:
        1. Apply the agent's action (together with the opponents' actions in
           simultaneous-move games)
        2. Autoplay opponent turns until it's the agent's turn again
           (turn-based games only)
        3. Return the observation from the agent's perspective

        Args:
            action: OpenSpielAction containing the action_id to execute.

        Returns:
            Observation after action execution (and opponent turns if multi-player).

        Raises:
            ValueError: If action is not an OpenSpielAction.
            NotImplementedError: If the game is simultaneous-move and
                agent_player is not 0.
            RuntimeError: If the game is simultaneous-move and no non-terminal
                time step is available (reset() was not called, or the
                episode already ended).
        """
        if not isinstance(action, OpenSpielAction):
            raise ValueError(f"Expected OpenSpielAction, got {type(action)}")

        # Apply agent's action
        if self.is_turn_based:
            # Turn-based: single action
            time_step = self._ospiel_env.step([action.action_id])
        else:
            # Simultaneous-move: need actions for all players
            # For now, only support agent as player 0 in simultaneous games
            if self.agent_player != 0:
                raise NotImplementedError(
                    "Simultaneous-move games only support agent_player=0"
                )
            time_step = self._ospiel_env.step(
                self._build_joint_actions(action.action_id)
            )

        self._state.step_count += 1

        # Autoplay opponent turns (a no-op for simultaneous-move games)
        time_step = self._auto_play_opponents(time_step)
        self._last_time_step = time_step

        # Convert to OpenEnv observation
        return self._make_observation(time_step)

    @property
    def state(self) -> OpenSpielState:
        """Get current environment state."""
        return self._state

    def _build_joint_actions(self, agent_action_id: int) -> list:
        """
        Assemble one action per player for a simultaneous-move step.

        Opponent actions are chosen by the opponent policy from the legal
        actions recorded in the most recent time step.

        Args:
            agent_action_id: Action chosen by the agent.

        Returns:
            List of action IDs ordered by player ID.

        Raises:
            RuntimeError: If there is no previous non-terminal time step to
                read the opponents' legal actions from.
        """
        previous = self._last_time_step
        if previous is None or previous.last():
            raise RuntimeError(
                "reset() must be called before step() in simultaneous-move games"
            )

        observations = previous.observations
        joint_actions = []
        for player_id in range(self.num_players):
            if player_id == self.agent_player:
                joint_actions.append(agent_action_id)
                continue
            opp_action = self.opponent_policy_fn.select_action(
                observations["legal_actions"][player_id], observations
            )
            self._last_opponent_action = opp_action
            joint_actions.append(opp_action)
        return joint_actions

    def _auto_play_opponents(self, time_step) -> Any:
        """
        Autoplay opponent turns until it's the agent's turn or game is terminal.

        Args:
            time_step: Current TimeStep from OpenSpiel environment.

        Returns:
            Updated TimeStep after opponent moves. The input is returned
            unchanged for single-player and simultaneous-move games.
        """
        # Single-player games: nothing to do
        if self.num_players == 1:
            return time_step

        # Simultaneous-move games: opponents act inside step() as part of the
        # joint action. The loop below submits one action at a time and keys
        # on "current_player" matching a single player ID, so it must not run.
        if not self.is_turn_based:
            return time_step

        # Multi-player turn-based games: play opponent turns
        while (
            not time_step.last()
            and time_step.observations["current_player"] != self.agent_player
        ):
            current_player = time_step.observations["current_player"]
            legal_actions = time_step.observations["legal_actions"][current_player]

            # Select opponent action
            opp_action = self.opponent_policy_fn.select_action(
                legal_actions, time_step.observations
            )
            self._last_opponent_action = opp_action

            # Apply opponent action
            time_step = self._ospiel_env.step([opp_action])
            self._state.step_count += 1

        return time_step

    def _make_observation(self, time_step) -> OpenSpielObservation:
        """
        Convert OpenSpiel TimeStep to OpenEnv Observation.

        Args:
            time_step: OpenSpiel TimeStep object.

        Returns:
            OpenSpielObservation for the agent.
        """
        # Extract agent's information
        info_state = time_step.observations["info_state"][self.agent_player]
        legal_actions = time_step.observations["legal_actions"][self.agent_player]
        current_player_id = time_step.observations["current_player"]

        # Determine game phase
        if time_step.last():
            game_phase = "terminal"
        elif time_step.first():
            game_phase = "initial"
        else:
            game_phase = "playing"

        # Get reward for agent (rewards may be absent on a time step)
        reward = None
        if time_step.rewards is not None:
            reward = float(time_step.rewards[self.agent_player])

        # Create observation; info_state is converted to a plain list
        obs = OpenSpielObservation(
            info_state=info_state.tolist() if hasattr(info_state, "tolist") else list(info_state),
            legal_actions=legal_actions,
            game_phase=game_phase,
            current_player_id=current_player_id,
            opponent_last_action=self._last_opponent_action,
            done=time_step.last(),
            reward=reward,
        )

        return obs
server/opponent_policies.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Opponent policies for multi-player OpenSpiel games.
9
+
10
+ These policies are used to control non-agent players in multi-player games,
11
+ allowing single-agent RL training against fixed or adaptive opponents.
12
+ """
13
+
14
+ import random
15
+ from typing import Any, Protocol
16
+
17
+
18
class OpponentPolicy(Protocol):
    """Structural interface implemented by every opponent policy."""

    def select_action(self, legal_actions: list[int], observations: dict[str, Any]) -> int:
        """
        Choose the opponent's next action.

        Args:
            legal_actions: Action IDs the opponent is allowed to play.
            observations: Current observations from the environment.

        Returns:
            The chosen action ID.
        """
        ...
33
+
34
+
35
class RandomOpponent:
    """Opponent that picks uniformly at random among the legal actions."""

    def select_action(self, legal_actions: list[int], observations: dict[str, Any]) -> int:
        """
        Pick one legal action at random.

        Args:
            legal_actions: Action IDs to choose from.
            observations: Current observations (not consulted by this policy).

        Returns:
            A randomly chosen element of ``legal_actions``.

        Raises:
            ValueError: If ``legal_actions`` is empty.
        """
        if legal_actions:
            return random.choice(legal_actions)
        raise ValueError("No legal actions available")
43
+
44
+
45
class FixedActionOpponent:
    """Opponent that always plays the legal action at a fixed position."""

    def __init__(self, action_selector: str = "first"):
        """
        Store which position of the legal-action list to play.

        Args:
            action_selector: Which action to select ("first", "last", "middle").
                Any other value is treated like "first".
        """
        self.action_selector = action_selector

    def select_action(self, legal_actions: list[int], observations: dict[str, Any]) -> int:
        """
        Return the legal action at the configured position.

        Args:
            legal_actions: Action IDs to choose from.
            observations: Current observations (not consulted by this policy).

        Returns:
            The first, last, or middle (index ``len // 2``) legal action.

        Raises:
            ValueError: If ``legal_actions`` is empty.
        """
        if not legal_actions:
            raise ValueError("No legal actions available")

        count = len(legal_actions)
        # Position for each known selector; unknown selectors use index 0.
        position = {"last": count - 1, "middle": count // 2}.get(
            self.action_selector, 0
        )
        return legal_actions[position]
70
+
71
+
72
def get_opponent_policy(policy_name: str) -> OpponentPolicy:
    """
    Look up and instantiate an opponent policy from its name.

    Args:
        policy_name: Name of the policy ("random", "first", "last", "middle").

    Returns:
        A RandomOpponent for "random"; otherwise a FixedActionOpponent
        configured with ``policy_name`` as its selector.

    Raises:
        ValueError: If policy_name is not recognized.
    """
    if policy_name == "random":
        return RandomOpponent()

    if policy_name in ("first", "last", "middle"):
        return FixedActionOpponent(action_selector=policy_name)

    raise ValueError(f"Unknown opponent policy: {policy_name}")
server/prepare_hf.sh ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Custom HF deployment script for openspiel_env
# OpenSpiel uses a different base image with C++ compilation
#
# Usage: ./prepare_hf.sh <dockerfile-path> [base-image-ref]
#
# Rewrites the given Dockerfile in place: the `ARG OPENSPIEL_BASE_IMAGE=...`
# line becomes a hardcoded `FROM <GHCR OpenSpiel base image>` line, and the
# following `FROM ${OPENSPIEL_BASE_IMAGE}` line is removed.

set -e

DOCKERFILE_PATH="${1:-}"
# Second argument is accepted but not used: this script always pins the
# OpenSpiel-specific base image below.
# shellcheck disable=SC2034
BASE_IMAGE_REF="${2:-}"

# Base image that replaces the overridable ARG in the Dockerfile.
GHCR_BASE_IMAGE="ghcr.io/meta-pytorch/openenv-openspiel-base:sha-e622c7e"

# Fail early with a clear message instead of letting sed report a cryptic
# error on an empty or missing path.
if [ -z "$DOCKERFILE_PATH" ]; then
  echo "Usage: $0 <dockerfile-path> [base-image-ref]" >&2
  exit 1
fi
if [ ! -f "$DOCKERFILE_PATH" ]; then
  echo "OpenSpiel: Dockerfile not found: $DOCKERFILE_PATH" >&2
  exit 1
fi

echo "OpenSpiel: Using custom Dockerfile preparation"

# Cross-platform sed in-place editing
sed_inplace() {
  if sed --version >/dev/null 2>&1; then
    # GNU sed (Linux)
    sed -i "$@"
  else
    # BSD sed (macOS) requires an explicit (empty) backup suffix
    sed -i '' "$@"
  fi
}

# Replace ARG with hardcoded FROM using the special OpenSpiel base.
# The pattern is anchored so only a real ARG instruction at the start of a
# line is rewritten.
sed_inplace "s|^ARG OPENSPIEL_BASE_IMAGE=.*|FROM ${GHCR_BASE_IMAGE}|g" "$DOCKERFILE_PATH"
# Drop the now-redundant FROM that referenced the ARG.
sed_inplace '/^FROM \${OPENSPIEL_BASE_IMAGE}/d' "$DOCKERFILE_PATH"

echo "OpenSpiel: Modified Dockerfile to use GHCR OpenSpiel base image"
# NOTE(review): confirm this timing message still holds when the pre-built
# base image is used.
echo "OpenSpiel builds can take 10-15 minutes due to C++ compilation"
test_docker_all_games.sh ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Automated test script for all OpenSpiel games in Docker
# Usage: ./test_docker_all_games.sh
#
# For every game in GAMES: start a container with OPENSPIEL_GAME set, poll the
# health endpoint, run examples/openspiel_simple.py against it, and record the
# outcome. A summary is always printed; the exit status is 0 only when every
# game passed.

set -e

# Colors for output
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Configuration
IMAGE_NAME="openspiel-env:latest"
CONTAINER_NAME="openspiel-test"
PORT=8000
HEALTH_CHECK_URL="http://localhost:${PORT}/health"
MAX_WAIT=30

# Games to test
GAMES=("catch" "tic_tac_toe" "kuhn_poker" "cliff_walking" "2048" "blackjack")

# Results tracking: entries are "<game>:<PASSED|FAILED>[:<message>]"
RESULTS=()
PASSED=0
FAILED=0

# Per-run directory for client logs (avoids predictable names in /tmp).
LOG_DIR="$(mktemp -d "${TMPDIR:-/tmp}/openspiel-test.XXXXXX")"

echo -e "${BLUE}========================================${NC}"
echo -e "${BLUE}OpenSpiel Docker Integration Test${NC}"
echo -e "${BLUE}========================================${NC}"
echo ""

# Function to cleanup containers
cleanup() {
  echo -e "${YELLOW}Cleaning up containers...${NC}"
  # Best effort: the container may not exist, so failures are ignored.
  docker stop "${CONTAINER_NAME}" 2>/dev/null || true
  docker rm "${CONTAINER_NAME}" 2>/dev/null || true
}

# Make sure no test container is left behind, whatever the exit path.
trap cleanup EXIT

# Record a failed game. $1 = game name, $2 = short reason.
record_failure() {
  RESULTS+=("$1:FAILED:$2")
  FAILED=$((FAILED + 1))
}

# Function to wait for server health.
# Polls HEALTH_CHECK_URL once per second, up to MAX_WAIT attempts.
# Returns 0 once the endpoint answers successfully, 1 otherwise.
wait_for_health() {
  local i
  echo -e "  ⏳ Waiting for server to be ready..."

  for ((i = 1; i <= MAX_WAIT; i++)); do
    if curl -s -f "${HEALTH_CHECK_URL}" >/dev/null 2>&1; then
      echo -e "  ${GREEN}✓${NC} Server ready (${i}s)"
      return 0
    fi
    sleep 1
  done

  echo -e "  ${RED}✗${NC} Server health check failed after ${MAX_WAIT}s"
  return 1
}

# Function to test a game. $1 = game name.
# Returns 1 when the container or server could not be started, 0 otherwise;
# in every case the outcome is appended to RESULTS.
test_game() {
  local game=$1
  local log_file="${LOG_DIR}/test_${game}.log"

  echo -e "\n${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  echo -e "${BLUE}Testing: ${game}${NC}"
  echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"

  # Stop any existing container
  cleanup

  # Start container with game. The command runs inside `if` so that a
  # failure is recorded instead of aborting the whole run under `set -e`.
  echo -e "  🐳 Starting Docker container..."
  if ! docker run -d \
    --name "${CONTAINER_NAME}" \
    -p "${PORT}:8000" \
    -e "OPENSPIEL_GAME=${game}" \
    "${IMAGE_NAME}" >/dev/null; then
    echo -e "  ${RED}✗ FAILED${NC} - Container did not start"
    record_failure "${game}" "Container did not start"
    cleanup
    return 1
  fi

  # Wait for server to be ready
  if ! wait_for_health; then
    echo -e "  ${RED}✗ FAILED${NC} - Server did not start"
    record_failure "${game}" "Server did not start"
    cleanup
    return 1
  fi

  # Run Python client test (proxies disabled for localhost access).
  # PYTHONPATH gets "<cwd>/src" prepended; the ":+" form avoids a trailing
  # ":" (an empty path entry) when PYTHONPATH was previously unset.
  echo -e "  🎮 Running Python client test..."
  if NO_PROXY=localhost,127.0.0.1 HTTP_PROXY= HTTPS_PROXY= \
    PYTHONPATH="${PWD}/src${PYTHONPATH:+:${PYTHONPATH}}" \
    python3 examples/openspiel_simple.py >"${log_file}" 2>&1; then

    # Check if episode completed successfully
    if grep -q "Episode finished!" "${log_file}"; then
      echo -e "  ${GREEN}✓ PASSED${NC} - Episode completed successfully"
      RESULTS+=("${game}:PASSED")
      PASSED=$((PASSED + 1))
    else
      echo -e "  ${RED}✗ FAILED${NC} - Episode did not complete"
      record_failure "${game}" "Episode incomplete"
    fi
  else
    echo -e "  ${RED}✗ FAILED${NC} - Python client error"
    record_failure "${game}" "Client error"
  fi

  # Cleanup
  cleanup
}

# Run tests for all games. `|| true` keeps the loop going after a failed
# game so the summary below is always printed (failures are already counted).
for game in "${GAMES[@]}"; do
  test_game "${game}" || true
done

# Print summary
echo -e "\n${BLUE}========================================${NC}"
echo -e "${BLUE}Test Summary${NC}"
echo -e "${BLUE}========================================${NC}"
echo ""

for result in "${RESULTS[@]}"; do
  IFS=':' read -r game status message <<<"$result"
  if [[ "$status" == "PASSED" ]]; then
    echo -e "  ${GREEN}✓${NC} ${game}"
  else
    echo -e "  ${RED}✗${NC} ${game} - ${message}"
  fi
done

echo ""
echo -e "Total: ${PASSED} passed, ${FAILED} failed out of ${#GAMES[@]} games"
echo -e "Client logs: ${LOG_DIR}"
echo ""

# Exit with appropriate code
if [[ "$FAILED" -eq 0 ]]; then
  echo -e "${GREEN}========================================${NC}"
  echo -e "${GREEN}All tests PASSED! 🎉${NC}"
  echo -e "${GREEN}========================================${NC}"
  exit 0
else
  echo -e "${RED}========================================${NC}"
  echo -e "${RED}Some tests FAILED${NC}"
  echo -e "${RED}========================================${NC}"
  exit 1
fi