File size: 6,836 Bytes
c2cac70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
import numpy as np

# Index triples (into a row-major 3x3 grid, cells numbered 0-8) that form a
# winning line: the three rows, then the three columns, then both diagonals.
WIN_PATTERNS = [
    *((3 * row, 3 * row + 1, 3 * row + 2) for row in range(3)),
    *((col, col + 3, col + 6) for col in range(3)),
    (0, 4, 8),  # main diagonal
    (2, 4, 6),  # anti-diagonal
]

class UltimateTicTacToe:
    """
    Rules and state encoding for Ultimate Tic-Tac-Toe.

    The 9x9 grid is stored as a flat array of 81 cells in row-major order
    (index = row * 9 + col). A cell holds the value of the player who
    played there (1 or -1) or 0 when empty. The grid is made of nine 3x3
    small boards, numbered 0-8 in row-major order.

    A game state ("board") is the tuple ``(board_data, active_board)``:
    ``active_board`` is the index of the small board the next move must be
    played in, or None when any playable small board may be chosen.
    """

    def __init__(self):
        self.cells = 81         # number of cells, also the action count
        self.board_width = 9    # side length of the full grid
        self.state_planes = 9   # number of planes built by encode_state

    def get_init_board(self):
        """Return the starting state: an empty grid with no forced board."""
        b = np.zeros((self.cells,), dtype=int)
        return (b, None)

    def get_board_size(self):
        """Return the shape (planes, height, width) of an encoded state."""
        return (self.state_planes, self.board_width, self.board_width)

    def get_action_size(self):
        """Return the number of actions: one per cell."""
        return self.cells

    def get_next_state(self, board, player, action, verify_move=False):
        """
        Play ``action`` (a cell index) for ``player`` and return the result.

        Returns ``((new_board_data, next_board), -player)``. The cell array
        of ``board`` is copied, not modified. When ``verify_move`` is true
        and ``action`` is not a legal move, returns False instead.
        """
        if verify_move and self.get_valid_moves(board)[action] == 0:
            return False

        new_board_data = np.copy(board[0])
        new_board_data[action] = player

        # The position of the played cell inside its own small board picks
        # the small board the next move is sent to.
        next_board = ((action // 9) % 3) * 3 + (action % 3)
        if self.is_board_full(new_board_data, next_board):
            # A closed target board leaves the next move unconstrained.
            next_board = None

        # The turn is handed over by negating the player; the cell values
        # themselves are not flipped here.
        return ((new_board_data, next_board), -player)

    def is_board_full(self, board_data, next_board):
        """
        Return True when small board ``next_board`` is closed.

        A small board is closed when either player has won it or when it
        has no empty cell left.
        """
        return (
            self._is_small_board_win(board_data, next_board, 1)
            or self._is_small_board_win(board_data, next_board, -1)
            or self._is_board_full(board_data, next_board)
        )

    def _small_board_cells(self, inner_board_idx):
        """Return the nine flat cell indices of a small board, row by row."""
        row_block = inner_board_idx // 3
        col_block = inner_board_idx % 3

        # Top-left cell of the small board: three grid rows (27 cells) per
        # block row, three columns per block column.
        base = row_block * 27 + col_block * 3

        return [
            base, base + 1, base + 2,
            base + 9, base + 10, base + 11,
            base + 18, base + 19, base + 20
        ]

    def _is_board_full(self, board_data, next_board):
        """Return True when small board ``next_board`` has no empty cell."""
        return not any(
            board_data[index] == 0
            for index in self._small_board_cells(next_board)
        )

    def _is_playable_small_board(self, board_data, inner_board_idx):
        """Return True when the small board is neither won nor filled."""
        return not self.is_board_full(board_data, inner_board_idx)

    def has_legal_moves(self, board):
        """Return True when at least one move is legal in ``board``."""
        return any(move == 1 for move in self.get_valid_moves(board))

    def get_valid_moves(self, board):
        """
        Return a list of 81 flags, 1 for each legal cell and 0 otherwise.

        If the state names an active small board that is still playable,
        only its empty cells are legal. Otherwise the empty cells of every
        playable small board are legal.
        """
        board_data, active_board = board

        if active_board is not None and self._is_playable_small_board(board_data, active_board):
            target_boards = [active_board]
        else:
            target_boards = [
                inner_board_idx
                for inner_board_idx in range(9)
                if self._is_playable_small_board(board_data, inner_board_idx)
            ]

        # All moves are invalid by default
        valid_moves = [0] * self.get_action_size()
        for inner_board_idx in target_boards:
            for index in self._small_board_cells(inner_board_idx):
                if board_data[index] == 0:
                    valid_moves[index] = 1

        return valid_moves

    def _is_small_board_win(self, board_data, inner_board_idx, player):
        """Return True when ``player`` holds a full line in the small board."""
        cells = self._small_board_cells(inner_board_idx)

        return any(
            board_data[cells[a]] == board_data[cells[b]] == board_data[cells[c]] == player
            for a, b, c in WIN_PATTERNS
        )

    def is_win(self, board, player):
        """Return True when ``player`` has won three small boards in a line."""
        board_data, _ = board
        won = [self._is_small_board_win(board_data, i, player) for i in range(9)]

        # The same line patterns apply to the 3x3 arrangement of small boards.
        return any(won[a] and won[b] and won[c] for a, b, c in WIN_PATTERNS)

    def get_reward_for_player(self, board, player):
        """
        Return the game outcome from the point of view of ``player``.

        Returns 1 if ``player`` has won, -1 if ``-player`` has won, None if
        nobody has won and a legal move remains, and 0 otherwise (draw).
        """
        if self.is_win(board, player):
            return 1
        if self.is_win(board, -player):
            return -1
        if self.has_legal_moves(board):
            return None

        return 0

    def get_canonical_board_data(self, board_data, player):
        """Return the cell array multiplied by ``player``, so that the
        stones of ``player`` become 1 and the other player's become -1."""
        return player * board_data

    def _small_board_mask(self, inner_board_idx):
        """Return a 9x9 float32 grid with 1.0 on the cells of a small board."""
        mask = np.zeros((self.board_width, self.board_width), dtype=np.float32)
        for index in self._small_board_cells(inner_board_idx):
            row, col = divmod(index, self.board_width)
            mask[row, col] = 1.0
        return mask

    def encode_state(self, board):
        """
        Encode ``board`` as a float32 array of shape (9, 9, 9).

        Planes, in order: cells equal to 1, cells equal to -1, empty cells,
        legal moves, active small board (all zero when none is forced),
        small boards won by 1, small boards won by -1, playable small
        boards, and the fraction of occupied cells as a constant plane.

        NOTE(review): the local names treat 1 as the player to move, so the
        caller presumably passes canonical board data - confirm.
        """
        board_data, active_board = board
        grid_shape = (self.board_width, self.board_width)
        board_grid = board_data.reshape(grid_shape)

        current_stones = (board_grid == 1).astype(np.float32)
        opponent_stones = (board_grid == -1).astype(np.float32)
        empty_cells = (board_grid == 0).astype(np.float32)
        legal_moves = np.array(self.get_valid_moves(board), dtype=np.float32).reshape(grid_shape)

        # Highlight the forced board only while it can still be played in,
        # mirroring the fallback rule used by get_valid_moves.
        active_board_mask = np.zeros(grid_shape, dtype=np.float32)
        if active_board is not None and self._is_playable_small_board(board_data, active_board):
            active_board_mask = self._small_board_mask(active_board)

        current_won_boards = np.zeros(grid_shape, dtype=np.float32)
        opponent_won_boards = np.zeros(grid_shape, dtype=np.float32)
        playable_boards = np.zeros(grid_shape, dtype=np.float32)

        for inner_board_idx in range(9):
            board_mask = self._small_board_mask(inner_board_idx)
            if self._is_small_board_win(board_data, inner_board_idx, 1):
                current_won_boards += board_mask
            elif self._is_small_board_win(board_data, inner_board_idx, -1):
                opponent_won_boards += board_mask

            if self._is_playable_small_board(board_data, inner_board_idx):
                playable_boards += board_mask

        # Game progress as a scalar in [0, 1], broadcast over a whole plane.
        move_count = np.count_nonzero(board_data) / self.cells
        move_count_plane = np.full(grid_shape, move_count, dtype=np.float32)

        return np.stack(
            (
                current_stones,
                opponent_stones,
                empty_cells,
                legal_moves,
                active_board_mask,
                current_won_boards,
                opponent_won_boards,
                playable_boards,
                move_count_plane,
            ),
            axis=0,
        )