feat: finish ps3
This commit is contained in:
622
cs2109s/labs/ps3/ps3.py
Normal file
622
cs2109s/labs/ps3/ps3.py
Normal file
@@ -0,0 +1,622 @@
|
||||
import utils
|
||||
from typing import Union
|
||||
|
||||
# Numeric evaluation of a position; may be an int or a float.
Score = Union[int, float]
|
||||
# A move written as ((src_row, src_col), (dst_row, dst_col)).
Move = tuple[tuple[int, int], tuple[int, int]]
|
||||
# 2D list of cells: "B" (black pawn), "W" (white pawn) or "_" (empty cell).
Board = list[list[str]]
|
||||
|
||||
def evaluate(board: Board) -> Score:
    """
    Returns the score of the current position.

    Parameters
    ----------
    board: 2D list of lists. Contains characters "B", "W", and "_",
    representing black pawn, white pawn, and empty cell, respectively.

    Returns
    -------
    An evaluation (as a Score). Checked in this order:
    `-utils.WIN` if a white pawn stands on row 0,
    `utils.WIN` if a black pawn stands on row 5,
    `utils.WIN` if no white pawn is left,
    `-utils.WIN` if no black pawn is left,
    otherwise the number of black pawns minus the number of white pawns.
    """
    # Rows are scanned top to bottom, so a white pawn on row 0 is always
    # seen before a black pawn on row 5; test the two goal rows in that order.
    if board and "W" in board[0]:
        return -utils.WIN
    if len(board) > 5 and "B" in board[5]:
        return utils.WIN

    black_total = sum(cells.count("B") for cells in board)
    white_total = sum(cells.count("W") for cells in board)

    if not white_total:
        return utils.WIN
    if not black_total:
        return -utils.WIN
    return black_total - white_total
|
||||
|
||||
def generate_valid_moves(board: Board) -> list[Move]:
    """
    Generates a list containing all possible moves in a particular position for black.

    Parameters
    ----------
    board: 2D list of lists. Contains characters "B", "W", and "_",
    representing black pawn, white pawn, and empty cell, respectively.

    Returns
    -------
    A list of Moves. Pawns are visited row by row, left to right, and for
    each pawn the destinations one row down are tried in the order
    left-diagonal, straight, right-diagonal. A move is kept when
    `utils.is_valid_move` accepts it.
    """
    # Locate every black pawn first, in row-major order.
    pawn_squares = [
        (row_idx, col_idx)
        for row_idx, cells in enumerate(board)
        for col_idx, cell in enumerate(cells)
        if cell == "B"
    ]

    moves = []
    for src in pawn_squares:
        row_idx, col_idx = src
        targets = (
            (row_idx + 1, col_idx - 1),
            (row_idx + 1, col_idx),
            (row_idx + 1, col_idx + 1),
        )
        for dst in targets:
            if utils.is_valid_move(board, src, dst):
                moves.append((src, dst))
    return moves
|
||||
|
||||
def test_11():
    """Check `generate_valid_moves` against three hand-computed positions."""
    cases = [
        (
            ("__B___", "______", "______", "______", "______", "______"),
            [((0, 2), (1, 1)), ((0, 2), (1, 2)), ((0, 2), (1, 3))],
            "board1 test output is incorrect",
        ),
        (
            ("____B_", "_____B", "_W____", "__W___", "______", "__B___"),
            [((0, 4), (1, 3)), ((0, 4), (1, 4)), ((1, 5), (2, 4)), ((1, 5), (2, 5))],
            "board2 test output is incorrect",
        ),
        (
            ("______", "__B___", "_WWW__", "______", "______", "______"),
            [((1, 2), (2, 1)), ((1, 2), (2, 3))],
            "board3 test output is incorrect",
        ),
    ]
    for rows, expected, message in cases:
        position = [list(row) for row in rows]
        assert sorted(generate_valid_moves(position)) == expected, message
|
||||
|
||||
# test_11()
|
||||
|
||||
def minimax(
    board: Board,
    depth: int,
    max_depth: int,
    is_black: bool
) -> tuple[Score, Move]:
    """
    Finds the best move for the input board state.
    Note that you are black.

    Parameters
    ----------
    board: 2D list of lists. Contains characters "B", "W", and "_",
    representing black pawn, white pawn, and empty cell, respectively.

    depth: int, the current search depth. When this is equal to
    `max_depth`, the position is scored with `evaluate` instead of
    being searched further.

    max_depth: int, the maximum depth for cutoff.

    is_black: bool. True when finding the best move for black, False
    otherwise.

    Returns
    -------
    A tuple (evaluation, ((src_row, src_col), (dst_row, dst_col))):
    evaluation: the best score found for the side to move (maximised
    for black, minimised for white).
    src_row, src_col: position of the pawn to move.
    dst_row, dst_col: position to move the pawn to.
    The move is None at a cutoff/terminal node or when no move is
    available. For a white node the move is the one generated on the
    board returned by `utils.invert_board`.
    """
    # Cutoff: depth limit reached or the game is already decided.
    if depth == max_depth or utils.is_game_over(board):
        return evaluate(board), None

    if not is_black:
        # Minimising ply: moves are generated on the inverted board and the
        # result is inverted back before the recursive call.
        flipped = utils.invert_board(board, False)
        lowest, chosen = utils.INF, None
        for move in generate_valid_moves(flipped):
            src, dst = move
            after = utils.state_change(flipped, src, dst, False)
            score, _ = minimax(utils.invert_board(after, False), depth + 1, max_depth, True)
            if score < lowest:
                lowest, chosen = score, move
        return lowest, chosen

    # Maximising ply: black moves directly on the given board.
    highest, chosen = -utils.INF, None
    for move in generate_valid_moves(board):
        src, dst = move
        after = utils.state_change(board, src, dst, False)
        score, _ = minimax(after, depth + 1, max_depth, False)
        if score > highest:
            highest, chosen = score, move
    return highest, chosen
|
||||
|
||||
|
||||
def test_21():
    """Check `minimax` scores on three positions with known outcomes."""
    scenarios = [
        (
            ("______", "___B__", "____BB", "___WB_", "_B__WW", "_WW___"),
            1, utils.WIN, "black should win in 1",
        ),
        (
            ("______", "___B__", "____BB", "_BW_B_", "____WW", "_WW___"),
            3, utils.WIN, "black should win in 3",
        ),
        (
            ("______", "__B___", "_WWW__", "______", "______", "______"),
            4, -utils.WIN, "white should win in 4",
        ),
    ]
    for rows, limit, expected, message in scenarios:
        score, _ = minimax([list(row) for row in rows], 0, limit, True)
        assert score == expected, message
|
||||
|
||||
# test_21()
|
||||
|
||||
def negamax(
    board: Board,
    depth: int,
    max_depth: int
) -> tuple[Score, Move]:
    """
    Finds the best move for the input board state.
    Note that you are black.

    Parameters
    ----------
    board: 2D list of lists. Contains characters "B", "W", and "_",
    representing black pawn, white pawn, and empty cell, respectively.

    depth: int, the current search depth. When this is equal to
    `max_depth`, the position is scored with `evaluate` instead of
    being searched further.

    max_depth: int, the maximum depth for cutoff.

    Notice that you no longer need the parameter `is_black`.

    Returns
    -------
    A tuple (evaluation, ((src_row, src_col), (dst_row, dst_col))):
    evaluation: the best score the side to move can achieve.
    src_row, src_col: position of the pawn to move.
    dst_row, dst_col: position to move the pawn to.
    The move is None at a cutoff/terminal node or when no move is
    available.
    """
    if depth == max_depth or utils.is_game_over(board):
        return evaluate(board), None

    best_score, best_action = -utils.INF, None
    for move in generate_valid_moves(board):
        src, dst = move
        # Apply the move, then invert the board so the opponent is the side
        # to move in the recursive call.
        moved = utils.state_change(board, src, dst, False)
        reply_score, _ = negamax(utils.invert_board(moved, False), depth + 1, max_depth)
        # The reply is scored from the opponent's side; negate it to get
        # this side's value for the move.
        candidate = -reply_score
        if candidate > best_score:
            best_score, best_action = candidate, move
    return best_score, best_action
|
||||
|
||||
def test_22():
    """Check `negamax` scores on three positions with known outcomes."""
    scenarios = [
        (
            ("______", "___B__", "____BB", "___WB_", "_B__WW", "_WW___"),
            1, utils.WIN, "black should win in 1",
        ),
        (
            ("______", "___B__", "____BB", "_BW_B_", "____WW", "_WW___"),
            3, utils.WIN, "black should win in 3",
        ),
        (
            ("______", "__B___", "_WWW__", "______", "______", "______"),
            4, -utils.WIN, "white should win in 4",
        ),
    ]
    for rows, limit, expected, message in scenarios:
        score, _ = negamax([list(row) for row in rows], 0, limit)
        assert score == expected, message
|
||||
|
||||
# test_22()
|
||||
|
||||
def minimax_alpha_beta(
    board: Board,
    depth: int,
    max_depth: int,
    alpha: Score,
    beta: Score,
    is_black: bool
) -> tuple[Score, Move]:
    """
    Finds the best move for the input board state.
    Note that you are black.

    Parameters
    ----------
    board: 2D list of lists. Contains characters "B", "W", and "_",
    representing black pawn, white pawn, and empty cell, respectively.

    depth: int, the current search depth. When this is equal to
    `max_depth`, the position is scored with `evaluate` instead of
    being searched further.

    max_depth: int, the maximum depth for cutoff.

    alpha: Score. The alpha value in a given state.

    beta: Score. The beta value in a given state.

    is_black: bool. True when finding the best move for black, False
    otherwise.

    Returns
    -------
    A tuple (evaluation, ((src_row, src_col), (dst_row, dst_col))):
    evaluation: the best score found for the side to move (maximised
    for black, minimised for white) before any cutoff.
    src_row, src_col: position of the pawn to move.
    dst_row, dst_col: position to move the pawn to.
    The move is None at a cutoff/terminal node or when no move is
    available. For a white node the move is the one generated on the
    board returned by `utils.invert_board`.
    """
    # Base case: depth limit reached or the game is already decided.
    if depth == max_depth or utils.is_game_over(board):
        return evaluate(board), None

    if is_black:
        # Maximising ply.
        best_score, best_action = -utils.INF, None
        for move in generate_valid_moves(board):
            src, dst = move
            child = utils.state_change(board, src, dst, False)
            score, _ = minimax_alpha_beta(child, depth + 1, max_depth, alpha, beta, False)
            if score > best_score:
                best_score, best_action = score, move
            # The minimiser above already has an option worth at most beta,
            # so the remaining siblings cannot change its choice.
            if best_score >= beta:
                break
            alpha = max(alpha, best_score)
        return best_score, best_action

    # Minimising ply: moves are generated on the inverted board and the
    # result is inverted back before the recursive call.
    flipped = utils.invert_board(board, False)
    best_score, best_action = utils.INF, None
    for move in generate_valid_moves(flipped):
        src, dst = move
        child = utils.invert_board(utils.state_change(flipped, src, dst, False), False)
        score, _ = minimax_alpha_beta(child, depth + 1, max_depth, alpha, beta, True)
        if score < best_score:
            best_score, best_action = score, move
        # The maximiser above already has an option worth at least alpha.
        if best_score <= alpha:
            break
        beta = min(beta, best_score)
    return best_score, best_action
|
||||
|
||||
def test_31():
    """Check `minimax_alpha_beta` scores on three positions with known outcomes."""
    scenarios = [
        (
            ("______", "__BB__", "____BB", "WBW_B_", "____WW", "_WW___"),
            3, utils.WIN, "black should win in 3",
        ),
        (
            ("____B_", "___B__", "__B___", "_WWW__", "____W_", "______"),
            5, utils.WIN, "black should win in 5",
        ),
        (
            ("____B_", "__BB__", "______", "_WWW__", "____W_", "______"),
            6, -utils.WIN, "white should win in 6",
        ),
    ]
    for rows, limit, expected, message in scenarios:
        position = [list(row) for row in rows]
        # Only the score is checked; the returned move is ignored (the
        # leftover debug print of it has been removed).
        score, _ = minimax_alpha_beta(position, 0, limit, -utils.INF, utils.INF, True)
        assert score == expected, message
|
||||
|
||||
# print("custom")
|
||||
# print(minimax_alpha_beta([['_', '_', '_', '_', 'B', '_'], ['_', '_', 'B', 'B', '_', '_'], ['_', '_', '_', '_', '_', '_'], ['_', 'W', 'W', 'W', '_', '_'], ['_', '_', '_', '_', 'W', '_'], ['_', '_', '_', '_', '_', '_']],
|
||||
# 0, 6, -utils.INF, utils.INF, True))
|
||||
# print("custom end")
|
||||
|
||||
def negamax_alpha_beta(
    board: Board,
    depth: int,
    max_depth: int,
    alpha: Score,
    beta: Score
) -> tuple[Score, Move]:
    """
    Finds the best move for the input board state.
    Note that you are black.

    Parameters
    ----------
    board: 2D list of lists. Contains characters "B", "W", and "_",
    representing black pawn, white pawn, and empty cell, respectively.

    depth: int, the current search depth. When this is equal to
    `max_depth`, the position is scored with `evaluate` instead of
    being searched further.

    max_depth: int, the maximum depth for cutoff.

    alpha: Score. The alpha value in a given state.

    beta: Score. The beta value in a given state.

    Notice that you no longer need the parameter `is_black`.

    Returns
    -------
    A tuple (evaluation, ((src_row, src_col), (dst_row, dst_col))):
    evaluation: the final alpha, i.e. the incoming alpha raised to the
    best move value found; it is returned unchanged when no move beats it.
    src_row, src_col: position of the pawn to move.
    dst_row, dst_col: position to move the pawn to.
    The move is None at a cutoff/terminal node or when no move raised
    alpha.
    """
    if depth == max_depth or utils.is_game_over(board):
        return evaluate(board), None

    chosen = None
    for move in generate_valid_moves(board):
        src, dst = move
        # Apply the move, then invert the board so the opponent is the side
        # to move in the recursive call.
        flipped_child = utils.invert_board(utils.state_change(board, src, dst, False), False)
        # The opponent searches the mirrored window (-beta, -alpha).
        reply, _ = negamax_alpha_beta(flipped_child, depth + 1, max_depth, -beta, -alpha)
        # Negate the opponent's score to get this side's value for the move.
        value = -reply
        if value > alpha:
            alpha, chosen = value, move
        # Window closed: the remaining siblings cannot affect the result.
        if alpha >= beta:
            break
    return alpha, chosen
|
||||
|
||||
def test_32():
    """Check `negamax_alpha_beta` scores on three positions with known outcomes."""
    scenarios = [
        (
            ("______", "__BB__", "____BB", "WBW_B_", "____WW", "_WW___"),
            3, utils.WIN, "black should win in 3",
        ),
        (
            ("____B_", "___B__", "__B___", "_WWW__", "____W_", "______"),
            5, utils.WIN, "black should win in 5",
        ),
        (
            ("____B_", "__BB__", "______", "_WWW__", "____W_", "______"),
            6, -utils.WIN, "white should win in 6",
        ),
    ]
    for rows, limit, expected, message in scenarios:
        position = [list(row) for row in rows]
        score, _ = negamax_alpha_beta(position, 0, limit, -utils.INF, utils.INF)
        assert score == expected, message
|
||||
|
||||
# test_32()
|
||||
|
||||
# Uncomment and implement the function.
|
||||
# Note: this will override the provided `evaluate` function.
|
||||
|
||||
def evaluate(board: Board) -> Score:
    """
    Returns a position-weighted score of the current position.

    Every pawn contributes a value that depends on its row: a black pawn
    adds more the further down the board it stands, a white pawn subtracts
    more the further up it stands.

    Parameters
    ----------
    board: 2D list of lists. Contains characters "B", "W", and "_",
    representing black pawn, white pawn, and empty cell, respectively.

    Returns
    -------
    An evaluation (as a Score). While scanning rows top to bottom:
    `-utils.WIN` as soon as a white pawn is found on row 0,
    `utils.WIN` as soon as a black pawn is found on row 5.
    After the scan: `utils.WIN` if no white pawn is left, `-utils.WIN` if
    no black pawn is left, otherwise the sum of the per-row weights.
    """
    # Per-row weights. Rows missing from a table contribute nothing.
    black_bonus = {0: 10, 1: 10, 2: 20, 3: 40, 4: 50}
    white_penalty = {1: 50, 2: 40, 3: 20, 4: 10, 5: 10}

    score = 0
    bcount = 0
    wcount = 0
    for r, row in enumerate(board):
        for tile in row:
            if tile == "B":
                if r == 5:
                    return utils.WIN
                bcount += 1
                score += black_bonus.get(r, 0)
            elif tile == "W":
                if r == 0:
                    return -utils.WIN
                wcount += 1
                score -= white_penalty.get(r, 0)

    if wcount == 0:
        return utils.WIN
    if bcount == 0:
        return -utils.WIN
    return score
|
||||
|
||||
def test_41():
    """Check `evaluate` on one balanced position and two decided ones."""
    expectations = [
        (("___B__", "___W__", "______", "__B___", "______", "______"), 0),
        (("___BW_", "___W__", "______", "______", "______", "______"), -utils.WIN),
        (("______", "______", "______", "__B___", "______", "______"), utils.WIN),
    ]
    for rows, expected in expectations:
        assert evaluate([list(row) for row in rows]) == expected
|
||||
|
||||
test_41()
|
||||
|
||||
class PlayerAI:
    """Agent playing black; queried by the game framework for its next move."""

    def make_move(self, board: Board) -> Move:
        """
        This is the function that will be called from main.py
        You should combine the functions in the previous tasks
        to implement this function.

        Parameters
        ----------
        self: object instance itself, passed in automatically by Python.

        board: 2D list-of-lists. Contains characters "B", "W", and "_",
        representing black pawn, white pawn, and empty cell, respectively.

        Returns
        -------
        A tuple of tuples containing coordinates (row_index, col_index).
        The first tuple contains the source position of the black pawn
        to be moved, the second tuple contains the destination position.
        Returns ((0, 0), (0, 0)), an invalid move, when no black pawn can
        move.
        """
        # TODO: Replace starter code with your AI

        # The last row is never a source: a pawn there has no row below it,
        # and indexing board[r + 1] would raise IndexError.
        source_rows = range(len(board) - 1)

        # First choice (starter behaviour): the first black pawn, in
        # row-major order, whose forward square is empty.
        for r in source_rows:
            for c, tile in enumerate(board[r]):
                if tile == "B" and board[r + 1][c] == "_":
                    return (r, c), (r + 1, c)  # valid move

        # Fallback when every forward square is blocked: step diagonally
        # (left first, then right) onto any square not holding a black pawn,
        # i.e. an empty square or a white pawn to capture.
        for r in source_rows:
            below = board[r + 1]
            for c, tile in enumerate(board[r]):
                if tile != "B":
                    continue
                for dc in (-1, 1):
                    nc = c + dc
                    # Explicit bounds check: a negative index would wrap
                    # around to the other edge of the row.
                    if 0 <= nc < len(below) and below[nc] != "B":
                        return (r, c), (r + 1, nc)

        return (0, 0), (0, 0)  # invalid move
|
||||
|
||||
class PlayerNaive:
    """
    A naive agent that delegates move selection to `utils.generate_rand_move`.

    NOTE(review): judging by the helper's name the move is presumably chosen
    at random rather than being the first available one -- confirm in utils.
    """
    def make_move(self, board: Board) -> Move:
        # No search or evaluation: return whatever the helper yields for this board.
        return utils.generate_rand_move(board)
|
||||
|
||||
##########################
|
||||
# Game playing framework #
|
||||
##########################
|
||||
if __name__ == "__main__":
    # Sanity-check PlayerAI on three fixed positions; `utils.test_move`
    # must return a truthy value for each of them.
    sample_positions = (
        ("BBBBBB", "_BBBBB", "______", "_B____", "_WWWWW", "WWWWWW"),
        ("_BBBBB", "_BBBBB", "______", "_B____", "WWWWWW", "__WWWW"),
        ("__BBBB", "_BBBBB", "______", "_BW___", "_WWWWW", "___WWW"),
    )
    for rows in sample_positions:
        assert utils.test_move([list(row) for row in rows], PlayerAI())
|
||||
|
||||
# generates initial board
|
||||
# board = utils.generate_init_state()
|
||||
# res = utils.play(PlayerAI(), PlayerNaive(), board)
|
||||
# # Black wins means your agent wins.
|
||||
# print(res)
|
||||
Reference in New Issue
Block a user