nus/cs2109s/labs/ps3/ps3.py
2024-02-20 11:30:55 +08:00

623 lines
19 KiB
Python

import utils
from typing import Union
Score = Union[int, float]
Move = tuple[tuple[int, int], tuple[int, int]]
Board = list[list[str]]
def evaluate(board: Board) -> Score:
    """
    Scores a position from black's point of view using material only.

    Parameters
    ----------
    board: 2D list of lists. Contains characters "B", "W", and "_",
    representing black pawn, white pawn, and empty cell, respectively.

    Returns
    -------
    An evaluation (as a Score). Checked in this order:
    `-utils.WIN` when a white pawn is on row 0, `utils.WIN` when a black
    pawn is on row 5, `utils.WIN` when no white pawn is left,
    `-utils.WIN` when no black pawn is left; otherwise the number of black
    pawns minus the number of white pawns.
    """
    # Rows are examined top to bottom, so a white pawn on row 0 is always
    # detected before a black pawn on row 5.
    if board and "W" in board[0]:
        return -utils.WIN
    if len(board) > 5 and "B" in board[5]:
        return utils.WIN

    blacks = sum(row.count("B") for row in board)
    whites = sum(row.count("W") for row in board)

    # One side has been wiped out.
    if whites == 0:
        return utils.WIN
    if blacks == 0:
        return -utils.WIN
    return blacks - whites
def generate_valid_moves(board: Board) -> list[Move]:
    """
    Lists every move black can make in the given position.

    Each black pawn is offered the three squares on the next row (column
    offsets -1, 0, +1); a candidate is kept only when
    `utils.is_valid_move` accepts it.

    Parameters
    ----------
    board: 2D list of lists. Contains characters "B", "W", and "_",
    representing black pawn, white pawn, and empty cell, respectively.

    Returns
    -------
    A list of Moves, ordered by source row, then source column, then
    column offset.
    """
    moves = []
    for row_idx, row in enumerate(board):
        for col_idx, cell in enumerate(row):
            if cell == "B":
                src = (row_idx, col_idx)
                targets = [(row_idx + 1, col_idx + shift) for shift in (-1, 0, 1)]
                moves.extend(
                    (src, dst)
                    for dst in targets
                    if utils.is_valid_move(board, src, dst)
                )
    return moves
def test_11():
    """Checks `generate_valid_moves` on three hand-built positions."""
    blank = "______"

    # A lone pawn with nothing in its way: all three forward squares.
    board1 = [list(r) for r in ("__B___", blank, blank, blank, blank, blank)]
    expected1 = [((0, 2), (1, 1)), ((0, 2), (1, 2)), ((0, 2), (1, 3))]
    assert sorted(generate_valid_moves(board1)) == expected1, "board1 test output is incorrect"

    # Expected moves exclude the square held by a friendly pawn, the
    # off-board column, and any move for the pawn already on the last row.
    board2 = [list(r) for r in ("____B_", "_____B", "_W____", "__W___", blank, "__B___")]
    expected2 = [((0, 4), (1, 3)), ((0, 4), (1, 4)), ((1, 5), (2, 4)), ((1, 5), (2, 5))]
    assert sorted(generate_valid_moves(board2)) == expected2, "board2 test output is incorrect"

    # Straight push is blocked by a white pawn; only the diagonals remain.
    board3 = [list(r) for r in (blank, "__B___", "_WWW__", blank, blank, blank)]
    expected3 = [((1, 2), (2, 1)), ((1, 2), (2, 3))]
    assert sorted(generate_valid_moves(board3)) == expected3, "board3 test output is incorrect"
# test_11()
def minimax(
    board: Board,
    depth: int,
    max_depth: int,
    is_black: bool
) -> tuple[Score, Move]:
    """
    Depth-limited minimax search for the best move in the given position.
    Note that you are black (the maximising side).

    Parameters
    ----------
    board: 2D list of lists. Contains characters "B", "W", and "_",
    representing black pawn, white pawn, and empty cell, respectively.
    depth: int, the current search depth. When this is equal to
    `max_depth`, the position is scored with `evaluate` instead of being
    expanded further.
    max_depth: int, the maximum depth for cutoff.
    is_black: bool. True when finding the best move for black, False
    otherwise.

    Returns
    -------
    A tuple (evaluation, ((src_row, src_col), (dst_row, dst_col))):
    evaluation: the best score that black can achieve after this move.
    src_row, src_col: position of the pawn to move.
    dst_row, dst_col: position to move the pawn to.
    The move is None at a cutoff/terminal node or when no move was selected.
    On white's turn the move is expressed on the inverted board.
    """
    # Cutoff: depth budget used up, or the game is already decided.
    if depth == max_depth or utils.is_game_over(board):
        return evaluate(board), None

    chosen = None

    if not is_black:
        # Minimising side. generate_valid_moves only produces moves for "B",
        # so white's options are generated on the inverted board and each
        # result is inverted back before recursing.
        best = utils.INF
        flipped = utils.invert_board(board, False)
        for move in generate_valid_moves(flipped):
            src, dst = move
            after = utils.state_change(flipped, src, dst, False)
            score, _ = minimax(utils.invert_board(after, False), depth + 1, max_depth, True)
            if score < best:
                best, chosen = score, move
        return best, chosen

    # Maximising side (black).
    best = -utils.INF
    for move in generate_valid_moves(board):
        src, dst = move
        after = utils.state_change(board, src, dst, False)
        score, _ = minimax(after, depth + 1, max_depth, False)
        if score > best:
            best, chosen = score, move
    return best, chosen
def test_21():
    """Checks that `minimax` finds the forced results in three positions."""
    # (rows, search depth, expected score, failure message)
    cases = [
        (
            ("______", "___B__", "____BB", "___WB_", "_B__WW", "_WW___"),
            1, utils.WIN, "black should win in 1",
        ),
        (
            ("______", "___B__", "____BB", "_BW_B_", "____WW", "_WW___"),
            3, utils.WIN, "black should win in 3",
        ),
        (
            ("______", "__B___", "_WWW__", "______", "______", "______"),
            4, -utils.WIN, "white should win in 4",
        ),
    ]
    for rows, limit, expected, message in cases:
        position = [list(r) for r in rows]
        score, _ = minimax(position, 0, limit, True)
        assert score == expected, message
# test_21()
def negamax(
    board: Board,
    depth: int,
    max_depth: int
) -> tuple[Score, Move]:
    """
    Depth-limited negamax search for the best move in the given position.
    Note that you are black.

    The board is inverted after every move, so the side to move is always
    represented by "B" and a single maximising loop serves both players.

    Parameters
    ----------
    board: 2D list of lists. Contains characters "B", "W", and "_",
    representing black pawn, white pawn, and empty cell, respectively.
    depth: int, the current search depth. When this is equal to
    `max_depth`, the position is scored with `evaluate` instead of being
    expanded further.
    max_depth: int, the maximum depth for cutoff.
    Notice that you no longer need the parameter `is_black`.

    Returns
    -------
    A tuple (evaluation, ((src_row, src_col), (dst_row, dst_col))):
    evaluation: the best score the side to move can achieve after this move.
    src_row, src_col: position of the pawn to move.
    dst_row, dst_col: position to move the pawn to.
    The move is None at a cutoff/terminal node or when no move was selected.
    """
    if depth == max_depth or utils.is_game_over(board):
        return evaluate(board), None

    best = -utils.INF
    chosen = None
    for move in generate_valid_moves(board):
        src, dst = move
        # Play the move, then hand the opponent the inverted board so that
        # they in turn see themselves as "B".
        moved = utils.state_change(board, src, dst, False)
        reply_score, _ = negamax(utils.invert_board(moved, False), depth + 1, max_depth)
        # The opponent's best score is our worst, hence the sign flip.
        score = -reply_score
        if score > best:
            best, chosen = score, move
    return best, chosen
def test_22():
    """Checks that `negamax` finds the forced results in three positions."""
    # (rows, search depth, expected score, failure message)
    cases = [
        (
            ("______", "___B__", "____BB", "___WB_", "_B__WW", "_WW___"),
            1, utils.WIN, "black should win in 1",
        ),
        (
            ("______", "___B__", "____BB", "_BW_B_", "____WW", "_WW___"),
            3, utils.WIN, "black should win in 3",
        ),
        (
            ("______", "__B___", "_WWW__", "______", "______", "______"),
            4, -utils.WIN, "white should win in 4",
        ),
    ]
    for rows, limit, expected, message in cases:
        position = [list(r) for r in rows]
        score, _ = negamax(position, 0, limit)
        assert score == expected, message
# test_22()
def minimax_alpha_beta(
    board: Board,
    depth: int,
    max_depth: int,
    alpha: Score,
    beta: Score,
    is_black: bool
) -> tuple[Score, Move]:
    """
    Depth-limited minimax search with alpha-beta pruning.
    Note that you are black (the maximising side).

    Parameters
    ----------
    board: 2D list of lists. Contains characters "B", "W", and "_",
    representing black pawn, white pawn, and empty cell, respectively.
    depth: int, the current search depth. When this is equal to
    `max_depth`, the position is scored with `evaluate` instead of being
    expanded further.
    max_depth: int, the maximum depth for cutoff.
    alpha: Score. The alpha value in a given state.
    beta: Score. The beta value in a given state.
    is_black: bool. True when finding the best move for black, False
    otherwise.

    Returns
    -------
    A tuple (evaluation, ((src_row, src_col), (dst_row, dst_col))):
    evaluation: the best score that black can achieve after this move.
    src_row, src_col: position of the pawn to move.
    dst_row, dst_col: position to move the pawn to.
    The move is None at a cutoff/terminal node or when no move was selected.
    On white's turn the move is expressed on the inverted board.
    """
    # Cutoff: depth budget used up, or the game is already decided.
    if depth == max_depth or utils.is_game_over(board):
        return evaluate(board), None

    chosen = None

    if not is_black:
        # Minimising side: moves are generated on the inverted board because
        # generate_valid_moves only produces moves for "B".
        best = utils.INF
        flipped = utils.invert_board(board, False)
        for move in generate_valid_moves(flipped):
            src, dst = move
            moved = utils.state_change(flipped, src, dst, False)
            child = utils.invert_board(moved, False)
            score, _ = minimax_alpha_beta(child, depth + 1, max_depth, alpha, beta, True)
            if score < best:
                best, chosen = score, move
            # The maximiser already has an option at least this good: prune.
            if best <= alpha:
                break
            if best < beta:
                beta = best
        return best, chosen

    # Maximising side (black).
    best = -utils.INF
    for move in generate_valid_moves(board):
        src, dst = move
        child = utils.state_change(board, src, dst, False)
        score, _ = minimax_alpha_beta(child, depth + 1, max_depth, alpha, beta, False)
        if score > best:
            best, chosen = score, move
        # The minimiser already has an option at least this good: prune.
        if best >= beta:
            break
        if best > alpha:
            alpha = best
    return best, chosen
def test_31():
    """
    Checks that `minimax_alpha_beta` finds the forced results in three
    positions when searched with a full (-INF, INF) window.
    """
    board1 = [
        list("______"),
        list("__BB__"),
        list("____BB"),
        list("WBW_B_"),
        list("____WW"),
        list("_WW___"),
    ]
    score1, _ = minimax_alpha_beta(board1, 0, 3, -utils.INF, utils.INF, True)
    assert score1 == utils.WIN, "black should win in 3"

    board2 = [
        list("____B_"),
        list("___B__"),
        list("__B___"),
        list("_WWW__"),
        list("____W_"),
        list("______"),
    ]
    score2, _ = minimax_alpha_beta(board2, 0, 5, -utils.INF, utils.INF, True)
    assert score2 == utils.WIN, "black should win in 5"

    board3 = [
        list("____B_"),
        list("__BB__"),
        list("______"),
        list("_WWW__"),
        list("____W_"),
        list("______"),
    ]
    # Only the score is checked; the chosen move is deliberately ignored
    # (no debug output), like the other test functions.
    score3, _ = minimax_alpha_beta(board3, 0, 6, -utils.INF, utils.INF, True)
    assert score3 == -utils.WIN, "white should win in 6"
# print("custom")
# print(minimax_alpha_beta([['_', '_', '_', '_', 'B', '_'], ['_', '_', 'B', 'B', '_', '_'], ['_', '_', '_', '_', '_', '_'], ['_', 'W', 'W', 'W', '_', '_'], ['_', '_', '_', '_', 'W', '_'], ['_', '_', '_', '_', '_', '_']],
# 0, 6, -utils.INF, utils.INF, True))
# print("custom end")
def negamax_alpha_beta(
    board: Board,
    depth: int,
    max_depth: int,
    alpha: Score,
    beta: Score
) -> tuple[Score, Move]:
    """
    Depth-limited negamax search with alpha-beta pruning.
    Note that you are black.

    The board is inverted after every move, so the side to move is always
    represented by "B"; the search window is negated and swapped for the
    opponent's reply.

    Parameters
    ----------
    board: 2D list of lists. Contains characters "B", "W", and "_",
    representing black pawn, white pawn, and empty cell, respectively.
    depth: int, the current search depth. When this is equal to
    `max_depth`, the position is scored with `evaluate` instead of being
    expanded further.
    max_depth: int, the maximum depth for cutoff.
    alpha: Score. The alpha value in a given state.
    beta: Score. The beta value in a given state.
    Notice that you no longer need the parameter `is_black`.

    Returns
    -------
    A tuple (evaluation, ((src_row, src_col), (dst_row, dst_col))):
    evaluation: the final alpha, i.e. the best score found for the side to
    move, or the incoming alpha when no move improved on it.
    src_row, src_col: position of the pawn to move.
    dst_row, dst_col: position to move the pawn to.
    The move is None at a cutoff/terminal node or when no move raised alpha.
    """
    if depth == max_depth or utils.is_game_over(board):
        return evaluate(board), None

    chosen = None
    for move in generate_valid_moves(board):
        src, dst = move
        # Play the move, then hand the opponent the inverted board so that
        # they in turn see themselves as "B".
        moved = utils.state_change(board, src, dst, False)
        child = utils.invert_board(moved, False)
        reply_score, _ = negamax_alpha_beta(child, depth + 1, max_depth, -beta, -alpha)
        # The opponent's best score is our worst, hence the sign flip.
        score = -reply_score
        if score > alpha:
            alpha, chosen = score, move
        # Window closed: the opponent will never allow this line.
        if alpha >= beta:
            break
    return alpha, chosen
def test_32():
    """
    Checks that `negamax_alpha_beta` finds the forced results in three
    positions when searched with a full (-INF, INF) window.
    """
    # (rows, search depth, expected score, failure message)
    cases = [
        (
            ("______", "__BB__", "____BB", "WBW_B_", "____WW", "_WW___"),
            3, utils.WIN, "black should win in 3",
        ),
        (
            ("____B_", "___B__", "__B___", "_WWW__", "____W_", "______"),
            5, utils.WIN, "black should win in 5",
        ),
        (
            ("____B_", "__BB__", "______", "_WWW__", "____W_", "______"),
            6, -utils.WIN, "white should win in 6",
        ),
    ]
    for rows, limit, expected, message in cases:
        position = [list(r) for r in rows]
        score, _ = negamax_alpha_beta(position, 0, limit, -utils.INF, utils.INF)
        assert score == expected, message
# test_32()
# Uncomment and implement the function.
# Note: this will override the provided `evaluate` function.
def evaluate(board: Board) -> Score:
    """
    Positional heuristic: scores a position from black's point of view,
    rewarding pawns for how far they have advanced.

    Parameters
    ----------
    board: 2D list of lists. Contains characters "B", "W", and "_",
    representing black pawn, white pawn, and empty cell, respectively.

    Returns
    -------
    An evaluation (as a Score). `utils.WIN` / `-utils.WIN` as soon as a
    black pawn is found on row 5 / a white pawn on row 0 (rows are scanned
    top to bottom), `utils.WIN` when no white pawn is left, `-utils.WIN`
    when no black pawn is left; otherwise the sum of per-row bonuses for
    black pawns minus per-row penalties for white pawns.
    """
    # Value of a pawn by row index; each side's weights grow towards its
    # own goal row (row 5 for black, row 0 for white).
    black_bonus = {0: 10, 1: 10, 2: 20, 3: 40, 4: 50}
    white_penalty = {1: 50, 2: 40, 3: 20, 4: 10, 5: 10}

    total = 0
    seen_black = False
    seen_white = False
    for r, row in enumerate(board):
        for tile in row:
            if tile == "B":
                if r == 5:
                    return utils.WIN
                seen_black = True
                total += black_bonus.get(r, 0)
            elif tile == "W":
                if r == 0:
                    return -utils.WIN
                seen_white = True
                total -= white_penalty.get(r, 0)

    # One side has been wiped out.
    if not seen_white:
        return utils.WIN
    if not seen_black:
        return -utils.WIN
    return total
def test_41():
    """Checks the custom `evaluate` heuristic on three small positions."""
    blank = "______"

    # Positional bonuses and penalties cancel out exactly.
    balanced = [list(r) for r in ("___B__", "___W__", blank, "__B___", blank, blank)]
    assert evaluate(balanced) == 0

    # A white pawn on row 0 means white has already won.
    white_home = [list(r) for r in ("___BW_", "___W__", blank, blank, blank, blank)]
    assert evaluate(white_home) == -utils.WIN

    # No white pawns left means black has won.
    no_white = [list(r) for r in (blank, blank, blank, "__B___", blank, blank)]
    assert evaluate(no_white) == utils.WIN
# Run the heuristic sanity checks right away; unlike the other test_* calls
# in this file, this one is not commented out.
test_41()
class PlayerAI:
    """Agent for black that chooses its move with alpha-beta search."""

    # Number of plies searched per move.
    # NOTE(review): tune against the per-move time limit of the framework.
    SEARCH_DEPTH = 4

    def make_move(self, board: Board) -> Move:
        """
        This is the function that will be called from main.py
        It combines the functions from the previous tasks: the move is
        picked by `minimax_alpha_beta`, with `generate_valid_moves` as a
        fallback when the search returns no move.

        Parameters
        ----------
        self: object instance itself, passed in automatically by Python.
        board: 2D list-of-lists. Contains characters "B", "W", and "_",
        representing black pawn, white pawn, and empty cell, respectively.

        Returns
        -------
        A tuple of tuples containing coordinates (row_index, col_index).
        The first tuple contains the source position of the black pawn
        to be moved, the second tuple contains the destination position.
        Returns ((0, 0), (0, 0)), an invalid move, only when black has no
        valid move at all.
        """
        _, best_move = minimax_alpha_beta(
            board, 0, self.SEARCH_DEPTH, -utils.INF, utils.INF, True
        )
        if best_move is not None:
            return best_move

        # The search yields no move on a finished game or when no line
        # scored above -INF; still prefer any valid move (including
        # diagonal moves and captures) over an invalid one.
        fallback_moves = generate_valid_moves(board)
        if fallback_moves:
            return fallback_moves[0]
        return (0, 0), (0, 0)  # invalid move
class PlayerNaive:
    """
    A baseline agent that leaves move selection entirely to
    `utils.generate_rand_move`.
    """

    def make_move(self, board: Board) -> Move:
        """Return the move chosen by `utils.generate_rand_move` for `board`."""
        # NOTE(review): the helper's name suggests a random move, not the
        # first available one -- confirm against utils.
        chosen = utils.generate_rand_move(board)
        return chosen
##########################
# Game playing framework #
##########################
if __name__ == "__main__":
    # Sample positions handed to utils.test_move together with a fresh
    # PlayerAI; each call must return a truthy result.
    sample_rows = (
        ("BBBBBB", "_BBBBB", "______", "_B____", "_WWWWW", "WWWWWW"),
        ("_BBBBB", "_BBBBB", "______", "_B____", "WWWWWW", "__WWWW"),
        ("__BBBB", "_BBBBB", "______", "_BW___", "_WWWWW", "___WWW"),
    )
    for rows in sample_rows:
        position = [list(r) for r in rows]
        assert utils.test_move(position, PlayerAI())
    # generates initial board
    # board = utils.generate_init_state()
    # res = utils.play(PlayerAI(), PlayerNaive(), board)
    # # Black wins means your agent wins.
    # print(res)