feat: finish ps3

2024-02-20 11:30:55 +08:00
parent 7cdb61b354
commit 3287079e24
8 changed files with 1831 additions and 0 deletions
--- a/cs2109s/labs/ps3/ps3.py
+++ b/cs2109s/labs/ps3/ps3.py
@@ -0,0 +1,622 @@
+import utils
+from typing import Union
+
+Score = Union[int, float]
+Move = tuple[tuple[int, int], tuple[int, int]]
+Board = list[list[str]]
+
+def evaluate(board: Board) -> Score:
+    """
+    Returns the score of the current position.
+
+    Parameters
+    ----------
+    board: 2D list of lists. Contains characters "B", "W", and "_",
+    representing black pawn, white pawn, and empty cell, respectively.
+
+    Returns
+    -------
+    An evaluation (as a Score).
+    """
+    bcount = 0
+    wcount = 0
+    for r, row in enumerate(board):
+        for tile in row:
+            if tile == "B":
+                if r == 5:
+                    return utils.WIN
+                bcount += 1
+            elif tile == "W":
+                if r == 0:
+                    return -utils.WIN
+                wcount += 1
+    if wcount == 0:
+        return utils.WIN
+    if bcount == 0:
+        return -utils.WIN
+    return bcount - wcount
+
+def generate_valid_moves(board: Board) -> list[Move]:
+    """
+    Generates a list containing all possible moves in a particular position for black.
+
+    Parameters
+    ----------
+    board: 2D list of lists. Contains characters "B", "W", and "_",
+    representing black pawn, white pawn, and empty cell, respectively.
+
+    Returns
+    -------
+    A list of Moves.
+    """
+    res = []
+    for r, row in enumerate(board):
+        for c, tile in enumerate(row):
+            if tile != "B":
+                continue
+            for d in (-1, 0, 1):
+                dst = r + 1, c + d
+                if utils.is_valid_move(board, (r, c), dst):
+                    res.append(((r, c), dst))
+
+    return res
+
+def test_11():
+    board1 = [
+        ["_", "_", "B", "_", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"]
+    ]
+    assert sorted(generate_valid_moves(board1)) == [((0, 2), (1, 1)), ((0, 2), (1, 2)), ((0, 2), (1, 3))], "board1 test output is incorrect"
+
+    board2 = [
+        ["_", "_", "_", "_", "B", "_"],
+        ["_", "_", "_", "_", "_", "B"],
+        ["_", "W", "_", "_", "_", "_"],
+        ["_", "_", "W", "_", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"],
+        ["_", "_", "B", "_", "_", "_"]
+    ]
+    assert sorted(generate_valid_moves(board2)) == [((0, 4), (1, 3)), ((0, 4), (1, 4)), ((1, 5), (2, 4)), ((1, 5), (2, 5))], "board2 test output is incorrect"
+
+    board3 = [
+        ["_", "_", "_", "_", "_", "_"],
+        ["_", "_", "B", "_", "_", "_"],
+        ["_", "W", "W", "W", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"]
+    ]
+    assert sorted(generate_valid_moves(board3)) == [((1, 2), (2, 1)), ((1, 2), (2, 3))], "board3 test output is incorrect"
+
+# test_11()
+
+def minimax(
+        board: Board, 
+        depth: int, 
+        max_depth: int, 
+        is_black: bool
+    ) -> tuple[Score, Move]:
+    """
+    Finds the best move for the input board state.
+    Note that you are black.
+
+    Parameters
+    ----------
+    board: 2D list of lists. Contains characters "B", "W", and "_",
+    representing black pawn, white pawn, and empty cell, respectively.
+
+    depth: int, the depth to search for the best move. When this is equal
+    to `max_depth`, you should get the evaluation of the position using
+    the provided heuristic function.
+
+    max_depth: int, the maximum depth for cutoff.
+
+    is_black: bool. True when finding the best move for black, False
+    otherwise.
+
+    Returns
+    -------
+    A tuple (evalutation, ((src_row, src_col), (dst_row, dst_col))):
+    evaluation: the best score that black can achieve after this move.
+    src_row, src_col: position of the pawn to move.
+    dst_row, dst_col: position to move the pawn to.
+    """
+    if depth == max_depth or utils.is_game_over(board):
+        return evaluate(board), None
+    # max value
+    if is_black:
+        v = -utils.INF
+        best_move = None
+        for action in generate_valid_moves(board):
+            next_board = utils.state_change(board, action[0], action[1], False)
+            new_v, _ = minimax(next_board, depth + 1, max_depth, False)
+            if new_v > v:
+                v = new_v
+                best_move = action
+        return v, best_move
+    # min value
+    else:
+        v = utils.INF
+        best_move = None
+        w_board = utils.invert_board(board, False)
+        for action in generate_valid_moves(w_board):
+            next_board = utils.state_change(w_board, action[0], action[1], False)
+            new_v, _ = minimax(utils.invert_board(next_board, False), depth + 1, max_depth, True)
+            if new_v < v:
+                v = new_v
+                best_move = action
+        return v, best_move
+
+
+def test_21():
+    board1 = [
+        list("______"),
+        list("___B__"),
+        list("____BB"),
+        list("___WB_"),
+        list("_B__WW"),
+        list("_WW___"),
+    ]
+    score1, _ = minimax(board1, 0, 1, True)
+    assert score1 == utils.WIN, "black should win in 1"
+
+    board2 = [
+        list("______"),
+        list("___B__"),
+        list("____BB"),
+        list("_BW_B_"),
+        list("____WW"),
+        list("_WW___"),
+    ]
+    score2, _ = minimax(board2, 0, 3, True)
+    assert score2 == utils.WIN, "black should win in 3"
+
+    board3 = [
+        list("______"),
+        list("__B___"),
+        list("_WWW__"),
+        list("______"),
+        list("______"),
+        list("______"),
+    ]
+    score3, _ = minimax(board3, 0, 4, True)
+    assert score3 == -utils.WIN, "white should win in 4"
+
+# test_21()
+
+def negamax(
+        board: Board,
+        depth: int, 
+        max_depth: int
+    ) -> tuple[Score, Move]:
+    """
+    Finds the best move for the input board state.
+    Note that you are black.
+
+    Parameters
+    ----------
+    board: 2D list of lists. Contains characters "B", "W", and "_",
+    representing black pawn, white pawn, and empty cell, respectively.
+
+    depth: int, the depth to search for the best move. When this is equal
+    to `max_depth`, you should get the evaluation of the position using
+    the provided heuristic function.
+
+    max_depth: int, the maximum depth for cutoff.
+
+    Notice that you no longer need the parameter `is_black`.
+
+    Returns
+    -------
+    A tuple (evalutation, ((src_row, src_col), (dst_row, dst_col))):
+    evaluation: the best score that black can achieve after this move.
+    src_row, src_col: position of the pawn to move.
+    dst_row, dst_col: position to move the pawn to.
+    """
+    if depth == max_depth or utils.is_game_over(board):
+        return evaluate(board), None
+    v = -utils.INF
+    best_move = None
+    for action in generate_valid_moves(board):
+        # if current is black, then the next board is white. It is inverted after applying the black move
+        next_board = utils.invert_board(utils.state_change(board, action[0], action[1], False), False)
+        # since this move is white, the score needs to be negated (to get the min value of the white move)
+        new_v, _ = negamax(next_board, depth + 1, max_depth)
+        # if the negated score is greater than the current max value, update the max value and the best move
+        if -new_v > v:
+            v = -new_v
+            best_move = action
+    return v, best_move
+
+def test_22():
+    board1 = [
+        list("______"),
+        list("___B__"),
+        list("____BB"),
+        list("___WB_"),
+        list("_B__WW"),
+        list("_WW___"),
+    ]
+    score1, _ = negamax(board1, 0, 1)
+    assert score1 == utils.WIN, "black should win in 1"
+
+    board2 = [
+        list("______"),
+        list("___B__"),
+        list("____BB"),
+        list("_BW_B_"),
+        list("____WW"),
+        list("_WW___"),
+    ]
+    score2, _ = negamax(board2, 0, 3)
+    assert score2 == utils.WIN, "black should win in 3"
+
+    board3 = [
+        list("______"),
+        list("__B___"),
+        list("_WWW__"),
+        list("______"),
+        list("______"),
+        list("______"),
+    ]
+    score3, _ = negamax(board3, 0, 4)
+    assert score3 == -utils.WIN, "white should win in 4"
+
+# test_22()
+
+def minimax_alpha_beta(
+        board: Board,
+        depth: int, 
+        max_depth: int, 
+        alpha: Score, 
+        beta: Score, 
+        is_black: bool
+    ) -> tuple[Score, Move]:
+    """
+    Finds the best move for the input board state.
+    Note that you are black.
+
+    Parameters
+    ----------
+    board: 2D list of lists. Contains characters "B", "W", and "_",
+    representing black pawn, white pawn, and empty cell, respectively.
+
+    depth: int, the depth to search for the best move. When this is equal
+    to `max_depth`, you should get the evaluation of the position using
+    the provided heuristic function.
+
+    max_depth: int, the maximum depth for cutoff.
+
+    alpha: Score. The alpha value in a given state.
+
+    beta: Score. The beta value in a given state.
+
+    is_black: bool. True when finding the best move for black, False
+    otherwise.
+
+    Returns
+    -------
+    A tuple (evalutation, ((src_row, src_col), (dst_row, dst_col))):
+    evaluation: the best score that black can achieve after this move.
+    src_row, src_col: position of the pawn to move.
+    dst_row, dst_col: position to move the pawn to.
+    """
+    # base case
+    if depth == max_depth or utils.is_game_over(board):
+        return evaluate(board), None
+    
+    # max value
+    if is_black:
+        v = -utils.INF
+        best_move = None
+        for action in generate_valid_moves(board):
+            next_board = utils.state_change(board, action[0], action[1], False)
+            new_v, _ = minimax_alpha_beta(next_board, depth + 1, max_depth, alpha, beta, False)
+            if new_v > v:
+                v = new_v
+                best_move = action
+            if  v >= beta:
+                break
+            alpha = max(alpha, v)
+        return v, best_move
+    # min value
+    else:
+        v = utils.INF
+        best_move = None
+        w_board = utils.invert_board(board, False)
+        for action in generate_valid_moves(w_board):
+            next_board = utils.invert_board(utils.state_change(w_board, action[0], action[1], False), False)
+            new_v, _ = minimax_alpha_beta(next_board, depth + 1, max_depth, alpha, beta, True)
+            if new_v < v:
+                v = new_v
+                best_move = action
+            if v <= alpha:
+                break
+            beta = min(beta, v)
+        return v, best_move
+
+def test_31():
+    board1 = [
+        list("______"),
+        list("__BB__"),
+        list("____BB"),
+        list("WBW_B_"),
+        list("____WW"),
+        list("_WW___"),
+    ]
+    score1, _ = minimax_alpha_beta(board1, 0, 3, -utils.INF, utils.INF, True)
+    assert score1 == utils.WIN, "black should win in 3"
+
+
+    board2 = [
+        list("____B_"),
+        list("___B__"),
+        list("__B___"),
+        list("_WWW__"),
+        list("____W_"),
+        list("______"),
+    ]
+    score2, _ = minimax_alpha_beta(board2, 0, 5, -utils.INF, utils.INF, True)
+    assert score2 == utils.WIN, "black should win in 5"
+
+    board3 = [
+        list("____B_"),
+        list("__BB__"),
+        list("______"),
+        list("_WWW__"),
+        list("____W_"),
+        list("______"),
+    ]
+    score3, m = minimax_alpha_beta(board3, 0, 6, -utils.INF, utils.INF, True)
+    print(m)
+    assert score3 == -utils.WIN, "white should win in 6"
+
+# print("custom")
+# print(minimax_alpha_beta([['_', '_', '_', '_', 'B', '_'], ['_', '_', 'B', 'B', '_', '_'], ['_', '_', '_', '_', '_', '_'], ['_', 'W', 'W', 'W', '_', '_'], ['_', '_', '_', '_', 'W', '_'], ['_', '_', '_', '_', '_', '_']], 
+#                        0, 6, -utils.INF, utils.INF, True))
+# print("custom end")
+
+def negamax_alpha_beta(
+        board: Board, 
+        depth: int, 
+        max_depth: int, 
+        alpha: Score, 
+        beta: Score
+    ) -> tuple[Score, Move]:
+    """
+    Finds the best move for the input board state.
+    Note that you are black.
+
+    Parameters
+    ----------
+    board: 2D list of lists. Contains characters "B", "W", and "_",
+    representing black pawn, white pawn, and empty cell, respectively.
+
+    depth: int, the depth to search for the best move. When this is equal
+    to `max_depth`, you should get the evaluation of the position using
+    the provided heuristic function.
+
+    max_depth: int, the maximum depth for cutoff.
+
+    alpha: Score. The alpha value in a given state.
+
+    beta: Score. The beta value in a given state.
+
+    Notice that you no longer need the parameter `is_black`.
+
+    Returns
+    -------
+    A tuple (evalutation, ((src_row, src_col), (dst_row, dst_col))):
+    evaluation: the best score that black can achieve after this move.
+    src_row, src_col: position of the pawn to move.
+    dst_row, dst_col: position to move the pawn to.
+    """
+    if depth == max_depth or utils.is_game_over(board):
+        return evaluate(board), None
+    best_move = None
+    for action in generate_valid_moves(board):
+        # if current is black, then the next board is white. It is inverted after applying the black move
+        next_board = utils.invert_board(utils.state_change(board, action[0], action[1], False), False)
+        # since this move is white, the score needs to be negated (to get the min value of the white move)
+        new_v, _ = negamax_alpha_beta(next_board, depth + 1, max_depth, -beta, -alpha)
+        # if the negated score is greater than the current max value, update the max value and the best move
+        if -new_v > alpha:
+            alpha = -new_v
+            best_move = action
+        if alpha >= beta:
+            break
+    return alpha, best_move
+
+def test_32():
+    board1 = [
+        list("______"),
+        list("__BB__"),
+        list("____BB"),
+        list("WBW_B_"),
+        list("____WW"),
+        list("_WW___"),
+    ]
+    score1, _ = negamax_alpha_beta(board1, 0, 3, -utils.INF, utils.INF)
+    assert score1 == utils.WIN, "black should win in 3"
+
+    board2 = [
+        list("____B_"),
+        list("___B__"),
+        list("__B___"),
+        list("_WWW__"),
+        list("____W_"),
+        list("______"),
+    ]
+    score2, _ = negamax_alpha_beta(board2, 0, 5, -utils.INF, utils.INF)
+    assert score2 == utils.WIN, "black should win in 5"
+
+    board3 = [
+        list("____B_"),
+        list("__BB__"),
+        list("______"),
+        list("_WWW__"),
+        list("____W_"),
+        list("______"),
+    ]
+    score3, _ = negamax_alpha_beta(board3, 0, 6, -utils.INF, utils.INF)
+    assert score3 == -utils.WIN, "white should win in 6"
+
+# test_32()
+
+# Custom evaluation function.
+# Note: this overrides the provided `evaluate` function defined earlier in this file.
+
+def evaluate(board: Board) -> Score:
+    score = 0
+    bcount = 0
+    wcount = 0
+    for r, row in enumerate(board):
+        for tile in row:
+            if tile == "B":
+                bcount += 1
+                if r == 0:
+                    score += 10
+                if r == 1:
+                    score += 10
+                if r == 2:
+                    score += 20
+                if r == 3:
+                    score += 40
+                if r == 4:
+                    score += 50
+                if r == 5:
+                    return utils.WIN
+            elif tile == "W":
+                wcount += 1
+                if r == 0:
+                    return -utils.WIN
+                if r == 1:
+                    score -= 50
+                if r == 2:
+                    score -= 40
+                if r == 3:
+                    score -= 20
+                if r == 4:
+                    score -= 10
+                if r == 5:
+                    score -= 10
+    if wcount == 0:
+        return utils.WIN
+    if bcount == 0:
+        return -utils.WIN
+    return score
+
+def test_41():
+    board1 = [
+        ["_", "_", "_", "B", "_", "_"],
+        ["_", "_", "_", "W", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"],
+        ["_", "_", "B", "_", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"]
+    ]
+    assert evaluate(board1) == 0
+
+    board2 = [
+        ["_", "_", "_", "B", "W", "_"],
+        ["_", "_", "_", "W", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"]
+    ]
+    assert evaluate(board2) == -utils.WIN
+
+    board3 = [
+        ["_", "_", "_", "_", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"],
+        ["_", "_", "B", "_", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"],
+        ["_", "_", "_", "_", "_", "_"]
+    ]
+    assert evaluate(board3) == utils.WIN
+
+test_41()
+
+class PlayerAI:
+
+    def make_move(self, board: Board) -> Move:
+        """
+        This is the function that will be called from main.py
+        You should combine the functions in the previous tasks
+        to implement this function.
+
+        Parameters
+        ----------
+        self: object instance itself, passed in automatically by Python.
+        
+        board: 2D list-of-lists. Contains characters "B", "W", and "_",
+        representing black pawn, white pawn, and empty cell, respectively.
+        
+        Returns
+        -------
+        A tuple of tuples containing coordinates (row_index, col_index).
+        The first tuple contains the source position of the black pawn
+        to be moved, the second tuple contains the destination position.
+        """
+        # TODO: Replace starter code with your AI
+        ################
+        # Starter code #
+        ################
+        for r in range(len(board)):
+            for c in range(len(board[r])):
+                # check if B can move forward directly
+                if board[r][c] == "B" and board[r + 1][c] == "_":
+                    src = r, c
+                    dst = r + 1, c
+                    return src, dst # valid move
+        return (0, 0), (0, 0) # invalid move
+
+class PlayerNaive:
+    """
+    A naive agent that will always return the first available valid move.
+    """
+    def make_move(self, board: Board) -> Move:
+        return utils.generate_rand_move(board)
+
+##########################
+# Game playing framework #
+##########################
+if __name__ == "__main__":
+    assert utils.test_move([
+        ["B", "B", "B", "B", "B", "B"], 
+        ["_", "B", "B", "B", "B", "B"], 
+        ["_", "_", "_", "_", "_", "_"], 
+        ["_", "B", "_", "_", "_", "_"], 
+        ["_", "W", "W", "W", "W", "W"], 
+        ["W", "W", "W", "W", "W", "W"]
+    ], PlayerAI())
+
+    assert utils.test_move([
+        ["_", "B", "B", "B", "B", "B"], 
+        ["_", "B", "B", "B", "B", "B"], 
+        ["_", "_", "_", "_", "_", "_"], 
+        ["_", "B", "_", "_", "_", "_"], 
+        ["W", "W", "W", "W", "W", "W"], 
+        ["_", "_", "W", "W", "W", "W"]
+    ], PlayerAI())
+
+    assert utils.test_move([
+        ["_", "_", "B", "B", "B", "B"], 
+        ["_", "B", "B", "B", "B", "B"], 
+        ["_", "_", "_", "_", "_", "_"], 
+        ["_", "B", "W", "_", "_", "_"], 
+        ["_", "W", "W", "W", "W", "W"], 
+        ["_", "_", "_", "W", "W", "W"]
+    ], PlayerAI())
+
+    # generates initial board
+    # board = utils.generate_init_state()
+    # res = utils.play(PlayerAI(), PlayerNaive(), board)
+    # # Black wins means your agent wins.
+    # print(res)