TicTacToe Alpha Beta Pruning

编辑2021年3月30日：问题措辞非常糟糕，重新表述了

我在Python中实现了一个阿尔法-贝塔修剪算法，我想知道它不走最快的胜利路线是否正常(有时它会在2步中获胜，而它本可以在1步中获胜(。

import math
from collections import Counter
from copy import copy, deepcopy
""" Board Class Definition """
class Board:
""" constructor """
def __init__(self):
# init data
self.data = [ "." for i in range(9) ]


""" copy constructor equivalent """
@staticmethod
def copy(board):
return deepcopy(board)


""" play at given coordinates """
def play_at(self, position, color):
# check if you can play
if self.data[position] == ".":
# make the move
self.data[position] = color
return True

# did not play
return False


""" get coordinates of empty pieces on the board """
def get_playable_coord(self):
# define coordinates of empty tiles
return [ i for i in range(9) if self.data[i] == "." ]


""" board is full """
def is_full(self):
# define tile counter
c = Counter( [ self.data[i] for i in range(9) ] )
return ( c["x"] + c["o"] == 9 )


""" get winner of the board """
def get_winner(self):
# straight lines to check
straightLines = [ (0, 1, 2) , (3, 4, 5) , (6, 7, 8) , (0, 3, 6) , (1, 4, 7) , (2, 5, 8) , (0, 4, 8) , (2, 4, 6) ]

# check straight lines - 8 in total
for i in range(8):
# get counter of line of tiles
c = Counter( [ self.data[j] for j in straightLines[i] ] )

# different scenarii
if c["x"] == 3:
return "x"

elif c["o"] == 3:
return "o"

# if board is full, game is a draw
if self.is_full():
return "draw"

# return None by default
return None


""" get heuristic value of board - for "x" if 'reverse' == False """
def get_heuristic_value(self, reverse):
# init variable
value = 0

# straight lines to check
straightLines = [ (0, 1, 2) , (3, 4, 5) , (6, 7, 8) , (0, 3, 6) , (1, 4, 7) , (2, 5, 8) , (0, 4, 8) , (2, 4, 6) ]

# check straight lines - 8 in total
for i in range(8):
# get counter of line of tiles
c = Counter( [ self.data[j] for j in straightLines[i] ] )

# different scenarii
if c["x"] == 3:
value += 100

elif c["x"] == 2 and c["."] == 1:
value += 10

elif c["x"] == 1 and c["."] == 2:
value += 1

elif c["o"] == 3:
value -= 100

elif c["o"] == 2 and c["."] == 1:
value -= 10

elif c["o"] == 1 and c["."] == 2:
value -= 1

# return heuristic value
if reverse:
return -value
else:
return value

""" Model Class Definition """
class Model:
""" constructor """
def __init__(self, color):
# define parameters
self.color = color
self.other = self.get_opponent(color)

# define board
self.board = Board()

# define winner
self.winner = None

# 'x' plays first
if self.other == "x":
self.make_ai_move()


""" get opponent """
def get_opponent(self, player):
if player == "x":
return "o"
return "x"


""" player makes a move in given position """
def make_player_move(self, pos):
if self.winner is None:
# get result of board method
res = self.board.play_at(pos, self.color)

# check end of game <?>
self.winner = self.board.get_winner()

if res and self.winner is None:
# make AI move
self.make_ai_move()


""" AI makes a move by using alphabeta pruning on all child nodes """
def make_ai_move(self):
# init variables
best, bestValue = None, - math.inf

for i in self.board.get_playable_coord():
# copy board as child
copie = Board.copy(self.board)
copie.play_at(i, self.other)

# use alpha beta && (potentially) register play
value = self.alphabeta(copie, 10, - math.inf, math.inf, False)
if value > bestValue:
best, bestValue = i, value

# play at best coordinates
self.board.play_at(best, self.other)

# check end of game <?>
self.winner = self.board.get_winner()


""" alpha beta function (minimax optimization) """
def alphabeta(self, node, depth, alpha, beta, maximizingPlayer):
# ending condition
if depth == 0 or node.get_winner() is not None:
return node.get_heuristic_value(self.other == "o")

# recursive part initialization
if maximizingPlayer:
value = - math.inf
for pos in node.get_playable_coord():
# copy board as child
child = Board.copy(node)
child.play_at(pos, self.other)
value = max(value, self.alphabeta(child, depth-1, alpha, beta, False))

# update alpha
alpha = max(alpha, value)
if alpha >= beta:
break
return value

else:
value = math.inf
for pos in node.get_playable_coord():
# copy board as child
child = Board.copy(node)
child.play_at(pos, self.color)
value = min(value, self.alphabeta(child, depth-1, alpha, beta, True))

# update beta
beta = min(beta, value)
if beta <= alpha:
break
return value

我对这个问题的结论：

Alpha Beta Pruning是深度优先搜索算法，而不是广度优先搜索算法。所以我认为，无论深度如何，它都会选择找到的第一条路线，而不是搜索最快的路线，这是很自然的。。。

我知道这不是问题的答案，但我想为AI战术玩家提出一个更简单的方法，包括计算位置是赢还是输。这需要考虑游戏中任何时候可能发生的所有有效位置，但由于场地是3x3，有效位置的数量小于3^9=19683(每个位置都是"x"、"o"或"(。这不是一个硬性限制，因为从游戏规则的角度来看，很多位置都是无效的。我建议你从这里开始，因为你所说的算法主要用于更难的游戏，在这些游戏中，完全搜索是不可行的。

因此，您所需要做的就是在启动程序后为每个位置计算一次输赢指标，然后在O(1(中做出决定。这对于3x3字段来说是可以接受的，但可能不会更多。

一般方法如下所述：https://cp-algorithms.com/game_theory/games_on_graphs.html.简言之，你建立了一个可能的动作树，将树叶标记为获胜或失败，并通过考虑所有子角色的过渡来逐步上升(例如，如果每个过渡都导致对方玩家处于获胜位置，则为失败的位置(。

如果你懂俄语，这里有一个原始页面的链接：http://e-maxx.ru/algo/games_on_graphs

附言：我在过去的某个时候也玩过这个游戏，并实施了这种方法。这是我的回购，以备您调查：https://github.com/yuuurchyk/cpp_tic_tac_toe.公平警告：它是用C++编写的，代码有点难看

相关内容

最新更新

热门标签：