基于概率型的蒙特卡洛搜索树实现的简易不围棋
算法:蒙特卡洛搜索树(MCTS) 马尔代夫决策过程(UCB公式) 深度优先搜索(DFS)
这个是参加全国计算机博弈大赛前,培训我的队员用的代码。其中UCB公式中的常量值是随便写的,具体什么数值最合适,需要用深度学习神经网络寻找最佳值。
//头文件Checkboard.h
#ifndef CHECKERBOARD_H
#define CHECKERBOARD_H
const int BOARDSIZE = 9;
class Checkerboard {
protected:
int board[2][BOARDSIZE][BOARDSIZE]; //存储棋盘状态[0:自己的棋盘, 1:对手的棋盘]
bool couldInit[2][BOARDSIZE][BOARDSIZE]; //存储棋盘上各点是否可放置棋子[0:自己的棋盘, 1:对手的棋盘]
bool visited[2][BOARDSIZE][BOARDSIZE]; //存储dfs搜索当前各点是否被访问
public:
Checkerboard(); //构造函数初始化棋盘
Checkerboard(int copyboard[BOARDSIZE][BOARDSIZE]); //构造函数导入现有棋盘
void showBoard(int who); //打印棋盘
void returnBoard(int copyBoard[BOARDSIZE][BOARDSIZE]); //返回当前棋盘
bool judgeRangle(int x, int y); //判断棋子落点是否在棋盘内
bool dfsJudgeAir(int who, int x, int y); //深度搜索判断有无气
bool judgeOutcome(int who, int x, int y, int color); //判断棋局的胜负
bool initBoard(int x, int y, int color); //将棋子放置在棋盘
int getCanInitCount(int who); //计算棋盘上可以放置棋子的点的数量
};
#endif // !CHECKERBOARD_H
//源文件Checkboard.cpp
#include "Checkboard.h"
#include <iostream>
using namespace std;
const int dir[2][4] = {{-1, 0, 1, 0}, {0, -1, 0, 1}};
Checkerboard::Checkerboard()
{
for (int i = 0; i < BOARDSIZE; ++i)
for (int j = 0; j < BOARDSIZE; ++j)
{
board[0][i][j] = 0;
board[1][i][j] = 0;
}
}
Checkerboard::Checkerboard(int copyboard[BOARDSIZE][BOARDSIZE])
{
for (int i = 0; i < BOARDSIZE; ++i)
for (int j = 0; j < BOARDSIZE; ++j)
{
board[0][i][j] = copyboard[i][j];
board[1][i][j] = -copyboard[i][j];
}
}
void Checkerboard::returnBoard(int copyBoard[BOARDSIZE][BOARDSIZE])
{
for (int i = 0; i < BOARDSIZE; ++i)
for (int j = 0; j < BOARDSIZE; ++j)
copyBoard[i][j] = -board[0][i][j];
}
void Checkerboard::showBoard(int who)
{
cout << "<Board>\t";
for (int i = 0; i < BOARDSIZE; ++i)
cout << "-" << i << "-\t";
cout << endl;
for (int i = 0; i < BOARDSIZE; ++i)
{
cout << "-" << i << "-\t";
for (int j = 0; j < BOARDSIZE; ++j)
{
cout << " " << board[who][i][j] << "\t";
}
cout << endl;
}
}
bool Checkerboard::judgeRangle(int x, int y)
{
if (x >= 0 && x < BOARDSIZE && y >= 0 && y < BOARDSIZE)
return true;
return false;
}
bool Checkerboard::dfsJudgeAir(int who, int x, int y)
{
visited[who][x][y] = true;
bool visit = false;
for (int i = 0; i < 4; ++i)
{
int dx = x + dir[0][i], dy = y + dir[1][i];
if (judgeRangle(dx, dy))
{
if (board[who][dx][dy] == 0)
visit = true;
if (board[who][dx][dy] == board[who][x][y] && !visited[who][dx][dy])
if (dfsJudgeAir(who, dx, dy))
visit = true;
}
}
return visit;
}
bool Checkerboard::judgeOutcome(int who, int x, int y, int color)
{
if (!judgeRangle(x, y)) return false;
if (board[who][x][y]) return false;
for (int i = 0; i < BOARDSIZE; ++i)
for (int j = 0; j < BOARDSIZE; ++j)
visited[who][i][j] = false;
board[who][x][y] = color;
if (!dfsJudgeAir(who, x, y))
{
board[who][x][y] = 0;
return false;
}
for (int i = 0; i < 4; ++i)
{
int dx = x + dir[0][i], dy = y + dir[1][i];
if (judgeRangle(dx, dy))
{
if (board[who][dx][dy] && !visited[who][dx][dy])
{
if (!dfsJudgeAir(who, dx, dy))
{
board[who][x][y] = 0;
return false;
}
}
}
}
board[who][x][y] = 0;
return true;
}
bool Checkerboard::initBoard(int x, int y, int color)
{
if (judgeOutcome(0, x, y, color))
{
board[0][x][y] = color;
board[1][x][y] = -color;
return true;
}
cout << ((color == -1) ? "Black" : "White" ) << " failed!" << endl;
return false;
}
int Checkerboard::getCanInitCount(int who)
{
for (int i = 0; i < BOARDSIZE; ++i)
for (int j = 0; j < BOARDSIZE; ++j)
couldInit[who][i][j] = false;
int count = 0;
for (int x = 0; x < BOARDSIZE; ++x)
{
for (int y = 0; y < BOARDSIZE; ++y)
{
if (judgeOutcome(who, x, y, 1))
{
++count;
couldInit[who][x][y] = true;
}
}
}
return count;
}
//头文件MonteCarloTree.h
#ifndef MONTECARLOTRESS_H
#define MONTECARLOTRESS_H
#include "Checkerboard.h"
const int SIZE = BOARDSIZE * BOARDSIZE;
class MonteCarloTree : public Checkerboard {
private:
MonteCarloTree* parent; //存储蒙特卡洛树的根结点
MonteCarloTree* child[SIZE]; //存储蒙特卡洛树的子结点
int childPosition[SIZE][2]; //当前棋子准备移动的位置
int childCanInitCount; //当前可放置棋子的位置的数量
int childCanInitCountAll; //子树中所有可放置棋子的位置的数量
double value; //当前结点的总value值
int n; //当前结点的搜索次数,UCB计算公式中的n[i]值
double UCB; //当前结点的UCB值
int *count; //总结点点数的指针
public:
//构造函数初始化蒙特卡洛树
MonteCarloTree(int lastBoard[BOARDSIZE][BOARDSIZE], int lastPosition[2], MonteCarloTree* lastParent, int* lastCount);
int returnChildCanInitCount(); //返回当前结点可放置棋子的位置的数量
MonteCarloTree* returnChildNode(int position); //返回当前当前结点的子结点的地址
double returnValue(); //返回当前结点的总value值
int* returnChildPosition(int position); //返回棋子的走法
void getChildCanInitCount(); //计算出子树中可放置棋子的位置的数量
int getUCB(int position); //计算UCB值的公式
MonteCarloTree* selection(); //选择出value值最大的结点
double simulation(); //模拟出敌人的对局状态
void backpropagation(double dValue); //反向广播估值
};
#endif // !MONTECARLOTRESS_H
//源文件MonteCarloTree.cpp
#include "MonteCarloTree.h"
#include <cmath>
using namespace std;
const double CONSTANT = 20;
MonteCarloTree::MonteCarloTree(int lastBoard[BOARDSIZE][BOARDSIZE], int lastPosition[2], MonteCarloTree* lastParent, int* lastCount)
{
for (int i = 0; i < BOARDSIZE; ++i)
for (int j = 0; j < BOARDSIZE; ++j)
{
board[0][i][j] = -lastBoard[i][j];
board[1][i][j] = lastBoard[i][j];
}
if (lastPosition[0] >= 0 && lastPosition[0] < BOARDSIZE && lastPosition[1] >= 0 && lastPosition[1] < BOARDSIZE)
{
board[0][lastPosition[0]][lastPosition[1]] = -1;
//board[1][lastPosition[0]][lastPosition[1]] = 1;
}
parent = lastParent;
value = 0;
n = 0;
childCanInitCount = 0;
count = lastCount;
this->getChildCanInitCount();
}
int MonteCarloTree::returnChildCanInitCount() {return childCanInitCount;}
MonteCarloTree* MonteCarloTree::returnChildNode(int position) {return child[position];}
double MonteCarloTree::returnValue() {return value;}
int* MonteCarloTree::returnChildPosition(int position) {return childPosition[position];}
void MonteCarloTree::getChildCanInitCount()
{
int canInitPositionCount = getCanInitCount(0);
int canInitPositions[SIZE];
int canInitNum = 0;
for (int i = 0; i < BOARDSIZE; ++i)
{
for (int j = 0; j < BOARDSIZE; ++j)
{
if (couldInit[0][i][j])
{
canInitPositions[canInitNum] = i * BOARDSIZE + j;
++canInitNum;
}
}
}
childCanInitCountAll = canInitPositionCount;
for (int i = 0; i < canInitPositionCount; ++i)
{
childPosition[i][0] = canInitPositions[i] / BOARDSIZE;
childPosition[i][1] = canInitPositions[i] % BOARDSIZE;
}
}
int MonteCarloTree::getUCB(int position)
{
return (double(child[position]->value) / double(child[position]->n))
+ CONSTANT * sqrt(log(double(*count)) / (double(child[position]->n)));
}
MonteCarloTree* MonteCarloTree::selection()
{
if (childCanInitCountAll == 0) return this; //无可放置位置
if (childCanInitCountAll > childCanInitCount)
{ //为叶子结点,Expansion新结点
MonteCarloTree* nextNode = new MonteCarloTree(board[0], childPosition[childCanInitCount], this, count);
child[childCanInitCount] = nextNode;
++childCanInitCount;
return nextNode;
}
for (int i = 0; i < childCanInitCount; ++i)
child[i]->UCB = getUCB(i); //非叶子结点,计算UCB值
int bestNode = 0;
double maxUCB = 0;
for (int i = 0; i < childCanInitCount; ++i)
{ //Selection最佳结点
if (maxUCB < child[i]->UCB)
{
bestNode = i;
maxUCB = child[i]->UCB;
}
}
return child[bestNode]->selection(); //搜索最佳结点
}
double MonteCarloTree::simulation()
{ //模拟双方的对峙状态
for (int i = 0; i < BOARDSIZE; ++i)
for (int j = 0; j < BOARDSIZE; ++j)
board[1][i][j] = -board[0][i][j];
int myCanInitCount = getCanInitCount(0);
int rivalCanInitCount = getCanInitCount(1);
return myCanInitCount - rivalCanInitCount;
}
void MonteCarloTree::backpropagation(double dValue)
{
MonteCarloTree* p = this;
int who = 0;
while (p != nullptr)
{
if (who == 1)
{
p->value += dValue;
--who;
}
else
{
p->value -= dValue;
++who;
}
++p->n;
p = p->parent;
}
}
//主函数部分main.cpp
#include <iostream>
#include <random>
#include <ctime>
#include "MonteCarloTree.h"
using namespace std;
int main()
{
srand((int)clock());
int count = 0; //总计算的节点数(UCB计算公式中的N)
int number = 1; //记录回合数
int board[BOARDSIZE][BOARDSIZE] = {0};
int lastPosition[2] = {0, 0}; //对手最后的走棋位置
long double TIMEOUT = 0; //初始运算时间
double ADD = 0; //递增倍率
cout << "Input the initial machine operation time(ms): ";
cin >> TIMEOUT;
cout << "Incremental rate of operation time: ";
cin >> ADD;
cout << "--------------------" << endl;
cout << "No." << number++ << " Round" << "Input your move:" << endl;
cout << "Horizontal direction[0," << BOARDSIZE - 1 << "] = ";
cin >> lastPosition[1];
cout << "Vertical direction[0," << BOARDSIZE - 1 << "] = ";
cin >> lastPosition[0];
cout << "--------------------" << endl;
cout << "Your board:" << endl;
cout << "Your move: Black -> (" << lastPosition[1] << ", " << lastPosition[0] << ")" << endl;
board[lastPosition[0]][lastPosition[1]] = 1;
MonteCarloTree tree(board, lastPosition, nullptr, &count); //创建根节点
tree.showBoard(1);
int start = clock();
while (clock() - start < TIMEOUT)
{
count++; //计算节点数
MonteCarloTree* node = tree.selection(); //Expansion一个新结点
node->backpropagation(node->simulation()); //计算并反向广播估值
}
TIMEOUT += TIMEOUT * ADD;
int bestNode[SIZE] = {0}; //所有最优子结点的位置
int bestChildNum = 0; //最优结节点个数
double maxValue = INT_MIN; //当前最优子结点的value值
for (int i = 0; i < tree.returnChildCanInitCount(); ++i)
{
if (maxValue < tree.returnChildNode(i)->returnValue())
{
//for (int j = 0; j < SIZE; ++j) bestNode[j] = 0;
for (int& e : bestNode) e = 0;
bestChildNum = 0;
bestNode[bestChildNum++] = i;
maxValue = tree.returnChildNode(i)->returnValue();
}
else if (maxValue == tree.returnChildNode(i)->returnValue())
{
bestNode[bestChildNum++] = i;
}
}
int position = rand() % bestChildNum; //随机选择一个最优走法
int* bestPosition = tree.returnChildPosition(bestNode[position]);
tree.initBoard(bestPosition[0], bestPosition[1], 1);
cout << "--------------------" << endl;
cout << "Machine's board:" << endl;
cout << "Machine's move: White -> (" << bestPosition[1] << ", " << bestPosition[0] << ")" << endl;
tree.showBoard(1);
int copyBoard[BOARDSIZE][BOARDSIZE] = {0};
tree.returnBoard(copyBoard);
cout << "--------------------" << endl;
cout << "No." << number++ << " Round" << "Input your move:" << endl;
cout << "Horizontal direction[0," << BOARDSIZE - 1 << "] = ";
cin >> lastPosition[1];
cout << "Vertical direction[0," << BOARDSIZE - 1 << "] = ";
cin >> lastPosition[0];
cout << "--------------------" << endl;
if(!tree.initBoard(lastPosition[0], lastPosition[1], -1)) return 0;
cout << "Your board:" << endl;
cout << "Your move: Black -> (" << lastPosition[1] << ", " << lastPosition[0] << ")" << endl;
tree.showBoard(1);
tree.returnBoard(copyBoard);
while (true)
{
count = 0;
MonteCarloTree newtree(copyBoard, lastPosition, nullptr, &count); //创建根节点
int newstart = clock();
while (clock() - newstart < TIMEOUT)
{
count++; //计算节点数
MonteCarloTree* node = newtree.selection(); //Expansion一个新结点
node->backpropagation(node->simulation()); //计算并反向广播估值
}
TIMEOUT += TIMEOUT * ADD;
int newbestNode[SIZE] = {0}; //所有最优子结点的位置
bestChildNum = 0; //最优结节点个数
maxValue = INT_MIN; //当前最优子结点的value值
for (int i = 0; i < newtree.returnChildCanInitCount(); ++i)
{
if (maxValue < newtree.returnChildNode(i)->returnValue())
{
//for (int j = 0; j < SIZE; ++j) newbestNode[j] = 0;
for (int& e : newbestNode) e = 0;
bestChildNum = 0;
newbestNode[bestChildNum++] = i;
maxValue = newtree.returnChildNode(i)->returnValue();
}
else if (maxValue == newtree.returnChildNode(i)->returnValue())
{
newbestNode[bestChildNum++] = i;
}
}
position = rand() % bestChildNum; //随机选择一个最优走法
bestPosition = newtree.returnChildPosition(newbestNode[position]);
cout << "--------------------" << endl;
cout << "Machine's board:" << endl;
cout << "Machine's move: White -> (" << bestPosition[1] << ", " << bestPosition[0] << ")" << endl;
if(!newtree.initBoard(bestPosition[0], bestPosition[1], 1))
{
newtree.showBoard(1);
break;
}
newtree.showBoard(1);
cout << "--------------------" << endl;
cout << "No." << number++ << " Round" << "Input your move:" << endl;
cout << "Horizontal direction[0," << BOARDSIZE - 1 << "] = ";
cin >> lastPosition[1];
cout << "Vertical direction[0," << BOARDSIZE - 1 << "] = ";
cin >> lastPosition[0];
cout << "--------------------" << endl;
cout << "Your board:" << endl;
cout << "Your: Black -> (" << lastPosition[1] << ", " << lastPosition[0] << ")" << endl;
if(!newtree.initBoard(lastPosition[0], lastPosition[1], -1))
{
newtree.showBoard(1);
break;
}
newtree.showBoard(1);
newtree.returnBoard(copyBoard);
}
return 0;
}