历史记录

清除记录

猜你想搜

AcWing热点
App
登录/注册
基于概率型的蒙特卡洛搜索树实现的简易不围棋

作者：
Yae.SakuRain , 2023-06-05 12:28:01 , 所有人可见 , 阅读 206
4

3
基于概率型的蒙特卡洛搜索树实现的简易不围棋

算法：蒙特卡洛搜索树(MCTS) 马尔代夫决策过程(UCB公式) 深度优先搜索(DFS)

这个是参加全国计算机博弈大赛前，培训我的队员用的代码。其中UCB公式中的常量值是随便写的，具体什么数值最合适，需要用深度学习神经网络寻找最佳值。
//头文件Checkboard.h
#ifndef CHECKERBOARD_H
#define CHECKERBOARD_H

const int BOARDSIZE = 9;

class Checkerboard {
protected:
    int board[2][BOARDSIZE][BOARDSIZE];                     //存储棋盘状态[0:自己的棋盘, 1:对手的棋盘]
    bool couldInit[2][BOARDSIZE][BOARDSIZE];                //存储棋盘上各点是否可放置棋子[0:自己的棋盘, 1:对手的棋盘]
    bool visited[2][BOARDSIZE][BOARDSIZE];                  //存储dfs搜索当前各点是否被访问
public:
    Checkerboard();                                         //构造函数初始化棋盘
    Checkerboard(int copyboard[BOARDSIZE][BOARDSIZE]);      //构造函数导入现有棋盘
    void showBoard(int who);                                //打印棋盘
    void returnBoard(int copyBoard[BOARDSIZE][BOARDSIZE]);  //返回当前棋盘
    bool judgeRangle(int x, int y);                         //判断棋子落点是否在棋盘内
    bool dfsJudgeAir(int who, int x, int y);                //深度搜索判断有无气
    bool judgeOutcome(int who, int x, int y, int color);    //判断棋局的胜负
    bool initBoard(int x, int y, int color);                //将棋子放置在棋盘
    int getCanInitCount(int who);                           //计算棋盘上可以放置棋子的点的数量
};


#endif // !CHECKERBOARD_H

//源文件Checkboard.cpp
#include "Checkboard.h"
#include <iostream>

using namespace std;

const int dir[2][4] = {{-1, 0, 1, 0}, {0, -1, 0, 1}};

Checkerboard::Checkerboard()
{
    for (int i = 0; i < BOARDSIZE; ++i)
        for (int j = 0; j < BOARDSIZE; ++j)
        {
            board[0][i][j] = 0;
            board[1][i][j] = 0;
        }
}

Checkerboard::Checkerboard(int copyboard[BOARDSIZE][BOARDSIZE])
{
    for (int i = 0; i < BOARDSIZE; ++i)
        for (int j = 0; j < BOARDSIZE; ++j)
        {
            board[0][i][j] = copyboard[i][j];
            board[1][i][j] = -copyboard[i][j];
        }
}

void Checkerboard::returnBoard(int copyBoard[BOARDSIZE][BOARDSIZE])
{
    for (int i = 0; i < BOARDSIZE; ++i)
        for (int j = 0; j < BOARDSIZE; ++j)
            copyBoard[i][j] = -board[0][i][j];
}

void Checkerboard::showBoard(int who)
{
    cout << "<Board>\t";
    for (int i = 0; i < BOARDSIZE; ++i)
        cout << "-" << i << "-\t";
    cout << endl;
    for (int i = 0; i < BOARDSIZE; ++i)
    {
        cout << "-" << i << "-\t";
        for (int j = 0; j < BOARDSIZE; ++j)
        {
            cout << " " << board[who][i][j] << "\t";
        }
        cout << endl;
    }
}

bool Checkerboard::judgeRangle(int x, int y)
{
    if (x >= 0 && x < BOARDSIZE && y >= 0 && y < BOARDSIZE)
        return true;
    return false;
}

bool Checkerboard::dfsJudgeAir(int who, int x, int y)
{
    visited[who][x][y] = true;
    bool visit = false;
    for (int i = 0; i < 4; ++i)
    {
        int dx = x + dir[0][i], dy = y + dir[1][i];
        if (judgeRangle(dx, dy))
        {
            if (board[who][dx][dy] == 0)
                visit = true;
            if (board[who][dx][dy] == board[who][x][y] && !visited[who][dx][dy])
                if (dfsJudgeAir(who, dx, dy))
                    visit = true;
        }
    }
    return visit;
}

bool Checkerboard::judgeOutcome(int who, int x, int y, int color)
{
    if (!judgeRangle(x, y)) return false;
    if (board[who][x][y]) return false;
    for (int i = 0; i < BOARDSIZE; ++i)
        for (int j = 0; j < BOARDSIZE; ++j)
            visited[who][i][j] = false;
    board[who][x][y] = color;
    if (!dfsJudgeAir(who, x, y))
    {
        board[who][x][y] = 0;
        return false;
    }
    for (int i = 0; i < 4; ++i)
    {
        int dx = x + dir[0][i], dy = y + dir[1][i];
        if (judgeRangle(dx, dy))
        {
            if (board[who][dx][dy] && !visited[who][dx][dy])
            {
                if (!dfsJudgeAir(who, dx, dy))
                {
                    board[who][x][y] = 0;
                    return false;
                }
            }
        }
    }
    board[who][x][y] = 0;
    return true;
}

bool Checkerboard::initBoard(int x, int y, int color)
{
    if (judgeOutcome(0, x, y, color))
    {
        board[0][x][y] = color;
        board[1][x][y] = -color;
        return true;
    }
    cout << ((color == -1) ? "Black" : "White" ) << " failed!" << endl;
    return false;
}

int Checkerboard::getCanInitCount(int who)
{
    for (int i = 0; i < BOARDSIZE; ++i)
        for (int j = 0; j < BOARDSIZE; ++j)
            couldInit[who][i][j] = false;
    int count = 0;
    for (int x = 0; x < BOARDSIZE; ++x)
    {
        for (int y = 0; y < BOARDSIZE; ++y)
        {
            if (judgeOutcome(who, x, y, 1))
            {
                ++count;
                couldInit[who][x][y] = true;
            }
        }
    }
    return count;
}


//头文件MonteCarloTree.h
#ifndef MONTECARLOTRESS_H
#define MONTECARLOTRESS_H

#include "Checkerboard.h"

const int SIZE = BOARDSIZE * BOARDSIZE;

class MonteCarloTree : public Checkerboard {
private:
    MonteCarloTree* parent;                         //存储蒙特卡洛树的根结点
    MonteCarloTree* child[SIZE];                    //存储蒙特卡洛树的子结点
    int childPosition[SIZE][2];                     //当前棋子准备移动的位置
    int childCanInitCount;                          //当前可放置棋子的位置的数量
    int childCanInitCountAll;                       //子树中所有可放置棋子的位置的数量
    double value;                                   //当前结点的总value值
    int n;                                          //当前结点的搜索次数，UCB计算公式中的n[i]值
    double UCB;                                     //当前结点的UCB值
    int *count;                                     //总结点点数的指针
public:
    //构造函数初始化蒙特卡洛树
    MonteCarloTree(int lastBoard[BOARDSIZE][BOARDSIZE], int lastPosition[2], MonteCarloTree* lastParent, int* lastCount);
    int returnChildCanInitCount();                  //返回当前结点可放置棋子的位置的数量
    MonteCarloTree* returnChildNode(int position);  //返回当前当前结点的子结点的地址
    double returnValue();                           //返回当前结点的总value值
    int* returnChildPosition(int position);         //返回棋子的走法
    void getChildCanInitCount();                    //计算出子树中可放置棋子的位置的数量
    int getUCB(int position);                       //计算UCB值的公式
    MonteCarloTree* selection();                    //选择出value值最大的结点
    double simulation();                            //模拟出敌人的对局状态
    void backpropagation(double dValue);            //反向广播估值
};


#endif // !MONTECARLOTRESS_H


//源文件MonteCarloTree.cpp
#include "MonteCarloTree.h"
#include <cmath>

using namespace std;

const double CONSTANT = 20;

MonteCarloTree::MonteCarloTree(int lastBoard[BOARDSIZE][BOARDSIZE], int lastPosition[2], MonteCarloTree* lastParent, int* lastCount)
{
    for (int i = 0; i < BOARDSIZE; ++i)
        for (int j = 0; j < BOARDSIZE; ++j)
        {
            board[0][i][j] = -lastBoard[i][j];
            board[1][i][j] = lastBoard[i][j];
        }
    if (lastPosition[0] >= 0 && lastPosition[0] < BOARDSIZE && lastPosition[1] >= 0 && lastPosition[1] < BOARDSIZE)
    {
        board[0][lastPosition[0]][lastPosition[1]] = -1;
        //board[1][lastPosition[0]][lastPosition[1]] = 1;
    }
    parent = lastParent;
    value = 0;
    n = 0;
    childCanInitCount = 0;
    count = lastCount;
    this->getChildCanInitCount();
}

int MonteCarloTree::returnChildCanInitCount() {return childCanInitCount;}

MonteCarloTree* MonteCarloTree::returnChildNode(int position) {return child[position];}

double MonteCarloTree::returnValue() {return value;}

int* MonteCarloTree::returnChildPosition(int position) {return childPosition[position];}

void MonteCarloTree::getChildCanInitCount()
{
    int canInitPositionCount = getCanInitCount(0);
    int canInitPositions[SIZE];
    int canInitNum = 0;
    for (int i = 0; i < BOARDSIZE; ++i)
    {
        for (int j = 0; j < BOARDSIZE; ++j)
        {
            if (couldInit[0][i][j])
            {
                canInitPositions[canInitNum] = i * BOARDSIZE + j;
                ++canInitNum;
            }
        }
    }
    childCanInitCountAll = canInitPositionCount;
    for (int i = 0; i < canInitPositionCount; ++i)
    {
        childPosition[i][0] = canInitPositions[i] / BOARDSIZE;
        childPosition[i][1] = canInitPositions[i] % BOARDSIZE;
    }
}

int MonteCarloTree::getUCB(int position)
{
    return (double(child[position]->value) / double(child[position]->n))
    + CONSTANT * sqrt(log(double(*count)) / (double(child[position]->n)));
}

MonteCarloTree* MonteCarloTree::selection()
{
    if (childCanInitCountAll == 0) return this; //无可放置位置
    if (childCanInitCountAll > childCanInitCount)
    { //为叶子结点,Expansion新结点
        MonteCarloTree* nextNode = new MonteCarloTree(board[0], childPosition[childCanInitCount], this, count);
        child[childCanInitCount] = nextNode;
        ++childCanInitCount;
        return nextNode;
    }
    for (int i = 0; i < childCanInitCount; ++i)
        child[i]->UCB = getUCB(i);          //非叶子结点,计算UCB值
    int bestNode = 0;
    double maxUCB = 0;
    for (int i = 0; i < childCanInitCount; ++i)
    { //Selection最佳结点
        if (maxUCB < child[i]->UCB)
        {
            bestNode = i;
            maxUCB = child[i]->UCB;
        }
    }
    return child[bestNode]->selection();            //搜索最佳结点
}

double MonteCarloTree::simulation()
{ //模拟双方的对峙状态
    for (int i = 0; i < BOARDSIZE; ++i)
        for (int j = 0; j < BOARDSIZE; ++j)
            board[1][i][j] = -board[0][i][j];
    int myCanInitCount = getCanInitCount(0);
    int rivalCanInitCount = getCanInitCount(1);
    return myCanInitCount - rivalCanInitCount;
}

void MonteCarloTree::backpropagation(double dValue)
{
    MonteCarloTree* p = this;
    int who = 0;
    while (p != nullptr)
    {
        if (who == 1)
        {
            p->value += dValue;
            --who;
        }
        else
        {
            p->value -= dValue;
            ++who;
        }
        ++p->n;
        p = p->parent;
    }
}

//主函数部分main.cpp
#include <iostream>
#include <random>
#include <ctime>
#include "MonteCarloTree.h"

using namespace std;

int main()
{
    srand((int)clock());
    int count = 0;                              //总计算的节点数(UCB计算公式中的N)
    int number = 1;                             //记录回合数
    int board[BOARDSIZE][BOARDSIZE] = {0};
    int lastPosition[2] = {0, 0};       //对手最后的走棋位置
    long double TIMEOUT = 0;                    //初始运算时间
    double ADD = 0;                               //递增倍率
    cout << "Input the initial machine operation time(ms): ";
    cin >> TIMEOUT;
    cout << "Incremental rate of operation time: ";
    cin >> ADD;
    cout << "--------------------" << endl;
    cout << "No." << number++ << " Round" << "Input your move:" << endl;
    cout << "Horizontal direction[0," << BOARDSIZE - 1 << "] = ";
    cin >> lastPosition[1];
    cout << "Vertical direction[0," << BOARDSIZE - 1 << "] = ";
    cin >> lastPosition[0];
    cout << "--------------------" << endl;
    cout << "Your board:" << endl;
    cout << "Your move: Black -> (" << lastPosition[1] << ", " << lastPosition[0] << ")" << endl;
    board[lastPosition[0]][lastPosition[1]] = 1;
    MonteCarloTree tree(board, lastPosition, nullptr, &count); //创建根节点
    tree.showBoard(1);
    int start = clock();
    while (clock() - start < TIMEOUT)
    {
        count++;                                            //计算节点数
        MonteCarloTree* node = tree.selection();            //Expansion一个新结点
        node->backpropagation(node->simulation());   //计算并反向广播估值
    }
    TIMEOUT += TIMEOUT * ADD;
    int bestNode[SIZE] = {0};               //所有最优子结点的位置
    int bestChildNum = 0;                   //最优结节点个数
    double maxValue = INT_MIN;                 //当前最优子结点的value值
    for (int i = 0; i < tree.returnChildCanInitCount(); ++i)
    {
        if (maxValue < tree.returnChildNode(i)->returnValue())
        {
            //for (int j = 0; j < SIZE; ++j) bestNode[j] = 0;
            for (int& e : bestNode) e = 0;
            bestChildNum = 0;
            bestNode[bestChildNum++] = i;
            maxValue = tree.returnChildNode(i)->returnValue();
        }
        else if (maxValue == tree.returnChildNode(i)->returnValue())
        {
            bestNode[bestChildNum++] = i;
        }
    }
    int position = rand() % bestChildNum;                           //随机选择一个最优走法
    int* bestPosition = tree.returnChildPosition(bestNode[position]);
    tree.initBoard(bestPosition[0], bestPosition[1], 1);
    cout << "--------------------" << endl;
    cout << "Machine's board:" << endl;
    cout << "Machine's move: White -> (" << bestPosition[1] << ", " << bestPosition[0] << ")" << endl;
    tree.showBoard(1);
    int copyBoard[BOARDSIZE][BOARDSIZE] = {0};
    tree.returnBoard(copyBoard);
    cout << "--------------------" << endl;
    cout << "No." << number++ << " Round" << "Input your move:" << endl;
    cout << "Horizontal direction[0," << BOARDSIZE - 1 << "] = ";
    cin >> lastPosition[1];
    cout << "Vertical direction[0," << BOARDSIZE - 1 << "] = ";
    cin >> lastPosition[0];
    cout << "--------------------" << endl;
    if(!tree.initBoard(lastPosition[0], lastPosition[1], -1)) return 0;
    cout << "Your board:" << endl;
    cout << "Your move: Black -> (" << lastPosition[1] << ", " << lastPosition[0] << ")" << endl;
    tree.showBoard(1);
    tree.returnBoard(copyBoard);
    while (true)
    {
        count = 0;
        MonteCarloTree newtree(copyBoard, lastPosition, nullptr, &count); //创建根节点
        int newstart = clock();
        while (clock() - newstart < TIMEOUT)
        {
            count++;                                            //计算节点数
            MonteCarloTree* node = newtree.selection();            //Expansion一个新结点
            node->backpropagation(node->simulation());   //计算并反向广播估值
        }
        TIMEOUT += TIMEOUT * ADD;
        int newbestNode[SIZE] = {0};        //所有最优子结点的位置
        bestChildNum = 0;                   //最优结节点个数
        maxValue = INT_MIN;                 //当前最优子结点的value值
        for (int i = 0; i < newtree.returnChildCanInitCount(); ++i)
        {
            if (maxValue < newtree.returnChildNode(i)->returnValue())
            {
                //for (int j = 0; j < SIZE; ++j) newbestNode[j] = 0;
                for (int& e : newbestNode) e = 0;
                bestChildNum = 0;
                newbestNode[bestChildNum++] = i;
                maxValue = newtree.returnChildNode(i)->returnValue();
            }
            else if (maxValue == newtree.returnChildNode(i)->returnValue())
            {
                newbestNode[bestChildNum++] = i;
            }
        }
        position = rand() % bestChildNum;                           //随机选择一个最优走法
        bestPosition = newtree.returnChildPosition(newbestNode[position]);
        cout << "--------------------" << endl;
        cout << "Machine's board:" << endl;
        cout << "Machine's move: White -> (" << bestPosition[1] << ", " << bestPosition[0] << ")" << endl;
        if(!newtree.initBoard(bestPosition[0], bestPosition[1], 1))
        {
            newtree.showBoard(1);
            break;
        }
        newtree.showBoard(1);
        cout << "--------------------" << endl;
        cout << "No." << number++ << " Round" << "Input your move:" << endl;
        cout << "Horizontal direction[0," << BOARDSIZE - 1 << "] = ";
        cin >> lastPosition[1];
        cout << "Vertical direction[0," << BOARDSIZE - 1 << "] = ";
        cin >> lastPosition[0];
        cout << "--------------------" << endl;
        cout << "Your board:" << endl;
        cout << "Your: Black -> (" << lastPosition[1] << ", " << lastPosition[0] << ")" << endl;
        if(!newtree.initBoard(lastPosition[0], lastPosition[1], -1))
        {
            newtree.showBoard(1);
            break;
        }
        newtree.showBoard(1);
        newtree.returnBoard(copyBoard);
    }
    return 0;
}
基于概率型的蒙特卡洛搜索树实现的简易不围棋

基于概率型的蒙特卡洛搜索树实现的简易不围棋

算法：蒙特卡洛搜索树(MCTS) 马尔代夫决策过程(UCB公式) 深度优先搜索(DFS)

0 评论