00001 //---------------------------------------------------------------------------- 00002 /** @file GoUctGlobalSearch.h 00003 */ 00004 //---------------------------------------------------------------------------- 00005 00006 #ifndef GOUCT_GLOBALSEARCH_H 00007 #define GOUCT_GLOBALSEARCH_H 00008 00009 #include <boost/scoped_ptr.hpp> 00010 #include "GoBoard.h" 00011 #include "GoBoardUtil.h" 00012 #include "GoEyeUtil.h" 00013 #include "GoRegionBoard.h" 00014 #include "GoSafetySolver.h" 00015 #include "GoUctDefaultPriorKnowledge.h" 00016 #include "GoUctSearch.h" 00017 #include "GoUctUtil.h" 00018 00019 //---------------------------------------------------------------------------- 00020 00021 /** Enable the usage of the safety solver (currently not functional). 00022 Compile-time constant to enable the usage of the safety solver. This 00023 cost some performance and there are no experiments yet showing that it 00024 improves the playing strength. It is also currently not functional, 00025 since it has not been updated after code changes. 00026 */ 00027 const bool GOUCT_USE_SAFETY_SOLVER = false; 00028 00029 //---------------------------------------------------------------------------- 00030 00031 /** Parameters for GoUctGlobalSearchState */ 00032 struct GoUctGlobalSearchStateParam 00033 { 00034 /** Use the mercy rule. 00035 Count games early as win or loss, if stone difference on board 00036 exceeds a threshold of 30% of the total number of points on board. 00037 */ 00038 bool m_mercyRule; 00039 00040 /** Compute probabilities of territory in terminal positions. */ 00041 bool m_territoryStatistics; 00042 00043 /** Modify game result by the length of the simulation. 00044 This modifies the win/loss result (1/0) by the length of the game 00045 counted as number of moves from the root position of the search. This 00046 can help to prefer shorter games (if it is a win) or longer games (if 00047 it is a loss). It can also have a positive effect on the playing 00048 strength, because the variance of results is larger for longer games. 00049 The modification is added for losses and subtracted for wins. The 00050 value is the length of times the value of the modification parameter. 00051 The maximum modification is 0.5. The default value of the parameter 00052 is 0. 00053 */ 00054 float m_lengthModification; 00055 00056 /** Modify game result by score. 00057 This modifies the win/loss result (1/0) by the score of the end 00058 position. The modification is added for losses and subtracted for 00059 wins. The modification value is the score divided by the maximum 00060 score, which can be reached on the board, times the value of the score 00061 modification parameter. This helps to play moves to maximize the score 00062 even if the game is already clearly lost or won. Otherwise all moves 00063 look equal in clearly won or lost positions. It can also reduce the 00064 typical game length and could even have a positive effect on the 00065 playing strength. The modification can be disabled by setting the 00066 parameter to zero. The default value is 0.02. 00067 */ 00068 float m_scoreModification; 00069 00070 GoUctGlobalSearchStateParam(); 00071 }; 00072 00073 //---------------------------------------------------------------------------- 00074 00075 /** @page gouctpassmoves Handling of pass-moves in GoUctGlobalSearch 00076 00077 @section gouctpassplayout Play-out phase 00078 00079 Pass moves are not played in the play-out phase of the simulations as 00080 long as there are still moves for which GoUctUtil::GeneratePoint 00081 returns true, which are mainly moves that don't fill single point 00082 eyes (see GoUctUtil::GeneratePoint and GoBoardUtil::IsCompletelySurrounded 00083 for an exact definition). Therefore, it is a requirement on 00084 GoUctPlayoutPolicy::GenerateMove, not to return pass moves earlier. 00085 00086 This requirement ensures that all simulated games terminate (as long as 00087 there is no super-ko cycle, because for speed reasons only simple ko's 00088 are checked) and that the terminal position after two passes in a row is 00089 a position that can be quickly evaluated with 00090 GoBoardUtil::ScoreSimpleEndPosition. 00091 00092 @section gouctpassintree In-tree phase 00093 00094 In the in-tree-phase of the game, pass moves are always allowed to avoid 00095 zugzwang situations, if there is a seki on the board. After two 00096 passes the game is terminated and scored with 00097 GoBoardUtil::TrompTaylorScore. 00098 */ 00099 00100 /** Global UCT-Search for Go. 00101 - @ref gouctpassmoves 00102 @tparam POLICY The playout policy 00103 */ 00104 template<class POLICY> 00105 class GoUctGlobalSearchState 00106 : public GoUctState 00107 { 00108 public: 00109 const SgBWSet& m_safe; 00110 00111 const SgPointArray<bool>& m_allSafe; 00112 00113 /** Probabilities that a point belongs to Black in a terminal position. 00114 Only computed if GoUctGlobalSearchStateParam::m_territoryStatistics. 00115 */ 00116 SgPointArray<SgUctStatistics> m_territoryStatistics; 00117 00118 /** Constructor. 00119 @param threadId The number of the thread. Needed for passing to 00120 constructor of SgUctThreadState. 00121 @param bd The board 00122 @param policy The random policy (takes ownership). It is possible to 00123 set the policy to null at construction time to allowed a multi-step 00124 construction; but then a policy has to be set with SetPolicy(), before 00125 the search is used. 00126 @param param Parameters. Stores a reference to the argument. 00127 @param policy_param. Stores a reference to the argument. 00128 @param safe Safety information. Stores a reference to the argument. 00129 @param allSafe Safety information. Stores a reference to the argument. 00130 */ 00131 GoUctGlobalSearchState(std::size_t threadId, const GoBoard& bd, 00132 POLICY* policy, 00133 const GoUctGlobalSearchStateParam& param, 00134 const GoUctPlayoutPolicyParam& policyParam, 00135 const SgBWSet& safe, 00136 const SgPointArray<bool>& allSafe); 00137 00138 ~GoUctGlobalSearchState(); 00139 00140 float Evaluate(); 00141 00142 bool GenerateAllMoves(std::size_t count, std::vector<SgMoveInfo>& moves, 00143 SgProvenNodeType& provenType); 00144 00145 SgMove GeneratePlayoutMove(bool& skipRaveUpdate); 00146 00147 void ExecutePlayout(SgMove move); 00148 00149 void GameStart(); 00150 00151 void EndPlayout(); 00152 00153 void StartPlayout(); 00154 00155 void StartPlayouts(); 00156 00157 void StartSearch(); 00158 00159 POLICY* Policy(); 00160 00161 /** Set random policy. 00162 Sets a new random policy and deletes the old one, if it existed. 00163 */ 00164 void SetPolicy(POLICY* policy); 00165 00166 void ClearTerritoryStatistics(); 00167 00168 private: 00169 const GoUctGlobalSearchStateParam& m_param; 00170 00171 const GoUctPlayoutPolicyParam& m_policyParam; 00172 00173 /** See SetMercyRule() */ 00174 bool m_mercyRuleTriggered; 00175 00176 /** Number of pass moves played in a row in the playout phase. */ 00177 int m_passMovesPlayoutPhase; 00178 00179 /** See SetMercyRule() */ 00180 int m_mercyRuleThreshold; 00181 00182 /** Difference of stones on board. 00183 Black counts positive. 00184 */ 00185 int m_stoneDiff; 00186 00187 /** Board move number at root node of search. */ 00188 int m_initialMoveNumber; 00189 00190 /** The area in which moves should be generated. */ 00191 GoPointList m_area; 00192 00193 /** See SetMercyRule() */ 00194 float m_mercyRuleResult; 00195 00196 /** Inverse of maximum score one can reach on a board of the current 00197 size. 00198 */ 00199 float m_invMaxScore; 00200 00201 SgRandom m_random; 00202 00203 GoUctDefaultPriorKnowledge m_priorKnowledge; 00204 00205 boost::scoped_ptr<POLICY> m_policy; 00206 00207 /** Not implemented */ 00208 GoUctGlobalSearchState(const GoUctGlobalSearchState& search); 00209 00210 /** Not implemented */ 00211 GoUctGlobalSearchState& operator=(const GoUctGlobalSearchState& search); 00212 00213 bool CheckMercyRule(); 00214 00215 template<class BOARD> 00216 float EvaluateBoard(const BOARD& bd, float komi); 00217 00218 /** Generates all legal moves with no knowledge values. */ 00219 void GenerateLegalMoves(std::vector<SgMoveInfo>& moves); 00220 00221 float GetKomi() const; 00222 }; 00223 00224 template<class POLICY> 00225 GoUctGlobalSearchState<POLICY>::GoUctGlobalSearchState(std::size_t threadId, 00226 const GoBoard& bd, POLICY* policy, 00227 const GoUctGlobalSearchStateParam& param, 00228 const GoUctPlayoutPolicyParam& policyParam, 00229 const SgBWSet& safe, const SgPointArray<bool>& allSafe) 00230 : GoUctState(threadId, bd), 00231 m_safe(safe), 00232 m_allSafe(allSafe), 00233 m_param(param), 00234 m_policyParam(policyParam), 00235 m_priorKnowledge(Board(), m_policyParam), 00236 m_policy(policy) 00237 { 00238 ClearTerritoryStatistics(); 00239 } 00240 00241 template<class POLICY> 00242 GoUctGlobalSearchState<POLICY>::~GoUctGlobalSearchState() 00243 { 00244 } 00245 00246 /** See SetMercyRule() */ 00247 template<class POLICY> 00248 bool GoUctGlobalSearchState<POLICY>::CheckMercyRule() 00249 { 00250 SG_ASSERT(m_param.m_mercyRule); 00251 // Only used in playout; m_stoneDiff only defined in playout 00252 SG_ASSERT(IsInPlayout()); 00253 if (m_stoneDiff >= m_mercyRuleThreshold) 00254 { 00255 m_mercyRuleTriggered = true; 00256 m_mercyRuleResult = (UctBoard().ToPlay() == SG_BLACK ? 1 : 0); 00257 } 00258 else if (m_stoneDiff <= -m_mercyRuleThreshold) 00259 { 00260 m_mercyRuleTriggered = true; 00261 m_mercyRuleResult = (UctBoard().ToPlay() == SG_WHITE ? 1 : 0); 00262 } 00263 else 00264 SG_ASSERT(! m_mercyRuleTriggered); 00265 return m_mercyRuleTriggered; 00266 } 00267 00268 template<class POLICY> 00269 void GoUctGlobalSearchState<POLICY>::ClearTerritoryStatistics() 00270 { 00271 for (SgPointArray<SgUctStatistics>::NonConstIterator 00272 it(m_territoryStatistics); it; ++it) 00273 (*it).Clear(); 00274 } 00275 00276 template<class POLICY> 00277 void GoUctGlobalSearchState<POLICY>::EndPlayout() 00278 { 00279 GoUctState::EndPlayout(); 00280 m_policy->EndPlayout(); 00281 } 00282 00283 template<class POLICY> 00284 float GoUctGlobalSearchState<POLICY>::Evaluate() 00285 { 00286 float komi = GetKomi(); 00287 if (IsInPlayout()) 00288 return EvaluateBoard(UctBoard(), komi); 00289 else 00290 return EvaluateBoard(Board(), komi); 00291 } 00292 00293 template<class POLICY> 00294 template<class BOARD> 00295 float GoUctGlobalSearchState<POLICY>::EvaluateBoard(const BOARD& bd, 00296 float komi) 00297 { 00298 float score; 00299 SgPointArray<SgEmptyBlackWhite> scoreBoard; 00300 SgPointArray<SgEmptyBlackWhite>* scoreBoardPtr; 00301 if (m_param.m_territoryStatistics) 00302 scoreBoardPtr = &scoreBoard; 00303 else 00304 scoreBoardPtr = 0; 00305 if (m_passMovesPlayoutPhase < 2) 00306 // Two passes not in playout phase, see comment in GenerateAllMoves() 00307 score = GoBoardUtil::TrompTaylorScore(bd, komi, scoreBoardPtr); 00308 else 00309 { 00310 if (m_param.m_mercyRule && m_mercyRuleTriggered) 00311 return m_mercyRuleResult; 00312 score = GoBoardUtil::ScoreSimpleEndPosition(bd, komi, m_safe, 00313 false, scoreBoardPtr); 00314 } 00315 if (m_param.m_territoryStatistics) 00316 for (typename BOARD::Iterator it(bd); it; ++it) 00317 switch (scoreBoard[*it]) 00318 { 00319 case SG_BLACK: 00320 m_territoryStatistics[*it].Add(1); 00321 break; 00322 case SG_WHITE: 00323 m_territoryStatistics[*it].Add(0); 00324 break; 00325 case SG_EMPTY: 00326 m_territoryStatistics[*it].Add(0.5); 00327 break; 00328 } 00329 if (bd.ToPlay() != SG_BLACK) 00330 score *= -1; 00331 float lengthMod = min(GameLength() * m_param.m_lengthModification, 0.5f); 00332 if (score > std::numeric_limits<float>::epsilon()) 00333 return 00334 (1 - m_param.m_scoreModification) 00335 + m_param.m_scoreModification * score * m_invMaxScore 00336 - lengthMod; 00337 else if (score < -std::numeric_limits<float>::epsilon()) 00338 return 00339 m_param.m_scoreModification 00340 + m_param.m_scoreModification * score * m_invMaxScore 00341 + lengthMod; 00342 else 00343 // Draw. Can happen if komi is an integer 00344 return 0; 00345 } 00346 00347 template<class POLICY> 00348 void GoUctGlobalSearchState<POLICY>::ExecutePlayout(SgMove move) 00349 { 00350 GoUctState::ExecutePlayout(move); 00351 const GoUctBoard& bd = UctBoard(); 00352 if (bd.ToPlay() == SG_BLACK) 00353 m_stoneDiff -= bd.NuCapturedStones(); 00354 else 00355 m_stoneDiff += bd.NuCapturedStones(); 00356 m_policy->OnPlay(); 00357 } 00358 00359 template<class POLICY> 00360 void GoUctGlobalSearchState<POLICY>::GameStart() 00361 { 00362 GoUctState::GameStart(); 00363 } 00364 00365 template<class POLICY> 00366 void GoUctGlobalSearchState<POLICY>::GenerateLegalMoves( 00367 std::vector<SgMoveInfo>& moves) 00368 { 00369 //SG_ASSERT(! IsInPlayout()); 00370 const GoBoard& bd = Board(); 00371 SG_ASSERT(! bd.Rules().AllowSuicide()); 00372 00373 if (GoBoardUtil::TwoPasses(bd)) 00374 // Evaluate with Tromp-Taylor (we have no other evaluation that can 00375 // score arbitrary positions). However, if the rules don't require 00376 // CaptureDead(), the two passes need to be played in the search 00377 // sequence. This avoids cases, in which playing a pass (after the 00378 // opponent's last move in the real game was a pass) is only good 00379 // under Tromp-Taylor scoring (see 00380 // regression/sgf/pass/tromp-taylor-pass.sgf). 00381 // Both won't work in Japanese rules, but it is not easy to define 00382 // what a terminal position is in Japanese rules. 00383 if (bd.Rules().CaptureDead() 00384 || bd.MoveNumber() - m_initialMoveNumber >= 2) 00385 return; 00386 00387 SgBlackWhite toPlay = bd.ToPlay(); 00388 for (GoBoard::Iterator it(bd); it; ++it) 00389 { 00390 SgPoint p = *it; 00391 if (bd.IsEmpty(p) 00392 && ! GoEyeUtil::IsSimpleEye(bd, p, toPlay) 00393 && ! m_allSafe[p] 00394 && bd.IsLegal(p, toPlay)) 00395 moves.push_back(SgMoveInfo(p)); 00396 } 00397 00398 // Full randomization is too expensive and in most cases not necessary, 00399 // if prior knowledge is available for initialization or RAVE values are 00400 // available after playing the first move. However we put a random move 00401 // to the front, because the first move in a Go board iteration is often 00402 // a bad corner move 00403 if (moves.size() > 1) 00404 std::swap(moves[0], moves[m_random.Int(moves.size())]); 00405 moves.push_back(SgMoveInfo(SG_PASS)); 00406 } 00407 00408 template<class POLICY> 00409 bool GoUctGlobalSearchState<POLICY>::GenerateAllMoves(std::size_t count, 00410 std::vector<SgMoveInfo>& moves, 00411 SgProvenNodeType& provenType) 00412 { 00413 provenType = SG_NOT_PROVEN; 00414 moves.clear(); // FIXME: needed? 00415 GenerateLegalMoves(moves); 00416 if (! moves.empty()) 00417 { 00418 if (count == 0) 00419 m_priorKnowledge.ProcessPosition(moves); 00420 } 00421 return false; 00422 } 00423 00424 template<class POLICY> 00425 SgMove GoUctGlobalSearchState<POLICY>::GeneratePlayoutMove( 00426 bool& skipRaveUpdate) 00427 { 00428 SG_ASSERT(IsInPlayout()); 00429 if (m_param.m_mercyRule && CheckMercyRule()) 00430 return SG_NULLMOVE; 00431 SgPoint move = m_policy->GenerateMove(); 00432 SG_ASSERT(move != SG_NULLMOVE); 00433 #ifndef NDEBUG 00434 // Check that policy generates pass only if no points are left for which 00435 // GeneratePoint() returns true. See GoUctPlayoutPolicy::GenerateMove() 00436 if (move == SG_PASS) 00437 { 00438 const GoUctBoard& bd = UctBoard(); 00439 SgBalancer balancer(100); // FIXME: Could be quite slow!! 00440 for (GoUctBoard::Iterator it(bd); it; ++it) 00441 SG_ASSERT( bd.Occupied(*it) 00442 || m_safe.OneContains(*it) 00443 || GoBoardUtil::SelfAtari(bd, *it) 00444 || ! GoUctUtil::GeneratePoint(bd, balancer, 00445 *it, bd.ToPlay()) 00446 ); 00447 } 00448 else 00449 SG_ASSERT(! m_safe.OneContains(move)); 00450 #endif 00451 // The position guaranteed to be a terminal position, which can be 00452 // evaluated with GoBoardUtil::ScoreSimpleEndPosition(), only after two 00453 // passes in a row, both of them generated by GeneratePlayoutMove() in 00454 // the playout phase 00455 if (move == SG_PASS) 00456 { 00457 skipRaveUpdate = true; // Don't update RAVE values for pass moves 00458 if (m_passMovesPlayoutPhase < 2) 00459 ++m_passMovesPlayoutPhase; 00460 else 00461 return SG_NULLMOVE; 00462 } 00463 else 00464 m_passMovesPlayoutPhase = 0; 00465 return move; 00466 } 00467 00468 /** Get komi including extra handicap komi points, if used by the rules. */ 00469 template<class POLICY> 00470 float GoUctGlobalSearchState<POLICY>::GetKomi() const 00471 { 00472 const GoRules& rules = Board().Rules(); 00473 float komi = rules.Komi().ToFloat(); 00474 if (rules.ExtraHandicapKomi()) 00475 komi += rules.Handicap(); 00476 return komi; 00477 } 00478 00479 template<class POLICY> 00480 inline POLICY* GoUctGlobalSearchState<POLICY>::Policy() 00481 { 00482 return m_policy.get(); 00483 } 00484 00485 template<class POLICY> 00486 void GoUctGlobalSearchState<POLICY>::SetPolicy(POLICY* policy) 00487 { 00488 m_policy.reset(policy); 00489 } 00490 00491 template<class POLICY> 00492 void GoUctGlobalSearchState<POLICY>::StartPlayout() 00493 { 00494 GoUctState::StartPlayout(); 00495 m_passMovesPlayoutPhase = 0; 00496 m_mercyRuleTriggered = false; 00497 const GoBoard& bd = Board(); 00498 m_stoneDiff = bd.All(SG_BLACK).Size() - bd.All(SG_WHITE).Size(); 00499 m_policy->StartPlayout(); 00500 } 00501 00502 template<class POLICY> 00503 void GoUctGlobalSearchState<POLICY>::StartPlayouts() 00504 { 00505 GoUctState::StartPlayouts(); 00506 } 00507 00508 template<class POLICY> 00509 void GoUctGlobalSearchState<POLICY>::StartSearch() 00510 { 00511 GoUctState::StartSearch(); 00512 const GoBoard& bd = Board(); 00513 int size = bd.Size(); 00514 float maxScore = size * size + GetKomi(); 00515 m_invMaxScore = 1 / maxScore; 00516 m_initialMoveNumber = bd.MoveNumber(); 00517 m_mercyRuleThreshold = static_cast<int>(0.3 * size * size); 00518 ClearTerritoryStatistics(); 00519 } 00520 00521 //---------------------------------------------------------------------------- 00522 00523 /** Factory for creating a GoUctGlobalSearchState. 00524 @tparam POLICY The playout policy 00525 @tparam FACTORY The factory for the playout policy 00526 */ 00527 template<class POLICY, class FACTORY> 00528 class GoUctGlobalSearchStateFactory 00529 : public SgUctThreadStateFactory 00530 { 00531 public: 00532 /** Constructor. 00533 @param bd 00534 @param playoutPolicyFactory Factory for playout policies. 00535 Stores a reference. Lifetime of parameter must exceed the lifetime of 00536 this instance. 00537 @param safe 00538 @param allSafe 00539 */ 00540 GoUctGlobalSearchStateFactory(GoBoard& bd, 00541 FACTORY& playoutPolicyFactory, 00542 const GoUctPlayoutPolicyParam& policyParam, 00543 const SgBWSet& safe, 00544 const SgPointArray<bool>& allSafe); 00545 00546 SgUctThreadState* Create(std::size_t threadId, const SgUctSearch& search); 00547 00548 private: 00549 GoBoard& m_bd; 00550 00551 FACTORY& m_playoutPolicyFactory; 00552 00553 const GoUctPlayoutPolicyParam& m_policyParam; 00554 00555 const SgBWSet& m_safe; 00556 00557 const SgPointArray<bool>& m_allSafe; 00558 }; 00559 00560 template<class POLICY, class FACTORY> 00561 GoUctGlobalSearchStateFactory<POLICY,FACTORY> 00562 ::GoUctGlobalSearchStateFactory(GoBoard& bd, 00563 FACTORY& playoutPolicyFactory, 00564 const GoUctPlayoutPolicyParam& policyParam, 00565 const SgBWSet& safe, 00566 const SgPointArray<bool>& allSafe) 00567 : m_bd(bd), 00568 m_playoutPolicyFactory(playoutPolicyFactory), 00569 m_policyParam(policyParam), 00570 m_safe(safe), 00571 m_allSafe(allSafe) 00572 { 00573 } 00574 00575 //---------------------------------------------------------------------------- 00576 00577 /** Full board Monte-Carlo tree search. 00578 @tparam POLICY The playout policy 00579 @tparam FACTORY The factory for the playout policy 00580 */ 00581 template<class POLICY, class FACTORY> 00582 class GoUctGlobalSearch 00583 : public GoUctSearch 00584 { 00585 public: 00586 GoUctGlobalSearchStateParam m_param; 00587 00588 /** Constructor. 00589 @param bd 00590 @param playoutPolicyFactory Creates multiple instances of the playout 00591 policies. Takes ownership. playoutPolicyFactory should not have 00592 SetSafe() and SetAllSafe() already set, because the search will call 00593 these functions using its own safety information. 00594 */ 00595 GoUctGlobalSearch(GoBoard& bd, 00596 FACTORY* playoutPolicyFactory, 00597 const GoUctPlayoutPolicyParam& policyParam); 00598 00599 /** @name Pure virtual functions of SgUctSearch */ 00600 // @{ 00601 00602 float UnknownEval() const; 00603 00604 // @} // @name 00605 00606 00607 /** @name Virtual functions of SgUctSearch */ 00608 // @{ 00609 00610 void OnStartSearch(); 00611 00612 void OnSearchIteration(std::size_t gameNumber, int threadId, 00613 const SgUctGameInfo& info); 00614 00615 // @} // @name 00616 00617 00618 /** Set default search parameters optimized for a board size. */ 00619 void SetDefaultParameters(int boardSize); 00620 00621 /** Output live graphics commands for GoGui. 00622 Similar to the GOUCT_LIVEGFX_COUNTS mode in GoUctSearch, but the 00623 influence data shows the terriroy statistics (which must be enabled) 00624 instead of the move values. No move counts are shown. 00625 The live graphics interval from GoUctSearch will be used. 00626 @see GoUctSearch::LiveGfxInterval() 00627 */ 00628 bool GlobalSearchLiveGfx() const; 00629 00630 /** See GlobalSearchLiveGfx() */ 00631 void SetGlobalSearchLiveGfx(bool enable); 00632 00633 private: 00634 SgBWSet m_safe; 00635 00636 SgPointArray<bool> m_allSafe; 00637 00638 boost::scoped_ptr<FACTORY> m_playoutPolicyFactory; 00639 00640 GoRegionBoard m_regions; 00641 00642 /** See GlobalSearchLiveGfx() */ 00643 bool m_globalSearchLiveGfx; 00644 }; 00645 00646 template<class POLICY, class FACTORY> 00647 GoUctGlobalSearch<POLICY,FACTORY>::GoUctGlobalSearch(GoBoard& bd, 00648 FACTORY* playoutFactory, 00649 const GoUctPlayoutPolicyParam& policyParam) 00650 : GoUctSearch(bd, 0), 00651 m_playoutPolicyFactory(playoutFactory), 00652 m_regions(bd), 00653 m_globalSearchLiveGfx(GOUCT_LIVEGFX_NONE) 00654 { 00655 SgUctThreadStateFactory* stateFactory = 00656 new GoUctGlobalSearchStateFactory<POLICY,FACTORY>(bd, 00657 *playoutFactory, 00658 policyParam, 00659 m_safe, m_allSafe); 00660 SetThreadStateFactory(stateFactory); 00661 SetDefaultParameters(bd.Size()); 00662 } 00663 00664 template<class POLICY, class FACTORY> 00665 inline bool GoUctGlobalSearch<POLICY,FACTORY>::GlobalSearchLiveGfx() const 00666 { 00667 return m_globalSearchLiveGfx; 00668 } 00669 00670 template<class POLICY, class FACTORY> 00671 void GoUctGlobalSearch<POLICY,FACTORY>::OnSearchIteration( 00672 std::size_t gameNumber, 00673 int threadId, 00674 const SgUctGameInfo& info) 00675 { 00676 GoUctSearch::OnSearchIteration(gameNumber, threadId, info); 00677 if (m_globalSearchLiveGfx && threadId == 0 00678 && gameNumber % LiveGfxInterval() == 0) 00679 { 00680 const GoUctGlobalSearchState<POLICY>& state = 00681 dynamic_cast<GoUctGlobalSearchState<POLICY>&>(ThreadState(0)); 00682 SgDebug() << "gogui-gfx:\n"; 00683 GoUctUtil::GfxBestMove(*this, ToPlay(), SgDebug()); 00684 GoUctUtil::GfxTerritoryStatistics(state.m_territoryStatistics, 00685 Board(), SgDebug()); 00686 GoUctUtil::GfxStatus(*this, SgDebug()); 00687 SgDebug() << '\n'; 00688 } 00689 } 00690 00691 template<class POLICY, class FACTORY> 00692 void GoUctGlobalSearch<POLICY,FACTORY>::OnStartSearch() 00693 { 00694 GoUctSearch::OnStartSearch(); 00695 m_safe.Clear(); 00696 m_allSafe.Fill(false); 00697 if (GOUCT_USE_SAFETY_SOLVER) 00698 { 00699 GoBoard& bd = Board(); 00700 GoSafetySolver solver(bd, &m_regions); 00701 solver.FindSafePoints(&m_safe); 00702 for (GoBoard::Iterator it(bd); it; ++it) 00703 m_allSafe[*it] = m_safe.OneContains(*it); 00704 } 00705 if (m_globalSearchLiveGfx && ! m_param.m_territoryStatistics) 00706 SgWarning() << 00707 "GoUctGlobalSearch: " 00708 "live graphics need territory statistics enabled\n"; 00709 } 00710 00711 template<class POLICY, class FACTORY> 00712 void GoUctGlobalSearch<POLICY,FACTORY>::SetDefaultParameters(int boardSize) 00713 { 00714 SetFirstPlayUrgency(1); 00715 SetMoveSelect(SG_UCTMOVESELECT_COUNT); 00716 SetRave(true); 00717 SetExpandThreshold(1); 00718 SetVirtualLoss(true); 00719 SetBiasTermConstant(0.0); 00720 if (boardSize < 15) 00721 { 00722 // These parameters were mainly tested on 9x9 00723 // using GoUctPlayoutPolicy and GoUctDefaultPriorKnowledge 00724 SetRaveWeightInitial(1.0); 00725 SetRaveWeightFinal(5000); 00726 m_param.m_lengthModification = 0; 00727 } 00728 else 00729 { 00730 // These parameters were mainly tested on 19x19 00731 // using GoUctPlayoutPolicy and GoUctDefaultPriorKnowledge 00732 SetRaveWeightInitial(0.9); 00733 SetRaveWeightFinal(5000); 00734 m_param.m_lengthModification = 0.00028; 00735 } 00736 } 00737 00738 template<class POLICY, class FACTORY> 00739 inline void GoUctGlobalSearch<POLICY,FACTORY>::SetGlobalSearchLiveGfx( 00740 bool enable) 00741 { 00742 m_globalSearchLiveGfx = enable; 00743 } 00744 00745 template<class POLICY, class FACTORY> 00746 float GoUctGlobalSearch<POLICY,FACTORY>::UnknownEval() const 00747 { 00748 // Note: 0.5 is not a possible value for a Bernoulli variable, better 00749 // use 0? 00750 return 0.5; 00751 } 00752 00753 //---------------------------------------------------------------------------- 00754 00755 template<class POLICY, class FACTORY> 00756 SgUctThreadState* GoUctGlobalSearchStateFactory<POLICY,FACTORY>::Create( 00757 std::size_t threadId, const SgUctSearch& search) 00758 { 00759 const GoUctGlobalSearch<POLICY,FACTORY>& globalSearch = 00760 dynamic_cast<const GoUctGlobalSearch<POLICY,FACTORY>&>(search); 00761 GoUctGlobalSearchState<POLICY>* state = 00762 new GoUctGlobalSearchState<POLICY>(threadId, globalSearch.Board(), 0, 00763 globalSearch.m_param, 00764 m_policyParam, 00765 m_safe, m_allSafe); 00766 POLICY* policy = m_playoutPolicyFactory.Create(state->UctBoard()); 00767 state->SetPolicy(policy); 00768 return state; 00769 } 00770 00771 //---------------------------------------------------------------------------- 00772 00773 #endif // GOUCT_GLOBALSEARCH_H