diff --git a/README.md b/README.md
index ce7e87b97..e751cf556 100644
--- a/README.md
+++ b/README.md
@@ -153,6 +153,10 @@ Run a high-performance match engine that will play a pool of bots against each other
 
   * `./katago match -config <CONFIG>.cfg -log-file match.log -sgf-output-dir <DIR>`
 
+Tune PUCT search hyperparameters via QRS-Tune sequential optimization:
+
+  * `./katago tune-params -config <CONFIG>.cfg`
+
 Force OpenCL tuner to re-tune:
 
   * `./katago tuner -config <CONFIG>.cfg`
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 8db79ca73..e909a76b6 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -307,6 +307,7 @@ add_executable(katago
   tests/tinymodel.cpp
   tests/tinymodeldata.cpp
   distributed/client.cpp
+  qrstune/QRSOptimizer.cpp
   command/commandline.cpp
   command/analysis.cpp
   command/benchmark.cpp
@@ -318,6 +319,7 @@ add_executable(katago
   command/gputest.cpp
   command/gtp.cpp
   command/match.cpp
+  command/tuneparams.cpp
   command/misc.cpp
   command/runtests.cpp
   command/sandbox.cpp
diff --git a/cpp/README.md b/cpp/README.md
index 1f5d8d21f..90e328144 100644
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -35,6 +35,7 @@ Summary of source folders, in approximate dependency order, from lowest level to
 * `distributed` - Code for talking to https webserver for volunteers to contribute distributed self-play games for training.
 * `tests` - A variety of tests.
 * `models` - A directory with a small number of small-sized (and not very strong) models for running tests.
+* `qrstune` - QRS-Tune (Quadratic Response Surface) optimizer for hyperparameter tuning.
 * `command` - Top-level subcommands callable by users. GTP, analysis commands, benchmarking, selfplay data generation, etc.
   * `commandline.{cpp,h}` - Common command line logic shared by all subcommands.
   * `gtp.cpp` - Main GTP engine.
@@ -44,6 +45,7 @@ Summary of source folders, in approximate dependency order, from lowest level to
   * `selfplay.cpp` - Selfplay data generation engine.
   * `gatekeeper.cpp` - Gating engine to filter neural nets for selfplay data generation.
   * `match.cpp` - Match engine for testing different parameters that can use huge batch sizes to efficiently play games in parallel.
+  * `tuneparams.cpp` - Tune PUCT search hyperparameters via QRS-Tune sequential optimization.
 
 Other folders:
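A note on how the pieces below fit together: `match.cpp` accumulates a pairwise W/L/D map during play and hands it to the new Bradley-Terry estimator in `core/elo.{cpp,h}`. A minimal sketch of that data flow, assuming only the declarations added in this diff (the counts are made up for illustration; map keys must satisfy nameA < nameB):

    std::map<std::pair<std::string,std::string>, std::array<int64_t,3>> pairStats;
    pairStats[{"botA", "botB"}] = {60, 35, 5};  // {winsA, winsB, draws}; draws count 0.5 each
    std::vector<std::string> bots = {"botA", "botB"};
    std::vector<double> elo, eloStderr;
    bool converged = ComputeElos::computeBradleyTerryElo(bots, pairStats, elo, eloStderr);
    // elo[0] is the reference bot (0.0); elo[1] is botB's Elo relative to botA.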
diff --git a/cpp/command/match.cpp b/cpp/command/match.cpp
index 721c3ef20..9db0d1050 100644
--- a/cpp/command/match.cpp
+++ b/cpp/command/match.cpp
@@ -1,4 +1,6 @@
 #include "../core/global.h"
+#include "../core/elo.h"
+#include "../core/fancymath.h"
 #include "../core/fileutils.h"
 #include "../core/makedir.h"
 #include "../core/config_parser.h"
@@ -11,6 +13,7 @@
 #include "../command/commandline.h"
 #include "../main.h"
 
+#include <array>
 #include <csignal>
 
 using namespace std;
@@ -250,10 +253,13 @@ int MainCmds::match(const vector<string>& args) {
   int64_t gameCount = 0;
   std::map<string,double> timeUsedByBotMap;
   std::map<string,double> movesByBotMap;
+  map<pair<string,string>, array<int64_t,3>> pairStats;
+  //key: {nameA, nameB} with nameA < nameB lexicographically
+  //value: {winsA, winsB, draws}
 
   auto runMatchLoop = [
     &gameRunner,&matchPairer,&sgfOutputDir,&logger,&gameSeedBase,&patternBonusTables,
-    &statsMutex, &gameCount, &timeUsedByBotMap, &movesByBotMap
+    &statsMutex, &gameCount, &timeUsedByBotMap, &movesByBotMap, &pairStats
   ](
     uint64_t threadHash
   ) {
@@ -303,6 +309,20 @@ int MainCmds::match(const vector<string>& args) {
         movesByBotMap[gameData->bName] += (double)gameData->bMoveCount;
         movesByBotMap[gameData->wName] += (double)gameData->wMoveCount;
 
+        //Update pairwise W/L/D stats
+        {
+          const string& bName = gameData->bName;
+          const string& wName = gameData->wName;
+          Player winner = gameData->endHist.winner;
+          bool aIsBlack = (bName < wName);
+          const string& nameA = aIsBlack ? bName : wName;
+          const string& nameB = aIsBlack ? wName : bName;
+          auto& ps = pairStats[{nameA, nameB}];
+          if(winner == P_BLACK) { if(aIsBlack) ps[0]++; else ps[1]++; }
+          else if(winner == P_WHITE) { if(aIsBlack) ps[1]++; else ps[0]++; }
+          else { ps[2]++; }
+        }
+
         int64_t x = gameCount;
         while(x % 2 == 0 && x > 1) x /= 2;
         if(x == 1 || x == 3 || x == 5) {
@@ -344,6 +364,58 @@ int MainCmds::match(const vector<string>& args) {
   for(int i = 0; i<numGameThreads; i++)
     threads[i].join();
 
+  //Report Bradley-Terry Elos and pairwise significance from the collected stats
+  if(!pairStats.empty()) {
+    vector<string> activeBots;
+    {
+      set<string> seen;
+      for(auto& kv : pairStats) {
+        seen.insert(kv.first.first);
+        seen.insert(kv.first.second);
+      }
+      activeBots.assign(seen.begin(), seen.end());
+    }
+
+    vector<double> elo, eloStderr;
+    bool eloConverged = ComputeElos::computeBradleyTerryElo(activeBots, pairStats, elo, eloStderr);
+
+    logger.write("");
+    if(!eloConverged)
+      logger.write("Warning: Bradley-Terry Elo estimation did not fully converge");
+    logger.write("=== match Results ===");
+    logger.write("Global Elo (Bradley-Terry MLE, reference=" + activeBots[0] + "):");
+    for(int i = 0; i < (int)activeBots.size(); i++) {
+      string sign = (elo[i] >= 0) ? "+" : "";
+      string line = "  " + activeBots[i] + " : " +
+        sign + Global::strprintf("%.1f", elo[i]) + " +/- " + Global::strprintf("%.1f", eloStderr[i]);
+      if(i == 0) line += " (reference)";
+      logger.write(line);
+    }
+    logger.write("");
+    logger.write("Pairwise summary:");
+    for(auto& kv : pairStats) {
+      int64_t wA = kv.second[0], wB = kv.second[1], d = kv.second[2];
+      int64_t total = wA + wB + d;
+      if(total == 0) continue;
+      double wins = wA + 0.5 * d;
+      double lo, hi;
+      FancyMath::wilsonCI95(wins, (double)total, lo, hi);
+      double pval = FancyMath::oneTailedPValue(wins, (double)total);
+      string sig = (pval < 0.05) ? " *" : "";
+      logger.write(
+        "  " + kv.first.first + " vs " + kv.first.second +
+        " : Games=" + Global::int64ToString(total) +
+        " W=" + Global::int64ToString(wA) +
+        " L=" + Global::int64ToString(wB) +
+        " D=" + Global::int64ToString(d) +
+        " | " + kv.first.first + " winrate=" + Global::strprintf("%.3f", wins/total) +
+        " [95% CI: " + Global::strprintf("%.3f", lo) + ", " + Global::strprintf("%.3f", hi) + "]" +
+        " | p=" + Global::strprintf("%.4f", pval) + sig
+      );
+    }
+    logger.write("");
+  }
+
   delete matchPairer;
   delete gameRunner;
diff --git a/cpp/command/runtests.cpp b/cpp/command/runtests.cpp
index 75bb9760c..66429fb32 100644
--- a/cpp/command/runtests.cpp
+++ b/cpp/command/runtests.cpp
@@ -5,6 +5,8 @@
 #include "../core/rand.h"
 #include "../core/elo.h"
 #include "../core/fancymath.h"
+#include "../qrstune/QRSOptimizer.h"
+#include "../command/tuneparams.h"
 #include "../core/config_parser.h"
 #include "../core/datetime.h"
 #include "../core/fileutils.h"
@@ -35,6 +37,8 @@ int MainCmds::runtests(const vector<string>& args) {
   DateTime::runTests();
   FancyMath::runTests();
   ComputeElos::runTests();
+  QRSTune::runTests();
+  TuneParams::runTests();
   Base64::runTests();
   ThreadTest::runTests();
diff --git a/cpp/command/tuneparams.cpp b/cpp/command/tuneparams.cpp
new file mode 100644
index 000000000..d9e112754
--- /dev/null
+++ b/cpp/command/tuneparams.cpp
@@ -0,0 +1,515 @@
+//command/tuneparams.cpp
+//KataGo hyperparameter tuning via QRS-Tune sequential optimization.
+//Runs two bots (base reference vs experiment) for numTrials games,
+//adapting the experiment bot's PUCT parameters toward higher win rates.
+
+#include "../core/global.h"
+#include "../core/config_parser.h"
+#include "../core/logger.h"
+#include "../core/rand.h"
+#include "../search/searchparams.h"
+#include "../program/setup.h"
+#include "../program/play.h"
+#include "../program/playsettings.h"
+#include "../command/commandline.h"
+#include "../main.h"
+
+#include "../qrstune/QRSOptimizer.h"
+#include "../command/tuneparams.h"
+#include "../core/test.h"
+
+#include <atomic>
+#include <csignal>
+#include "../core/timer.h"
+
+using namespace std;
+
+static std::atomic<bool> shouldStop(false);
+static void signalHandler(int signal)
+{
+  if(signal == SIGINT || signal == SIGTERM) {
+    shouldStop.store(true);
+  }
+}
+
+struct TuneDimension {
+  const char* name;
+  const char* shortName;
+  double defaultMin;
+  double defaultMax;
+  const char* minKey;
+  const char* maxKey;
+  double SearchParams::* field;
+};
+
+static const TuneDimension tuneDims[] = {
+  {"cpuctExploration", "Cpuct", 0.5, 1.5,
+   "cpuctExplorationMin", "cpuctExplorationMax", &SearchParams::cpuctExploration},
+  {"fpuReductionMax", "Fpu", 0.0, 0.5,
+   "fpuReductionMaxMin", "fpuReductionMaxMax", &SearchParams::fpuReductionMax},
+  {"rootFpuReductionMax", "RootFpu", 0.0, 0.4,
+   "rootFpuReductionMaxMin", "rootFpuReductionMaxMax", &SearchParams::rootFpuReductionMax},
+};
+static const int nAllowedDims = (int)(sizeof(tuneDims) / sizeof(tuneDims[0]));
+
+//Map QRS-Tune normalized coordinate x in [-1,+1] to the real PUCT value.
+static double qrsDimToReal(int dim, double x, const double* mins, const double* maxs) {
+  double center = (mins[dim] + maxs[dim]) * 0.5;
+  double radius = (maxs[dim] - mins[dim]) * 0.5;
+  return center + x * radius;
+}
+
+static const double Z_95 = 1.96;
+
+// Compute 95% CI bounds for each parameter in real (non-normalized) coordinates.
+// Returns false if CIs are unavailable. Fills the ciLo/ciHi/clamped arrays of size nDims.
+static bool computeParamCIs(int nDims,
+                            const QRSTune::QRSTuner& tuner,
+                            const vector<double>& vBest,
+                            const double* mins, const double* maxs,
+                            double* ciLo, double* ciHi, bool* clamped) {
+  vector<double> se(nDims);
+  bool hasCIs = tuner.model().computeOptimumSE(
+    tuner.buffer().xs(), se.data(), clamped);
+  if(!hasCIs) return false;
+  for(int d = 0; d < nDims; d++) {
+    double radius = (maxs[d] - mins[d]) * 0.5;
+    double seReal = se[d] * radius;
+    double bestReal = qrsDimToReal(d, vBest[d], mins, maxs);
+    ciLo[d] = bestReal - Z_95 * seReal;
+    ciHi[d] = bestReal + Z_95 * seReal;
+  }
+  return true;
+}
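+// Worked example of the scaling above: the CI half-width in real units is
+// se[d] * (max - min) / 2, so an SE of 0.10 in normalized coordinates on the
+// default cpuctExploration range [0.5, 1.5] gives +/- 1.96 * 0.05 ~= 0.098.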
+
+//Print ASCII-art regression curve for each PUCT dimension.
+//For dimension d: fix all other dims at vBest, sweep d from -1 to +1.
+static void printRegressionCurves(const vector<TuneDimension>& activeDims,
+                                  const QRSTune::QRSTuner& tuner,
+                                  const vector<double>& vBest,
+                                  const double* mins, const double* maxs,
+                                  Logger& logger) {
+  const int plotW = 60;
+  const int plotH = 20;
+  const int nDims = (int)activeDims.size();
+  double bestWinRate = tuner.model().predict(vBest.data());
+
+  for(int dim = 0; dim < nDims; dim++) {
+    vector<string> canvas(plotH, string(plotW, ' '));
+
+    int bestCol = (int)((vBest[dim] + 1.0) / 2.0 * (plotW - 1) + 0.5);
+    bestCol = max(0, min(plotW - 1, bestCol));
+
+    vector<double> xSlice(vBest);
+    for(int col = 0; col < plotW; col++) {
+      double t = -1.0 + 2.0 * col / (plotW - 1);
+      xSlice[dim] = t;
+      double winRate = tuner.model().predict(xSlice.data());
+
+      int row = (int)((1.0 - winRate) * (plotH - 1) + 0.5);
+      row = max(0, min(plotH - 1, row));
+      canvas[row][col] = (col == bestCol) ? '*' : 'o';
+    }
+
+    double bestReal = qrsDimToReal(dim, vBest[dim], mins, maxs);
+    logger.write("");
+    logger.write(
+      "[Dim " + Global::intToString(dim) + "] " + activeDims[dim].name +
+      " (best QRS=" + Global::strprintf("%.3f", vBest[dim]) +
+      " -> real=" + Global::strprintf("%.3f", bestReal) +
+      ", est.winrate=" + Global::strprintf("%.3f", bestWinRate) + ")"
+    );
+
+    for(int row = 0; row < plotH; row++) {
+      string label;
+      if(row == 0) label = "1.0 |";
+      else if(row == plotH / 2) label = "0.5 |";
+      else if(row == plotH - 1) label = "0.0 |";
+      else label = "    |";
+      logger.write(label + canvas[row]);
+    }
+    logger.write("    +" + string(plotW, '-'));
+
+    {
+      string line(plotW + 5, ' ');
+      const int off = 5;
+      auto place = [&](int col, const string& lbl) {
+        int pos = off + col - (int)lbl.size() / 2;
+        if(pos < 0) pos = 0;
+        for(int i = 0; i < (int)lbl.size() && pos + i < (int)line.size(); i++)
+          line[pos + i] = lbl[i];
+      };
+      place(0, Global::strprintf("%.3f", qrsDimToReal(dim, -1.0, mins, maxs)));
+      place(plotW / 2, Global::strprintf("%.3f", qrsDimToReal(dim, 0.0, mins, maxs)));
+      place(plotW - 1, Global::strprintf("%.3f", qrsDimToReal(dim, +1.0, mins, maxs)));
+      size_t last = line.find_last_not_of(' ');
+      logger.write(line.substr(0, last + 1));
+    }
+  }
+  logger.write("");
+}
+
+int MainCmds::tuneparams(const vector<string>& args) {
+  Board::initHash();
+  ScoreValue::initTables();
+  Rand seedRand;
+
+  ConfigParser cfg;
+  string logFile;
+  string configFilePath; //Original config file path for the suggested match command
+  try {
+    KataGoCommandLine cmd(
+      "Tune KataGo hyperparameters using sequential optimization (QRS-Tune).\n"
+      "Runs numTrials games between a fixed reference bot (bot0) and an\n"
+      "experiment bot (bot1) whose PUCT parameters are adapted each trial."
+    );
+    cmd.addConfigFileArg("", "tune_params.cfg");
+
+    TCLAP::ValueArg<string> logFileArg("", "log-file", "Log file to output to", false, string(), "FILE");
+    cmd.add(logFileArg);
+    cmd.setShortUsageArgLimit();
+    cmd.addOverrideConfigArg();
+
+    cmd.parseArgs(args);
+    logFile = logFileArg.getValue();
+    cmd.getConfig(cfg);
+    configFilePath = cfg.getFileName();
+    string suffix = " and/or command-line and query overrides";
+    if(Global::isSuffix(configFilePath, suffix))
+      configFilePath = Global::chopSuffix(configFilePath, suffix);
+  }
+  catch(TCLAP::ArgException& e) {
+    cerr << "Error: " << e.error() << " for argument " << e.argId() << endl;
+    return 1;
+  }
+
+  Logger logger(&cfg);
+  logger.addFile(logFile);
+  logger.write("tune-params starting...");
+  logger.write(string("Git revision: ") + Version::getGitRevision());
+
+  //Read tuning-specific config
+  int numTrials = cfg.getInt("numTrials", 1, 100000);
+
+  //Resolve which dimension to tune from cfg's tuneDimension key.
+  int selectedDimIdx = TuneParams::resolveDimension(cfg);
+  vector<TuneDimension> activeDims = { tuneDims[selectedDimIdx] };
+  const int nDims = (int)activeDims.size();
+  logger.write("Tuning dimension: " + string(activeDims[0].name));
+
+  //Search ranges (configurable; defaults preserve prior behaviour)
+  vector<double> qrsMins(nDims), qrsMaxs(nDims);
+  for(int d = 0; d < nDims; d++) {
+    qrsMins[d] = cfg.contains(activeDims[d].minKey)
+      ? cfg.getDouble(activeDims[d].minKey, -1e9, 1e9)
+      : activeDims[d].defaultMin;
+    qrsMaxs[d] = cfg.contains(activeDims[d].maxKey)
+      ? cfg.getDouble(activeDims[d].maxKey, -1e9, 1e9)
+      : activeDims[d].defaultMax;
+    if(qrsMins[d] >= qrsMaxs[d])
+      throw StringError(
+        string("tune-params: ") + activeDims[d].minKey + " must be < " + activeDims[d].maxKey);
+  }
+  {
+    string rangeStr;
+    for(int d = 0; d < nDims; d++) {
+      if(d > 0) rangeStr += ", ";
+      rangeStr += string(activeDims[d].name) + "=[" +
+        Global::strprintf("%.4f", qrsMins[d]) + "," + Global::strprintf("%.4f", qrsMaxs[d]) + "]";
+    }
+    logger.write("QRS ranges: " + rangeStr);
+  }
+
+  //Load search params for both bots
+  vector<SearchParams> paramss = Setup::loadParams(cfg, Setup::SETUP_FOR_MATCH);
+  if((int)paramss.size() < 2)
+    throw StringError("tune-params: config must define numBots = 2 (bot0 = reference, bot1 = experiment)");
+
+  //Model files
+  string nnModelFile0 = cfg.getString("nnModelFile0");
+  string nnModelFile1 = cfg.getString("nnModelFile1");
+  vector<string> nnModelFiles = {nnModelFile0, nnModelFile1};
+
+  //Game runner setup
+  PlaySettings playSettings = PlaySettings::loadForMatch(cfg);
+  GameRunner* gameRunner = new GameRunner(cfg, playSettings, logger);
+  int maxBoardX = gameRunner->getGameInitializer()->getMaxBoardXSize();
+  int maxBoardY = gameRunner->getGameInitializer()->getMaxBoardYSize();
+
+  //Initialize neural net inference
+  Setup::initializeSession(cfg);
+  const int expectedConcurrentEvals = max(paramss[0].numThreads, paramss[1].numThreads);
+  vector<string> expectedSha256s;
+  vector<NNEvaluator*> nnEvals = Setup::initializeNNEvaluators(
+    nnModelFiles, nnModelFiles, expectedSha256s,
+    cfg, logger, seedRand,
+    expectedConcurrentEvals,
+    maxBoardX, maxBoardY,
+    /*defaultMaxBatchSize=*/-1,
+    /*defaultRequireExactNNLen=*/(maxBoardX == gameRunner->getGameInitializer()->getMinBoardXSize() &&
+                                  maxBoardY == gameRunner->getGameInitializer()->getMinBoardYSize()),
+    /*disableFP16=*/false,
+    Setup::SETUP_FOR_MATCH
+  );
+  logger.write("Loaded neural nets");
+
+  //Signal handling for graceful shutdown
+  if(!std::atomic_is_lock_free(&shouldStop))
+    throw StringError("shouldStop is not lock free, signal-quitting mechanism for terminating will NOT work!");
+  std::signal(SIGINT, signalHandler);
+  std::signal(SIGTERM, signalHandler);
+
+  //QRS-Tune setup
+  uint64_t qrsSeed = seedRand.nextUInt64();
+  QRSTune::QRSTuner tuner(nDims, qrsSeed, numTrials);
+
+  bool verbose = cfg.contains("verbose") ? cfg.getBool("verbose") : false;
+  if(verbose)
+    tuner.setLogger(&logger);
+
+  const string gameSeedBase = Global::uint64ToHexString(seedRand.nextUInt64());
+
+  int wins = 0, losses = 0, draws = 0, nullGames = 0;
+  int reportInterval = std::max(1, numTrials / 10);
+  ClockTimer timer;
+
+  logger.write("Starting " + Global::intToString(numTrials) + " tuning trials");
+
+  for(int trial = 0; trial < numTrials; trial++) {
+    vector<double> sample = tuner.nextSample();
+
+    SearchParams expParams = paramss[1];
+    for(int d = 0; d < nDims; d++)
+      expParams.*(activeDims[d].field) = qrsDimToReal(d, sample[d], qrsMins.data(), qrsMaxs.data());
+
+    //Alternate colors to remove first-move advantage bias
+    bool expIsBlack = (trial % 2 == 0);
+    MatchPairer::BotSpec botSpecB, botSpecW;
+    if(expIsBlack) {
+      botSpecB.botIdx = 1;
+      botSpecB.botName = "experiment";
+      botSpecB.nnEval = nnEvals[1];
+      botSpecB.baseParams = expParams;
+      botSpecW.botIdx = 0;
+      botSpecW.botName = "base";
+      botSpecW.nnEval = nnEvals[0];
+      botSpecW.baseParams = paramss[0];
+    } else {
+      botSpecB.botIdx = 0;
+      botSpecB.botName = "base";
+      botSpecB.nnEval = nnEvals[0];
+      botSpecB.baseParams = paramss[0];
+      botSpecW.botIdx = 1;
+      botSpecW.botName = "experiment";
+      botSpecW.nnEval = nnEvals[1];
+      botSpecW.baseParams = expParams;
+    }
+
+    string seed = gameSeedBase + ":" + Global::intToString(trial);
+    auto shouldStopFunc = []() noexcept { return shouldStop.load(); };
+
+    FinishedGameData* gameData = gameRunner->runGame(
+      seed, botSpecB, botSpecW,
+      /*forkData=*/NULL,
+      /*startPosSample=*/NULL,
+      logger,
+      shouldStopFunc,
+      /*shouldPause=*/NULL,
+      /*checkForNewNNEval=*/NULL,
+      /*afterInitialization=*/NULL,
+      /*onEachMove=*/NULL
+    );
+
+    double outcome = 0.5;
+    if(gameData != NULL) {
+      Player winner = gameData->endHist.winner;
+      if(expIsBlack) {
+        if(winner == P_BLACK) { outcome = 1.0; wins++; }
+        else if(winner == P_WHITE) { outcome = 0.0; losses++; }
+        else { outcome = 0.5; draws++; }
+      } else {
+        if(winner == P_WHITE) { outcome = 1.0; wins++; }
+        else if(winner == P_BLACK) { outcome = 0.0; losses++; }
+        else { outcome = 0.5; draws++; }
+      }
+      delete gameData;
+    } else {
+      nullGames++;
+      logger.write("Warning: trial " + Global::intToString(trial) + " returned null game data");
+      //Too many null games corrupt the optimizer with noise -- abort early.
+      if(trial + 1 >= 20 && nullGames * 20 > trial + 1) {
+        logger.write("Error: >5% of games returned null data (" +
+          Global::intToString(nullGames) + "/" + Global::intToString(trial + 1) +
+          "), aborting. Check model files and config.");
+        break;
+      }
+      continue; //Do not feed null outcomes to the optimizer
+    }
+
+    tuner.addResult(sample, outcome, expIsBlack ? " as black" : " as white");
+
+    if(shouldStop.load())
+      break;
+
+    //Progress report every 10% of trials
+    if((trial + 1) % reportInterval == 0) {
+      vector<double> vBest = tuner.bestCoords();
+
+      int completed = trial + 1;
+      int pct = completed * 100 / numTrials;
+      double elapsed = timer.getSeconds();
+      int etaSec = (int)(elapsed / completed * (numTrials - completed));
+      string eta;
+      if(etaSec < 60)
+        eta = Global::intToString(etaSec) + "s";
+      else if(etaSec < 3600)
+        eta = Global::intToString(etaSec / 60) + "m" + Global::intToString(etaSec % 60) + "s";
+      else
+        eta = Global::intToString(etaSec / 3600) + "h" + Global::intToString((etaSec % 3600) / 60) + "m";
+
+      {
+        string paramStr;
+        vector<double> ciLo(nDims), ciHi(nDims);
+        std::unique_ptr<bool[]> clampedRaw(new bool[nDims]);
+        if(computeParamCIs(nDims, tuner, vBest, qrsMins.data(), qrsMaxs.data(),
+                           ciLo.data(), ciHi.data(), clampedRaw.get())) {
+          for(int d = 0; d < nDims; d++) {
+            paramStr += Global::strprintf(" %s=[%.4f, %.4f]", activeDims[d].shortName, ciLo[d], ciHi[d]);
+            if(clampedRaw[d]) paramStr += "*";
+          }
+        } else {
+          for(int d = 0; d < nDims; d++)
+            paramStr += Global::strprintf(" %s=%.4f", activeDims[d].shortName,
+                                          qrsDimToReal(d, vBest[d], qrsMins.data(), qrsMaxs.data()));
+        }
+        logger.write(Global::strprintf(
+          "[%d%%] %d/%d | W=%d L=%d D=%d |%s | ETA %s",
+          pct, completed, numTrials, wins, losses, draws, paramStr.c_str(), eta.c_str()
+        ));
+      }
+    }
+  }
+
+  //Final result
+  vector<double> vBest = tuner.bestCoords();
+
+  logger.write("");
+  logger.write("=== tune-params Results ===");
+  logger.write(
+    "Trials: " + Global::intToString(numTrials) +
+    " Wins: " + Global::intToString(wins) +
+    " Losses: " + Global::intToString(losses) +
+    " Draws: " + Global::intToString(draws)
+  );
+  {
+    vector<double> ciLo(nDims), ciHi(nDims);
+    std::unique_ptr<bool[]> clampedRaw(new bool[nDims]);
+    bool hasCIs = computeParamCIs(nDims, tuner, vBest, qrsMins.data(), qrsMaxs.data(),
+                                  ciLo.data(), ciHi.data(), clampedRaw.get());
+
+    for(int d = 0; d < nDims; d++) {
+      double bestReal = qrsDimToReal(d, vBest[d], qrsMins.data(), qrsMaxs.data());
+      if(hasCIs) {
+        string warn = clampedRaw[d] ? " [boundary - CI may be unreliable]" : "";
+        logger.write(Global::strprintf("Best %-25s = %.4f 95%%CI [%.4f, %.4f]%s",
+          activeDims[d].name, bestReal, ciLo[d], ciHi[d], warn.c_str()));
+      } else {
+        logger.write(Global::strprintf("Best %-25s = %.4f (CI unavailable)",
+          activeDims[d].name, bestReal));
+      }
+    }
+  }
+  {
+    string rawStr;
+    for(int d = 0; d < nDims; d++) {
+      if(d > 0) rawStr += ", ";
+      rawStr += Global::doubleToString(vBest[d]);
+    }
+    logger.write("QRS raw coordinates: [" + rawStr + "]");
+  }
+
+  //ASCII-art regression curves (one per PUCT dimension)
+  printRegressionCurves(activeDims, tuner, vBest, qrsMins.data(), qrsMaxs.data(), logger);
+
+  //Suggested match command for verification
+  {
+    string overrides = "botName0=tuned,botName1=default,";
+    for(int d = 0; d < nDims; d++)
+      overrides += string(activeDims[d].name) + "0=" +
+        Global::strprintf("%.4f", qrsDimToReal(d, vBest[d], qrsMins.data(), qrsMaxs.data())) + ",";
+    overrides += "numGameThreads=8,numGamesTotal=200";
+    overrides += ",nnModelFile0=" + nnModelFile0 + ",nnModelFile1=" + nnModelFile1;
+    logger.write("");
+    logger.write("To verify, run a match of tuned (bot0) vs default (bot1):");
+    logger.write(
+      "./katago match -config " + configFilePath +
+      " -override-config \"" + overrides + "\"" +
+      " -log-file match.log -sgf-output-dir match_sgfs/"
+    );
+  }
+
+  //Cleanup
+  delete gameRunner;
+  for(NNEvaluator* eval : nnEvals)
+    delete eval;
+
+  NeuralNet::globalCleanup();
+  ScoreValue::freeTables();
+  return 0;
+}
+
+int TuneParams::resolveDimension(ConfigParser& cfg) {
+  string tuneDimName = cfg.getString("tuneDimension");
+  for(int i = 0; i < nAllowedDims; i++) {
+    if(tuneDimName == tuneDims[i].name) return i;
+  }
+  string allowed;
+  for(int i = 0; i < nAllowedDims; i++) {
+    if(i > 0) allowed += ", ";
+    allowed += tuneDims[i].name;
+  }
+  throw StringError("tune-params: tuneDimension = '" + tuneDimName +
+    "' not recognized; expected one of: " + allowed);
+}
+
+void TuneParams::runTests() {
+  cout << "Running TuneParams tests" << endl;
+
+  // Positive: each allowed name maps to its expected index.
+  {
+    ConfigParser cfg(std::map<string,string>{{"tuneDimension", "cpuctExploration"}});
+    testAssert(TuneParams::resolveDimension(cfg) == 0);
+  }
+  {
+    ConfigParser cfg(std::map<string,string>{{"tuneDimension", "fpuReductionMax"}});
+    testAssert(TuneParams::resolveDimension(cfg) == 1);
+  }
+  {
+    ConfigParser cfg(std::map<string,string>{{"tuneDimension", "rootFpuReductionMax"}});
+    testAssert(TuneParams::resolveDimension(cfg) == 2);
+  }
+
+  // Negative: missing tuneDimension key throws (any exception type -- the
+  // exact type belongs to ConfigParser and is not this unit's contract).
+  {
+    ConfigParser cfg(std::map<string,string>{});
+    bool threw = false;
+    try { (void)TuneParams::resolveDimension(cfg); }
+    catch(const std::exception&) { threw = true; }
+    testAssert(threw);
+  }
+
+  // Negative: unrecognized value throws StringError mentioning the offending
+  // name and at least one allowed name.
+  {
+    ConfigParser cfg(std::map<string,string>{{"tuneDimension", "totallyMadeUpName"}});
+    bool threwStringError = false;
+    string msg;
+    try { (void)TuneParams::resolveDimension(cfg); }
+    catch(const StringError& e) { threwStringError = true; msg = e.what(); }
+    testAssert(threwStringError);
+    testAssert(msg.find("totallyMadeUpName") != string::npos);
+    testAssert(msg.find("cpuctExploration") != string::npos);
+  }
+}
diff --git a/cpp/command/tuneparams.h b/cpp/command/tuneparams.h
new file mode 100644
index 000000000..f8eccad20
--- /dev/null
+++ b/cpp/command/tuneparams.h
@@ -0,0 +1,16 @@
+#ifndef COMMAND_TUNEPARAMS_H_
+#define COMMAND_TUNEPARAMS_H_
+
+#include "../core/config_parser.h"
+
+namespace TuneParams {
+  // Returns the index of the selected dimension within tuneDims[].
+  // Throws StringError if cfg's tuneDimension value is not in the allowlist.
+  // Propagates ConfigParser's exception if tuneDimension is missing.
+  int resolveDimension(ConfigParser& cfg);
+
+  // Self-tests for the dim-resolver. Wired into MainCmds::runtests.
+  void runTests();
+}
+
+#endif // COMMAND_TUNEPARAMS_H_
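The example config that follows doubles as reference documentation for every key the subcommand reads. For a first run it should suffice to copy it, point both nnModelFile keys at a model (the same file for both bots is fine), and use the invocation from its header:

    ./katago tune-params -config configs/tune_params_example.cfg -log-file tune.log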
diff --git a/cpp/configs/tune_params_example.cfg b/cpp/configs/tune_params_example.cfg
new file mode 100644
index 000000000..e051afd06
--- /dev/null
+++ b/cpp/configs/tune_params_example.cfg
@@ -0,0 +1,129 @@
+# Example config for tune-params (QRS-Tune PUCT hyperparameter tuning)
+# This is an example template config for the "tune-params" subcommand of KataGo, e.g.:
+# ./katago tune-params -config configs/tune_params_example.cfg -log-file tune.log
+#
+# This command runs sequential head-to-head matches between a fixed reference bot (bot0)
+# and an experiment bot (bot1) whose PUCT parameters are adapted each trial using
+# QRS-Tune (Quadratic Response Surface sequential optimization).
+#
+# After all trials, it reports the best-found value for the selected
+# tuneDimension, along with an ASCII regression curve showing the
+# parameter's estimated effect on win rate.
+#
+# See the gtp and match configs for descriptions of most search and GPU params.
+
+# Tuning------------------------------------------------------------------------------------
+
+# Total number of tuning trials (games). More trials = better estimates but slower.
+# A few hundred trials is a reasonable starting point; 1000+ for higher confidence.
+numTrials = 500
+
+# Which PUCT parameter to tune. Required. One of:
+#   cpuctExploration    - PUCT exploration constant
+#   fpuReductionMax     - FPU reduction at non-root nodes
+#   rootFpuReductionMax - FPU reduction at the root node
+tuneDimension = cpuctExploration
+
+# Search range for the selected dimension. Only the pair that matches
+# `tuneDimension` is read; the others are ignored. If omitted, the
+# defaults below are used:
+#   cpuctExploration:    [0.5, 1.5]
+#   fpuReductionMax:     [0.0, 0.5]
+#   rootFpuReductionMax: [0.0, 0.4]
+#
+# cpuctExplorationMin = 0.5    ; cpuctExplorationMax = 1.5
+# fpuReductionMaxMin = 0.0     ; fpuReductionMaxMax = 0.5
+# rootFpuReductionMaxMin = 0.0 ; rootFpuReductionMaxMax = 0.4
+
+# Logs------------------------------------------------------------------------------------
+
+logSearchInfo = false
+logMoves = false
+logGamesEvery = 100
+logToStdout = true
+# verbose = false  # Log QRS optimizer internals (refits, pruning, sample coords)
+
+# Bots-------------------------------------------------------------------------------------
+# Exactly 2 bots are required: bot0 = reference (fixed params), bot1 = experiment (tuned params).
+# Both bots can use the same model or different models.
+
+numBots = 2
+botName0 = base
+botName1 = experiment
+
+# Neural net model files - can be the same model for both bots
+nnModelFile0 = PATH_TO_MODEL
+nnModelFile1 = PATH_TO_MODEL
+
+# Match-----------------------------------------------------------------------------------
+
+# tune-params runs one game per trial sequentially (QRS-Tune is inherently serial),
+# so numGameThreads is not used. It is left commented out to avoid confusion.
+#numGameThreads = 1
+maxMovesPerGame = 1200
+
+allowResignation = true
+resignThreshold = -0.95
+resignConsecTurns = 6
+
+# Rules------------------------------------------------------------------------------------
+# Use a single fixed ruleset for consistent tuning results.
+
+koRules = POSITIONAL
+scoringRules = AREA
+taxRules = NONE
+multiStoneSuicideLegals = true
+hasButtons = false
+
+bSizes = 19
+bSizeRelProbs = 1
+komiMean = 7
+handicapProb = 0.0
+handicapCompensateKomiProb = 1.0
+
+# Search limits-----------------------------------------------------------------------------------
+# Use fixed visits (not time) for reproducible results across trials.
+
+maxVisits = 500
+# maxVisits0 = 500
+# maxVisits1 = 500
+
+numSearchThreads = 1
+
+# GPU Settings-------------------------------------------------------------------------------
+
+nnMaxBatchSize = 32
+nnCacheSizePowerOfTwo = 21
+nnMutexPoolSizePowerOfTwo = 17
+nnRandomize = true
+numNNServerThreadsPerModel = 1
+
+# Root move selection and biases------------------------------------------------------------------------------
+
+chosenMoveTemperatureEarly = 0.60
+chosenMoveTemperature = 0.20
+
+# Internal params------------------------------------------------------------------------------
+# These are the FIXED params for bot0 (reference). Bot1's selected
+# tuneDimension parameter will be overridden by the optimizer.
+
+# cpuctExploration = 0.9
+# cpuctUtilityStdevPriorWeight = 2.0
+# fpuReductionMax = 0.2
+# rootFpuReductionMax = 0.1
+# valueWeightExponent = 0.25
+# subtreeValueBiasFactor = 0.45
+# subtreeValueBiasWeightExponent = 0.85
+
+# GTP-equivalent defaults----------------------------------------------------------------------
+# These features default to off in match mode but on in GTP mode.
+# Enable them so tuning results reflect actual GTP-strength play.
+
+cpuctUtilityStdevScale = 0.85
+policyOptimism = 1.0
+useUncertainty = true
+useNoisePruning = true
+useNonBuggyLcb = true
+useGraphSearch = true
+rootSymmetryPruning = true
+fpuParentWeightByVisitedPolicy = true
diff --git a/cpp/core/elo.cpp b/cpp/core/elo.cpp
index b624ed22f..a2c12909d 100644
--- a/cpp/core/elo.cpp
+++ b/cpp/core/elo.cpp
@@ -1,6 +1,8 @@
 #include "../core/elo.h"
 
+#include <array>
 #include <cmath>
+#include <map>
 
 #include "../core/test.h"
 #include "../core/os.h"
@@ -268,6 +270,112 @@ vector<double> ComputeElos::computeElos(
   return elos;
 }
 
+bool ComputeElos::computeBradleyTerryElo(
+  const vector<string>& botNames,
+  const map<pair<string,string>, array<int64_t,3>>& pairStats,
+  vector<double>& outElo,
+  vector<double>& outStderr
+) {
+  int N = (int)botNames.size();
+
+  map<string,int> nameIdx;
+  for(int i = 0; i < N; i++) nameIdx[botNames[i]] = i;
+
+  //w[i][j] = effective wins of i vs j (draws count 0.5)
+  vector<vector<double>> w(N, vector<double>(N, 0.0));
+  for(auto& kv : pairStats) {
+    auto itA = nameIdx.find(kv.first.first);
+    auto itB = nameIdx.find(kv.first.second);
+    if(itA == nameIdx.end() || itB == nameIdx.end()) continue;
+    int a = itA->second, b = itB->second;
+    w[a][b] += kv.second[0] + 0.5 * kv.second[2];
+    w[b][a] += kv.second[1] + 0.5 * kv.second[2];
+  }
+
+  //theta[0] = 0 (reference, first bot), optimize theta[1..N-1]
+  vector<double> theta(N, 0.0);
+  int M = N - 1;
+
+  bool converged = (M == 0);
+  if(M > 0) {
+    vector<double> grad(M);
+    vector<vector<double>> H(M, vector<double>(M));
+    vector<vector<double>> aug(M, vector<double>(M+1));
+    vector<double> delta(M);
+    for(int iter = 0; iter < 200; iter++) {
+      fill(grad.begin(), grad.end(), 0.0);
+      for(int r = 0; r < M; r++) fill(H[r].begin(), H[r].end(), 0.0);
+      for(int i = 0; i < N; i++) {
+        for(int j = i+1; j < N; j++) {
+          double nij = w[i][j] + w[j][i];
+          if(nij <= 0.0) continue;
+          double sigma = 1.0 / (1.0 + exp(theta[j] - theta[i]));
+          double fish = nij * sigma * (1.0 - sigma);
+          double gij = w[i][j] - nij * sigma;
+          if(i > 0) { grad[i-1] += gij; H[i-1][i-1] -= fish; }
+          if(j > 0) { grad[j-1] -= gij; H[j-1][j-1] -= fish; }
+          if(i > 0 && j > 0) { H[i-1][j-1] += fish; H[j-1][i-1] += fish; }
+        }
+      }
+      //Solve H*delta = -grad via Gaussian elimination
+      for(int r = 0; r < M; r++) {
+        for(int c = 0; c < M; c++) aug[r][c] = H[r][c];
+        aug[r][M] = -grad[r];
+      }
+      for(int col = 0; col < M; col++) {
+        int piv = col;
+        for(int r = col+1; r < M; r++)
+          if(fabs(aug[r][col]) > fabs(aug[piv][col])) piv = r;
+        swap(aug[col], aug[piv]);
+        //Singular column: bot has no games against others in this subproblem.
+        //Skip elimination and leave delta[col]=0 (no Elo update for this bot).
+        if(fabs(aug[col][col]) < 1e-12) continue;
+        double inv = 1.0 / aug[col][col];
+        for(int r = col+1; r < M; r++) {
+          double f = aug[r][col] * inv;
+          for(int c = col; c <= M; c++) aug[r][c] -= f * aug[col][c];
+        }
+      }
+      fill(delta.begin(), delta.end(), 0.0);
+      for(int r = M-1; r >= 0; r--) {
+        double s = aug[r][M];
+        for(int c = r+1; c < M; c++) s -= aug[r][c] * delta[c];
+        if(fabs(aug[r][r]) > 1e-12) delta[r] = s / aug[r][r];
+      }
+      double maxDelta = 0.0;
+      for(int r = 0; r < M; r++) {
+        theta[r+1] += delta[r];
+        maxDelta = max(maxDelta, fabs(delta[r]));
+      }
+      if(maxDelta < 1e-6) { converged = true; break; }
+    }
+  }
+
+  //Convert log-strength to Elo relative to bot 0
+  outElo.resize(N);
+  outStderr.resize(N, 0.0);
+  for(int i = 0; i < N; i++)
+    outElo[i] = (theta[i] - theta[0]) * ELO_PER_LOG_GAMMA;
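+  //Illustration (taking ELO_PER_LOG_GAMMA = 400/ln(10) ~= 173.72 for the
+  //scaling): a fitted gap theta[i] - theta[0] = 0.4 corresponds to an
+  //expected winrate of sigmoid(0.4) ~= 0.599, i.e. roughly +69.5 Elo.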
+  //Fisher information diagonal -> stderr
+  //A fish of 0 means no pairwise data for this bot, so the Elo is
+  //unconstrained. Use a large sentinel value rather than 0 (which
+  //would falsely imply perfect confidence).
+  for(int i = 1; i < N; i++) {
+    double fish = 0.0;
+    for(int j = 0; j < N; j++) {
+      if(j == i) continue;
+      double nij = w[i][j] + w[j][i];
+      if(nij <= 0.0) continue;
+      double sigma = 1.0 / (1.0 + exp(theta[j] - theta[i]));
+      fish += nij * sigma * (1.0 - sigma);
+    }
+    if(fish > 0.0) outStderr[i] = ELO_PER_LOG_GAMMA / sqrt(fish);
+    else outStderr[i] = 1e18;
+  }
+  return converged;
+}
+
 static bool approxEqual(double x, double y, double tolerance) {
   return std::fabs(x - y) < tolerance;
 }
diff --git a/cpp/core/elo.h b/cpp/core/elo.h
index ff4ad4f8d..883e020b9 100644
--- a/cpp/core/elo.h
+++ b/cpp/core/elo.h
@@ -3,6 +3,9 @@
 
 #include "../core/global.h"
 
+#include <array>
+#include <map>
+
 namespace ComputeElos {
   STRUCT_NAMED_PAIR(double,firstWins,double,secondWins,WLRecord);
 
@@ -30,6 +33,16 @@ namespace ComputeElos {
   //What's the probability of winning corresponding to this elo difference?
   double probWin(double eloDiff);
 
+  //Bradley-Terry MLE Elo via Newton-Raphson, for symmetric pairwise W/L/D data.
+  //pairStats: {nameA,nameB} -> {winsA, winsB, draws}, nameA < nameB lexicographically.
+  //Draws count as 0.5 wins for each side. Returns true if converged.
+  bool computeBradleyTerryElo(
+    const std::vector<std::string>& botNames,
+    const std::map<std::pair<std::string,std::string>, std::array<int64_t,3>>& pairStats,
+    std::vector<double>& outElo,
+    std::vector<double>& outStderr
+  );
+
   void runTests();
 }
diff --git a/cpp/core/fancymath.cpp b/cpp/core/fancymath.cpp
index 4481ae463..13bab76f4 100644
--- a/cpp/core/fancymath.cpp
+++ b/cpp/core/fancymath.cpp
@@ -148,6 +148,23 @@ double FancyMath::binaryCrossEntropy(double predProb, double targetProb, double
   return targetProb * (-log(predProb)) + (1.0-targetProb) * (-log(reverseProb));
 }
 
+void FancyMath::wilsonCI95(double wins, double n, double& lo, double& hi) {
+  if(n <= 0) { lo = 0; hi = 1; return; }
+  const double z = 1.96;
+  double p = wins / n;
+  double denom = 1.0 + z*z/n;
+  double center = (p + z*z/(2*n)) / denom;
+  double margin = z * sqrt(p*(1-p)/n + z*z/(4*n*n)) / denom;
+  lo = center - margin;
+  hi = center + margin;
+}
+
+double FancyMath::oneTailedPValue(double wins, double n) {
+  if(n <= 0) return 0.5;
+  double z = (wins - 0.5*n) / (0.5*sqrt(n));
+  return 0.5 * erfc(z / sqrt(2.0));
+}
+
 #define APPROX_EQ(x,y,tolerance) testApproxEq((x),(y),(tolerance), #x, #y, __FILE__, __LINE__)
 
 static void testApproxEq(double x, double y, double tolerance, const char* msgX, const char* msgY, const char *file, int line) {
diff --git a/cpp/core/fancymath.h b/cpp/core/fancymath.h
index 5155450ac..8afca5725 100644
--- a/cpp/core/fancymath.h
+++ b/cpp/core/fancymath.h
@@ -26,6 +26,14 @@ namespace FancyMath {
   //predProb is scaled into the range [epsilon,1.0-epsilon].
   double binaryCrossEntropy(double predProb, double targetProb, double epsilon);
 
+  //Wilson score 95% two-tailed confidence interval for a binomial proportion.
+  //Draws should be counted as 0.5 wins before calling.
+  void wilsonCI95(double wins, double n, double& lo, double& hi);
+
+  //One-tailed p-value for H0: winrate=0.5 vs H1: winrate>0.5, using a normal approximation.
+  //Small values indicate the first player wins significantly more than 50%.
+  double oneTailedPValue(double wins, double n);
+
   void runTests();
 }
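As a quick sanity check of these two helpers (a worked example, not output from any real run): with 60 effective wins in 100 games, the z-score is (60 - 50) / (0.5 * sqrt(100)) = 2.0, so:

    double lo, hi;
    FancyMath::wilsonCI95(60.0, 100.0, lo, hi);          // lo ~= 0.502, hi ~= 0.691
    double p = FancyMath::oneTailedPValue(60.0, 100.0);  // 0.5 * erfc(2/sqrt(2)) ~= 0.0228

In the match summary earlier, pairs with p < 0.05 are flagged with a trailing '*'.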
diff --git a/cpp/main.cpp b/cpp/main.cpp
index 0fcc36dea..f1f7c1553 100644
--- a/cpp/main.cpp
+++ b/cpp/main.cpp
@@ -33,6 +33,7 @@ genconfig : User-friendly interface to generate a config with rules and automati
 contribute : Connect to online distributed KataGo training and run perpetually contributing selfplay games.
 match : Run self-play match games based on a config, more efficient than gtp due to batching.
+tune-params : Tune KataGo PUCT hyperparameters via sequential optimization (QRS-Tune).
 version : Print version and exit.
 
 analysis : Runs an engine designed to analyze entire games in parallel.
@@ -87,6 +88,8 @@ static int handleSubcommand(const string& subcommand, const vector<string>& args
     return MainCmds::tuner(subArgs);
   else if(subcommand == "match")
     return MainCmds::match(subArgs);
+  else if(subcommand == "tune-params")
+    return MainCmds::tuneparams(subArgs);
   else if(subcommand == "selfplay")
     return MainCmds::selfplay(subArgs);
   else if(subcommand == "testgpuerror")
diff --git a/cpp/main.h b/cpp/main.h
index 3f8ad78d4..6fcf7499f 100644
--- a/cpp/main.h
+++ b/cpp/main.h
@@ -10,6 +10,7 @@ namespace MainCmds {
   int gtp(const std::vector<std::string>& args);
   int tuner(const std::vector<std::string>& args);
   int match(const std::vector<std::string>& args);
+  int tuneparams(const std::vector<std::string>& args);
   int selfplay(const std::vector<std::string>& args);
   int testgpuerror(const std::vector<std::string>& args);
diff --git a/cpp/qrstune/QRSOptimizer.cpp b/cpp/qrstune/QRSOptimizer.cpp
new file mode 100644
index 000000000..6c3bafd78
--- /dev/null
+++ b/cpp/qrstune/QRSOptimizer.cpp
@@ -0,0 +1,984 @@
+// qrstune/QRSOptimizer.cpp
+//
+// QRS-Tune: Quadratic Response Surface optimizer for binary-outcome tuning.
+//
+// Models win probability as sigmoid(phi(x)^T * beta) where phi(x) is a
+// quadratic feature map: [1, x_i, x_i^2, x_i*x_j]. The model is fit via
+// Newton-Raphson MAP estimation with L2 regularization. The MAP optimum
+// of the fitted quadratic surface is used as the next evaluation point,
+// with decaying Gaussian noise for exploration.
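+//
+// Minimal usage sketch -- playOneGame is a hypothetical caller-supplied
+// function returning 1.0 for a win, 0.5 for a draw, 0.0 for a loss:
+//
+//   QRSTune::QRSTuner tuner(/*D=*/1, /*seed=*/42, /*total_trials=*/500);
+//   for(int t = 0; t < 500; t++) {
+//     std::vector<double> x = tuner.nextSample();  // normalized, in [-1,+1]^D
+//     tuner.addResult(x, playOneGame(x));
+//   }
+//   std::vector<double> best = tuner.bestCoords();  // model's MAP optimum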
+
+#include "../qrstune/QRSOptimizer.h"
+
+#include <algorithm>
+#include <cmath>
+#include <random>
+
+#include "../core/global.h"
+#include "../core/logger.h"
+#include "../core/test.h"
+
+using namespace std;
+
+// Pivot values smaller than this are treated as singular
+static const double SINGULAR_THRESHOLD = 1e-12;
+
+// Newton-Raphson stops when the largest coefficient change is below this
+static const double CONVERGENCE_THRESHOLD = 1e-7;
+
+// Sigmoid is clamped to 0 or 1 beyond this magnitude to avoid overflow
+static const double SIGMOID_CLAMP = 40.0;
+
+// ============================================================
+// Free functions
+// ============================================================
+
+int QRSTune::numFeatures(int D) {
+  return 1 + D + D * (D + 1) / 2;
+}
+
+// Feature layout for D dimensions:
+//   [intercept(1), linear(D), quadratic(D), cross-terms(D*(D-1)/2)]
+// Example for D=2, x=[a,b]: phi = [1, a, b, a^2, b^2, a*b]
+void QRSTune::computeFeatures(int D, const double* x, double* phi) {
+  int k = 0;
+  phi[k++] = 1.0;
+  for(int i = 0; i < D; i++) phi[k++] = x[i];
+  for(int i = 0; i < D; i++) phi[k++] = x[i] * x[i];
+  for(int i = 0; i < D; i++)
+    for(int j = i + 1; j < D; j++)
+      phi[k++] = x[i] * x[j];
+}
+
+double QRSTune::sigmoid(double z) {
+  if(z > SIGMOID_CLAMP) return 1.0;
+  if(z < -SIGMOID_CLAMP) return 0.0;
+  return 1.0 / (1.0 + exp(-z));
+}
+
+// Partial-pivot Gaussian elimination: solves Ax = b in-place.
+// On return, b contains the solution x. A is destroyed.
+// Returns false if A is singular (pivot below SINGULAR_THRESHOLD).
+bool QRSTune::gaussianSolve(int F, vector<vector<double>>& A, vector<double>& b) {
+  // Forward elimination with partial pivoting
+  for(int col = 0; col < F; col++) {
+    int piv = col;
+    for(int r = col + 1; r < F; r++)
+      if(fabs(A[r][col]) > fabs(A[piv][col])) piv = r;
+    swap(A[col], A[piv]);
+    swap(b[col], b[piv]);
+    if(fabs(A[col][col]) < SINGULAR_THRESHOLD) return false;
+    double inv = 1.0 / A[col][col];
+    for(int r = col + 1; r < F; r++) {
+      double mult = A[r][col] * inv;
+      for(int c = col; c < F; c++) A[r][c] -= mult * A[col][c];
+      b[r] -= mult * b[col];
+    }
+  }
+  // Back substitution
+  for(int r = F - 1; r >= 0; r--) {
+    for(int c = r + 1; c < F; c++) b[r] -= A[r][c] * b[c];
+    b[r] /= A[r][r];
+  }
+  return true;
+}
+
+// ============================================================
+// QRSModel
+// ============================================================
+
+QRSTune::QRSModel::QRSModel()
+  :D_(0),
+   F_(0),
+   l2_(0.1)
+{}
+
+QRSTune::QRSModel::QRSModel(int D, double l2_reg)
+  :D_(D),
+   F_(numFeatures(D)),
+   beta_(numFeatures(D), 0.0),
+   l2_(l2_reg)
+{}
+
+// Build the negative Hessian (Fisher info + L2 prior) at the current beta.
+// negH must be pre-sized to F_ x F_; contents are overwritten.
+void QRSTune::QRSModel::buildNegHessian(const vector<vector<double>>& xs,
+                                        vector<vector<double>>& negH) const {
+  int N = (int)xs.size();
+  for(int f = 0; f < F_; f++) fill(negH[f].begin(), negH[f].end(), 0.0);
+  for(int f = 0; f < F_; f++)
+    negH[f][f] = l2_;
+
+  vector<double> phi(F_);
+  for(int n = 0; n < N; n++) {
+    computeFeatures(D_, xs[n].data(), phi.data());
+    double logit = 0.0;
+    for(int f = 0; f < F_; f++) logit += beta_[f] * phi[f];
+    double p = sigmoid(logit);
+    double w = p * (1.0 - p);
+    for(int f = 0; f < F_; f++)
+      for(int g = f; g < F_; g++)
+        negH[f][g] += w * phi[f] * phi[g];
+  }
+  for(int f = 0; f < F_; f++)
+    for(int g = f + 1; g < F_; g++)
+      negH[g][f] = negH[f][g];
+}
+
+// Build the D x D quadratic Hessian M and the rhs for M*x = -linearCoeffs.
+// M and rhs must be pre-sized; contents are overwritten.
+void QRSTune::QRSModel::buildQuadHessian(vector<vector<double>>& M,
+                                         vector<double>& rhs) const {
+  const double* linearCoeffs = beta_.data() + 1;
+  const double* quadCoeffs = beta_.data() + 1 + D_;
+  const double* crossCoeffs = beta_.data() + 1 + 2 * D_;
+
+  for(int k = 0; k < D_; k++) {
+    fill(M[k].begin(), M[k].end(), 0.0);
+    M[k][k] = 2.0 * quadCoeffs[k];
+    rhs[k] = -linearCoeffs[k];
+  }
+  int idx = 0;
+  for(int i = 0; i < D_; i++)
+    for(int j = i + 1; j < D_; j++) {
+      M[i][j] += crossCoeffs[idx];
+      M[j][i] += crossCoeffs[idx];
+      idx++;
+    }
+}
+
+// Newton-Raphson MAP estimation for L2-regularized quadratic logistic regression.
+//
+// Maximizes: sum_n [ y_n * log(p_n) + (1-y_n) * log(1-p_n) ] - (l2/2) * ||beta||^2
+// where p_n = sigmoid(phi(x_n)^T * beta).
+//
+// Each iteration:
+//   1. Compute gradient and negative Hessian (including L2 prior)
+//   2. Solve the Newton system: negH * delta = grad
+//   3. Update: beta += delta
+//   4. Stop when max |delta_f| < CONVERGENCE_THRESHOLD
+void QRSTune::QRSModel::fit(const vector<vector<double>>& xs,
+                            const vector<double>& ys,
+                            int max_iter) {
+  int N = (int)xs.size();
+  if(N < F_) return;  // underdetermined; keep prior beta = 0
+
+  // Reset to the prior mean to avoid a warm-start saturation cascade: a
+  // previously-extreme intercept makes w = p*(1-p) ~= 0 for all samples,
+  // degenerating the Hessian to l2_*I and producing unbounded Newton steps.
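+  // (The flat-landscape regression test at the bottom of this file scans
+  // seeds for exactly this divergence and asserts it no longer occurs.)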
+  fill(beta_.begin(), beta_.end(), 0.0);
+
+  vector<double> phi(F_);
+  vector<double> grad(F_);
+  vector<vector<double>> negH(F_, vector<double>(F_));
+
+  for(int iter = 0; iter < max_iter; iter++) {
+    // Build negH and compute the gradient simultaneously
+    fill(grad.begin(), grad.end(), 0.0);
+    for(int f = 0; f < F_; f++) fill(negH[f].begin(), negH[f].end(), 0.0);
+    for(int f = 0; f < F_; f++) {
+      grad[f] = -l2_ * beta_[f];
+      negH[f][f] = l2_;
+    }
+
+    for(int n = 0; n < N; n++) {
+      computeFeatures(D_, xs[n].data(), phi.data());
+      double logit = 0.0;
+      for(int f = 0; f < F_; f++) logit += beta_[f] * phi[f];
+      double p = sigmoid(logit);
+      double w = p * (1.0 - p);
+      double residual = ys[n] - p;
+      for(int f = 0; f < F_; f++) {
+        grad[f] += residual * phi[f];
+        for(int g = f; g < F_; g++)
+          negH[f][g] += w * phi[f] * phi[g];
+      }
+    }
+    for(int f = 0; f < F_; f++)
+      for(int g = f + 1; g < F_; g++)
+        negH[g][f] = negH[f][g];
+
+    // Solve the Newton step: negH * delta = grad (grad is overwritten with delta)
+    if(!gaussianSolve(F_, negH, grad)) break;
+
+    // Apply the step and check convergence
+    double maxStep = 0.0;
+    for(int f = 0; f < F_; f++) {
+      beta_[f] += grad[f];
+      maxStep = max(maxStep, fabs(grad[f]));
+    }
+    if(maxStep < CONVERGENCE_THRESHOLD) break;
+  }
+}
+
+double QRSTune::QRSModel::predict(const double* x) const {
+  return sigmoid(score(x));
+}
+
+double QRSTune::QRSModel::score(const double* x) const {
+  vector<double> phi(F_);
+  computeFeatures(D_, x, phi.data());
+  double logit = 0.0;
+  for(int f = 0; f < F_; f++) logit += beta_[f] * phi[f];
+  return logit;
+}
+
+// Find the unconstrained optimum of the quadratic score surface, then clamp to [-1,+1]^D.
+void QRSTune::QRSModel::mapOptimum(double* out_x) const {
+  // If any dimension is convex, the fit is dominated by noise in that
+  // dimension. Return the origin -- a conservative, prior-centered choice.
+  if(hasConvexDim()) {
+    for(int i = 0; i < D_; i++) out_x[i] = 0.0;
+    return;
+  }
+
+  vector<vector<double>> M(D_, vector<double>(D_, 0.0));
+  vector<double> rhs(D_);
+  buildQuadHessian(M, rhs);
+
+  if(!gaussianSolve(D_, M, rhs)) {
+    for(int i = 0; i < D_; i++) out_x[i] = 0.0;
+    return;
+  }
+  for(int i = 0; i < D_; i++)
+    out_x[i] = max(-1.0, min(1.0, rhs[i]));
+}
+
+bool QRSTune::QRSModel::hasConvexDim() const {
+  const double* quadCoeffs = beta_.data() + 1 + D_;
+  for(int d = 0; d < D_; d++)
+    if(quadCoeffs[d] >= 0.0) return true;
+  return false;
+}
+
+// Compute standard errors of the MAP optimum via the delta method.
+//
+//   1. Rebuild negH (Fisher info + L2 prior) at the current beta.
+//   2. Invert negH -> Cov(beta).
+//   3. Compute the unconstrained optimum x* and M^{-1}.
+//   4. Build the Jacobian J = dx*/dbeta via implicit differentiation.
+//   5. Cov(x*) = J * Cov(beta) * J^T.
+//   6. SE[d] = sqrt(Cov(x*)[d][d]).
+bool QRSTune::QRSModel::computeOptimumSE(const vector<vector<double>>& xs,
+                                         double* se,
+                                         bool* clamped) const {
+  int N = (int)xs.size();
+  if(N < F_) return false;
+
+  // If any dim is convex, mapOptimum returns the origin -- CIs for the
+  // unconstrained critical point would be misleading.
+  if(hasConvexDim()) return false;
+
+  // --- Step 1: Build negH (Fisher info + L2 prior) at the current beta ---
+  vector<vector<double>> negH(F_, vector<double>(F_, 0.0));
+  buildNegHessian(xs, negH);
+
+  // --- Step 2: Invert negH -> Cov(beta), column by column ---
+  vector<vector<double>> covBeta(F_, vector<double>(F_, 0.0));
+  for(int g = 0; g < F_; g++) {
+    auto negH_copy = negH;
+    vector<double> e(F_, 0.0);
+    e[g] = 1.0;
+    if(!gaussianSolve(F_, negH_copy, e)) return false;
+    for(int f = 0; f < F_; f++)
+      covBeta[f][g] = e[f];
+  }
+
+  // --- Step 3: Compute the unconstrained optimum x* and M^{-1} ---
+  vector<vector<double>> M(D_, vector<double>(D_, 0.0));
+  vector<double> rhs(D_);
+  buildQuadHessian(M, rhs);
+
+  // Save M for the Jacobian computation before the solve destroys it
+  auto M_saved = M;
+  if(!gaussianSolve(D_, M, rhs)) return false;
+  vector<double> xStar(rhs);
+
+  for(int d = 0; d < D_; d++)
+    clamped[d] = (fabs(xStar[d]) >= 1.0 - 1e-9);
+
+  // Compute M^{-1} column by column
+  vector<vector<double>> Minv(D_, vector<double>(D_, 0.0));
+  for(int g = 0; g < D_; g++) {
+    auto M_copy = M_saved;
+    vector<double> e(D_, 0.0);
+    e[g] = 1.0;
+    if(!gaussianSolve(D_, M_copy, e)) return false;
+    for(int d = 0; d < D_; d++)
+      Minv[d][g] = e[d];
+  }
+
+  // --- Step 4: Build the Jacobian J (D x F) via implicit differentiation ---
+  vector<vector<double>> J(D_, vector<double>(F_, 0.0));
+
+  // Linear coefficients: J[:, 1+i] = -Minv[:, i]
+  for(int i = 0; i < D_; i++)
+    for(int d = 0; d < D_; d++)
+      J[d][1 + i] = -Minv[d][i];
+
+  // Quadratic diagonal coefficients: J[:, 1+D+i] = -2 x*_i Minv[:, i]
+  for(int i = 0; i < D_; i++)
+    for(int d = 0; d < D_; d++)
+      J[d][1 + D_ + i] = -2.0 * xStar[i] * Minv[d][i];
+
+  // Cross-term coefficients: J[:, f] = -(x*_j Minv[:, i] + x*_i Minv[:, j])
+  int idx = 0;
+  for(int i = 0; i < D_; i++)
+    for(int j = i + 1; j < D_; j++) {
+      for(int d = 0; d < D_; d++)
+        J[d][1 + 2 * D_ + idx] = -(xStar[j] * Minv[d][i] + xStar[i] * Minv[d][j]);
+      idx++;
+    }
+
+  // Clamped dims: x*[d] is at the boundary, so dx*_d/dbeta = 0
+  for(int d = 0; d < D_; d++)
+    if(clamped[d])
+      fill(J[d].begin(), J[d].end(), 0.0);
+
+  // --- Step 5: Cov(x*) = J Cov(beta) J^T ---
+  vector<vector<double>> temp(D_, vector<double>(F_, 0.0));
+  for(int d = 0; d < D_; d++)
+    for(int g = 0; g < F_; g++)
+      for(int f = 0; f < F_; f++)
+        temp[d][g] += J[d][f] * covBeta[f][g];
+
+  vector<vector<double>> covX(D_, vector<double>(D_, 0.0));
+  for(int d1 = 0; d1 < D_; d1++)
+    for(int d2 = 0; d2 < D_; d2++)
+      for(int f = 0; f < F_; f++)
+        covX[d1][d2] += temp[d1][f] * J[d2][f];
+
+  // --- Step 6: Extract SEs ---
+  for(int d = 0; d < D_; d++) {
+    se[d] = covX[d][d] > 0.0 ? sqrt(covX[d][d]) : 0.0;
+  }
+
+  return true;
+}
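+// Note on step 5: elementwise this is Var(x*_d) ~= sum_{f,g} J[d][f] *
+// Cov(beta)[f][g] * J[d][g], i.e. a first-order (delta method) propagation
+// of coefficient uncertainty through the location of the critical point.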
+
+// ============================================================
+// QRSBuffer
+// ============================================================
+
+QRSTune::QRSBuffer::QRSBuffer(int min_keep, double prune_margin)
+  :min_keep_(min_keep),
+   prune_margin_(prune_margin)
+{}
+
+void QRSTune::QRSBuffer::add(const vector<double>& x, double y) {
+  xs_.push_back(x);
+  ys_.push_back(y);
+}
+
+// Confidence-based pruning: drop samples whose predicted win rate
+// is more than prune_margin_ below the best predicted win rate.
+// The min_keep_ guard retains the oldest samples (in insertion order),
+// preserving spatial diversity from early uniform exploration.
+void QRSTune::QRSBuffer::prune(const QRSModel& model) {
+  int N = (int)xs_.size();
+  // Never remove more than half the buffer in one pass to avoid
+  // destabilizing the quadratic fit with a sudden data cliff.
+  int keepFloor = max(min_keep_, N / 2);
+  if(N <= keepFloor) return;
+
+  double bestPrediction = 0.0;
+  vector<double> preds(N);
+  for(int i = 0; i < N; i++) {
+    preds[i] = model.predict(xs_[i].data());
+    if(preds[i] > bestPrediction) bestPrediction = preds[i];
+  }
+  double threshold = bestPrediction - prune_margin_;
+
+  vector<vector<double>> newXs;
+  vector<double> newYs;
+  for(int i = 0; i < N; i++) {
+    if(preds[i] >= threshold || (int)newXs.size() < keepFloor) {
+      newXs.push_back(std::move(xs_[i]));
+      newYs.push_back(ys_[i]);
+    }
+  }
+  xs_ = std::move(newXs);
+  ys_ = std::move(newYs);
+}
+
+// ============================================================
+// QRSTuner
+// ============================================================
+
+QRSTune::QRSTuner::QRSTuner(int D, uint64_t seed, int total_trials,
+                            double l2_reg, int refit_every, int prune_every,
+                            double sigma_init, double sigma_fin)
+  :D_(D),
+   model_(D, l2_reg),
+   buffer_(max(20, total_trials / 50), 0.25),
+   rng_(seed),
+   trial_count_(0),
+   total_trials_(total_trials),
+   refit_every_(refit_every),
+   prune_every_(prune_every),
+   sigma_initial_(sigma_init),
+   sigma_final_(sigma_fin),
+   logger_(nullptr)
+{}
+
+void QRSTune::QRSTuner::setLogger(Logger* logger) {
+  logger_ = logger;
+}
+
+vector<double> QRSTune::QRSTuner::nextSample() {
+  vector<double> x(D_);
+  int F = model_.features();
+  string sigmaStr;
+
+  if(buffer_.size() < F + 1 || model_.hasConvexDim()) {
+    // Insufficient data for a reliable fit -- explore uniformly
+    uniform_real_distribution<double> uni(-1.0, 1.0);
+    for(int i = 0; i < D_; i++) x[i] = uni(rng_);
+    sigmaStr = "uniform";
+  } else {
+    // Start from the MAP optimum, add decaying Gaussian noise for exploration
+    model_.mapOptimum(x.data());
+    double progress = (double)trial_count_ / max(1, total_trials_ - 1);
+    double sigma = sigma_initial_ + progress * (sigma_final_ - sigma_initial_);
+
+    normal_distribution<double> noise(0.0, sigma);
+    for(int i = 0; i < D_; i++)
+      x[i] = max(-1.0, min(1.0, x[i] + noise(rng_)));
+    sigmaStr = Global::strprintf("%.4f", sigma);
+  }
+
+  if(logger_) {
+    string msg = "QRS sample trial=" + to_string(trial_count_) + " sigma=" + sigmaStr + " x=[";
+    for(int i = 0; i < D_; i++) {
+      if(i > 0) msg += ",";
+      msg += Global::strprintf("%.3f", x[i]);
+    }
+    msg += "]";
+    pendingLogMsg_ = msg;
+  }
+
+  return x;
+}
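+// Noise schedule example: with sigma_init=0.60 and sigma_fin=0.20, sigma is
+// 0.60 on the first trial, 0.40 at 50% progress, and 0.20 on the final
+// trial, so exploration narrows linearly as the fitted surface firms up.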
+
+void QRSTune::QRSTuner::addResult(const vector<double>& x, double y, const string& logSuffix) {
+  if(!pendingLogMsg_.empty() && logger_) {
+    string label;
+    if(y == 1.0) label = "exp wins";
+    else if(y == 0.0) label = "exp loses";
+    else label = "draw";
+    logger_->write(pendingLogMsg_ + " -> " + label + logSuffix);
+    pendingLogMsg_.clear();
+  }
+  buffer_.add(x, y);
+  trial_count_++;
+
+  if(trial_count_ % refit_every_ == 0 && buffer_.size() >= model_.features() + 1) {
+    int sizeBefore = buffer_.size();
+    model_.fit(buffer_.xs(), buffer_.ys());
+    int refit_count = trial_count_ / refit_every_;
+    if(refit_count % prune_every_ == 0) {
+      if(model_.hasConvexDim()) {
+        if(logger_)
+          logger_->write("QRS prune skipped: model has convex dims, predictions unreliable");
+      } else {
+        buffer_.prune(model_);
+        if(logger_)
+          logger_->write("QRS prune: " + to_string(sizeBefore) + " -> " + to_string(buffer_.size()) + " samples");
+      }
+    }
+    if(logger_) {
+      auto best = bestCoords();
+      double winP = model_.predict(best.data());
+      const auto& b = model_.beta();
+      string diag = "QRS refit trial=" + to_string(trial_count_);
+      diag += " buf=" + to_string(buffer_.size());
+      diag += " intercept=" + Global::strprintf("%.4f", b[0]);
+      diag += " quadDiag=[";
+      for(int d = 0; d < D_; d++) {
+        if(d > 0) diag += ",";
+        diag += Global::strprintf("%.4f", b[1 + D_ + d]);
+      }
+      diag += "]";
+      diag += " concave=" + string(model_.hasConvexDim() ? "N" : "Y");
+      diag += " bestQRS=[";
+      for(int d = 0; d < D_; d++) {
+        if(d > 0) diag += ",";
+        diag += Global::strprintf("%.3f", best[d]);
+      }
+      diag += "]";
+      diag += " winP=" + Global::strprintf("%.4f", winP);
+      logger_->write(diag);
+    }
+  }
+}
+
+vector<double> QRSTune::QRSTuner::bestCoords() const {
+  vector<double> best(D_);
+  model_.mapOptimum(best.data());
+  return best;
+}
+
+double QRSTune::QRSTuner::bestWinProb() const {
+  auto best = bestCoords();
+  return model_.predict(best.data());
+}
+
+// ============================================================
+// Tests
+// ============================================================
+
+static bool approxEqual(double x, double y, double tolerance) {
+  return fabs(x - y) < tolerance;
+}
+
+void QRSTune::runTests() {
+  cout << "Running QRSTune tests" << endl;
+
+  // Test numFeatures: F = 1 + D + D*(D+1)/2
+  // D=0: 1, D=1: 3, D=2: 6, D=3: 10
+  {
+    testAssert(numFeatures(0) == 1);
+    testAssert(numFeatures(1) == 3);
+    testAssert(numFeatures(2) == 6);
+    testAssert(numFeatures(3) == 10);
+  }
+
+  // Test computeFeatures: D=2, x=[0.5, -0.3]
+  // Expected: [1.0, 0.5, -0.3, 0.25, 0.09, -0.15]
+  {
+    double x[2] = {0.5, -0.3};
+    double phi[6];
+    computeFeatures(2, x, phi);
+    testAssert(approxEqual(phi[0], 1.0, 1e-15));
+    testAssert(approxEqual(phi[1], 0.5, 1e-15));
+    testAssert(approxEqual(phi[2], -0.3, 1e-15));
+    testAssert(approxEqual(phi[3], 0.25, 1e-15));
+    testAssert(approxEqual(phi[4], 0.09, 1e-15));
+    testAssert(approxEqual(phi[5], -0.15, 1e-15));
+  }
+
+  // Test sigmoid
+  {
+    testAssert(approxEqual(sigmoid(0.0), 0.5, 1e-15));
+    testAssert(sigmoid(50.0) == 1.0);
+    testAssert(sigmoid(-50.0) == 0.0);
+    testAssert(approxEqual(sigmoid(1.0), 1.0 / (1.0 + exp(-1.0)), 1e-12));
+    // Beyond the clamp threshold: exactly 0 or 1
+    testAssert(sigmoid(SIGMOID_CLAMP + 1.0) == 1.0);
+    testAssert(sigmoid(-SIGMOID_CLAMP - 1.0) == 0.0);
+    // Moderate values: still fractional
+    testAssert(sigmoid(5.0) < 1.0);
+    testAssert(sigmoid(-5.0) > 0.0);
+  }
+
+  // Test gaussianSolve: 2x2 system [[2,1],[1,3]] * x = [5,7] => x = [8/5, 9/5]
+  {
+    vector<vector<double>> A = {{2.0, 1.0}, {1.0, 3.0}};
+    vector<double> b = {5.0, 7.0};
+    bool ok = gaussianSolve(2, A, b);
+    testAssert(ok);
+    testAssert(approxEqual(b[0], 8.0 / 5.0, 1e-12));
+    testAssert(approxEqual(b[1], 9.0 / 5.0, 1e-12));
+  }
+
+  // Test gaussianSolve: 3x3 identity system
+  {
+    vector<vector<double>> A = {{1,0,0},{0,1,0},{0,0,1}};
+    vector<double> b = {3.0, -1.0, 7.0};
+    bool ok = gaussianSolve(3, A, b);
+    testAssert(ok);
+    testAssert(approxEqual(b[0], 3.0, 1e-15));
+    testAssert(approxEqual(b[1], -1.0, 1e-15));
+    testAssert(approxEqual(b[2], 7.0, 1e-15));
+  }
+
+  // Test gaussianSolve: singular matrix returns false
+  {
+    vector<vector<double>> A = {{1.0, 2.0}, {2.0, 4.0}};
+    vector<double> b = {3.0, 6.0};
+    bool ok = gaussianSolve(2, A, b);
+    testAssert(!ok);
+  }
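+  // (Check of the 2x2 case by substitution: 2*(8/5) + 9/5 = 25/5 = 5 and
+  // 8/5 + 3*(9/5) = 35/5 = 7, matching the right-hand side [5, 7].)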
+
+  // Test QRSModel fit + predict: 1D separable data.
+  // All samples at x=+0.8 win, all at x=-0.8 lose.
+  // After fitting, predict(+0.8) should be high and predict(-0.8) should be low.
+  {
+    QRSModel model(1, 0.1);
+    vector<vector<double>> xs;
+    vector<double> ys;
+    for(int i = 0; i < 20; i++) {
+      xs.push_back({0.8});
+      ys.push_back(1.0);
+      xs.push_back({-0.8});
+      ys.push_back(0.0);
+    }
+    model.fit(xs, ys);
+    double xWin[] = {0.8};
+    double xLose[] = {-0.8};
+    double xMid[] = {0.0};
+    double pWin = model.predict(xWin);
+    double pLose = model.predict(xLose);
+    testAssert(pWin > 0.7);
+    testAssert(pLose < 0.3);
+    // The midpoint should be near 0.5
+    double pMid = model.predict(xMid);
+    testAssert(approxEqual(pMid, 0.5, 0.15));
+  }
+
+  // Test QRSModel mapOptimum: after fitting 1D win-at-positive data,
+  // the MAP optimum should be in the positive region (clamped to [-1,+1]).
+  {
+    QRSModel model(1, 0.1);
+    vector<vector<double>> xs;
+    vector<double> ys;
+    for(int i = 0; i < 20; i++) {
+      xs.push_back({0.8});
+      ys.push_back(1.0);
+      xs.push_back({-0.8});
+      ys.push_back(0.0);
+    }
+    model.fit(xs, ys);
+    double bestX;
+    model.mapOptimum(&bestX);
+    // The optimum should have a higher predicted win rate than the anti-optimum
+    double negOne = -1.0;
+    testAssert(model.predict(&bestX) > model.predict(&negOne) + 0.1);
+  }
+
+  // Test QRSModel 2D: wins cluster at (+0.5, +0.5), losses at (-0.5, -0.5)
+  {
+    QRSModel model(2, 0.1);
+    vector<vector<double>> xs;
+    vector<double> ys;
+    for(int i = 0; i < 20; i++) {
+      xs.push_back({0.5, 0.5});
+      ys.push_back(1.0);
+      xs.push_back({-0.5, -0.5});
+      ys.push_back(0.0);
+    }
+    model.fit(xs, ys);
+    double xWin[] = {0.5, 0.5};
+    double xLose[] = {-0.5, -0.5};
+    double pWin = model.predict(xWin);
+    double pLose = model.predict(xLose);
+    testAssert(pWin > 0.7);
+    testAssert(pLose < 0.3);
+  }
+
+  // Test QRSTuner end-to-end: 1D, deterministic seed, outcome strongly correlated
+  // with x > 0. After enough trials the best predicted win rate should exceed 0.5.
+  {
+    const int numTrials = 100;
+    QRSTuner tuner(1, /*seed=*/42, numTrials,
+                   /*l2_reg=*/0.1, /*refit_every=*/10, /*prune_every=*/5);
+    for(int trial = 0; trial < numTrials; trial++) {
+      vector<double> sample = tuner.nextSample();
+      // Strong signal: win when x > 0, lose when x < 0
+      double outcome = (sample[0] > 0.0) ? 1.0 : 0.0;
+      tuner.addResult(sample, outcome);
+    }
+    testAssert(tuner.trialCount() == numTrials);
+    // The fitted model should recognize that positive x is better
+    double posOne = 1.0, negOne = -1.0;
+    testAssert(tuner.model().predict(&posOne) > tuner.model().predict(&negOne));
+  }
+  {
+    QRSModel model(1, 0.01);
+    vector<vector<double>> xs;
+    vector<double> ys;
+    for(int i = 0; i < 40; i++) {
+      xs.push_back({0.0}); ys.push_back(1.0);   // center: wins
+      xs.push_back({0.8}); ys.push_back(0.0);   // right edge: losses
+      xs.push_back({-0.8}); ys.push_back(0.0);  // left edge: losses
+    }
+    model.fit(xs, ys);
+    double se[1];
+    bool clamped[1];
+    bool ok = model.computeOptimumSE(xs, se, clamped);
+    testAssert(ok);
+    testAssert(se[0] > 0.0);
+    testAssert(se[0] < 2.0);
+  }
+
+  // Test computeOptimumSE: more data gives smaller SE
+  {
+    auto buildData = [](int reps, vector<vector<double>>& xs, vector<double>& ys) {
+      for(int i = 0; i < reps; i++) {
+        xs.push_back({0.0}); ys.push_back(1.0);
+        xs.push_back({0.8}); ys.push_back(0.0);
+        xs.push_back({-0.8}); ys.push_back(0.0);
+      }
+    };
+
+    QRSModel modelSmall(1, 0.01);
+    QRSModel modelLarge(1, 0.01);
+    vector<vector<double>> xsSmall, xsLarge;
+    vector<double> ysSmall, ysLarge;
+    buildData(15, xsSmall, ysSmall);
+    buildData(150, xsLarge, ysLarge);
+    modelSmall.fit(xsSmall, ysSmall);
+    modelLarge.fit(xsLarge, ysLarge);
+    double seSmall[1], seLarge[1];
+    bool clampedSmall[1], clampedLarge[1];
+    bool okSmall = modelSmall.computeOptimumSE(xsSmall, seSmall, clampedSmall);
+    bool okLarge = modelLarge.computeOptimumSE(xsLarge, seLarge, clampedLarge);
+    testAssert(okSmall && okLarge);
+    testAssert(seLarge[0] < seSmall[0]);
+  }
+
+  // Test computeOptimumSE: insufficient data returns false
+  {
+    QRSModel model(1, 0.1);
+    vector<vector<double>> xs = {{0.5}, {-0.5}};
+    vector<double> ys = {1.0, 0.0};
+    // N=2 < F=3, so fit() does nothing and beta stays zero
+    model.fit(xs, ys);
+    double se[1];
+    bool clamped[1];
+    bool ok = model.computeOptimumSE(xs, se, clamped);
+    testAssert(!ok);
+  }
+
+  // Test QRSBuffer prune: verify pruning reduces buffer size
+  {
+    QRSModel model(1, 0.1);
+    vector<vector<double>> xs;
+    vector<double> ys;
+    // Build data with a clear win region
+    for(int i = 0; i < 30; i++) {
+      xs.push_back({0.8});
+      ys.push_back(1.0);
+      xs.push_back({-0.8});
+      ys.push_back(0.0);
+    }
+    model.fit(xs, ys);
+
+    QRSBuffer buffer(5, 0.10);  // tight margin, keep at least 5
+    for(int i = 0; i < 60; i++) {
+      buffer.add(xs[i], ys[i]);
+    }
+    testAssert(buffer.size() == 60);
+    buffer.prune(model);
+    // Should have pruned some low-quality samples
+    testAssert(buffer.size() < 60);
+    testAssert(buffer.size() >= 5);  // min_keep
+  }
+
+  // Test QRSTuner convergence with stochastic outcomes in a 1D quadratic
+  // landscape centered at x* = 0.35.
+  {
+    const double trueOpt = 0.35;
+    const int numTrials = 500;
+    mt19937_64 outcomeRng(99);
+    uniform_real_distribution<double> uni01(0.0, 1.0);
+
+    QRSTuner tuner(1, /*seed=*/42, numTrials,
+                   /*l2_reg=*/0.1, /*refit_every=*/10, /*prune_every=*/5,
+                   /*sigma_init=*/0.60, /*sigma_fin=*/0.20);
+    for(int trial = 0; trial < numTrials; trial++) {
+      vector<double> sample = tuner.nextSample();
+      double dx = sample[0] - trueOpt;
+      double winProb = sigmoid(2.0 - 4.0 * dx * dx);
+      double outcome = (uni01(outcomeRng) < winProb) ? 1.0 : 0.0;
+      tuner.addResult(sample, outcome);
+    }
+    vector<double> best = tuner.bestCoords();
+    testAssert(fabs(best[0] - trueOpt) < 0.15);
+    testAssert(tuner.bestWinProb() > 0.7);
+  }
+
+  // Test: Nearly-flat 3D landscape can expose convex fitting.
+  //
+  // When the true function is nearly flat and we have only ~128 stochastic
+  // trials fitting 10 parameters, noise can make the fitted quadratic convex
+  // (positive coefficient) in some dimensions. We scan seeds to find one
+  // that triggers this, then verify the invariant: the optimizer's "best"
+  // should predict at least as well as the origin regardless.
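+  // (Per QRSTuner::nextSample(), convex-fitted dimensions also trigger a
+  // fallback to uniform random sampling, so a convex fit degrades gracefully
+  // rather than chasing a spurious boundary optimum.)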
+  {
+    const int D = 3;
+    const int numTrials = 128;
+    const double trueOpt[3] = {0.3, -0.2, 0.4};
+    const double curvature = 0.1;  // very weak: winrate spans only ~0.39-0.50
+
+    bool foundConvex = false;
+    for(uint64_t tunerSeed = 0; tunerSeed < 50 && !foundConvex; tunerSeed++) {
+      mt19937_64 outcomeRng(tunerSeed * 1000);
+      uniform_real_distribution<double> uni01(0.0, 1.0);
+
+      QRSTuner tuner(D, /*seed=*/tunerSeed, numTrials,
+                     /*l2_reg=*/0.1, /*refit_every=*/10, /*prune_every=*/5,
+                     /*sigma_init=*/0.60, /*sigma_fin=*/0.20);
+
+      for(int trial = 0; trial < numTrials; trial++) {
+        vector<double> sample = tuner.nextSample();
+        double sc = 0.0;
+        for(int d = 0; d < D; d++) {
+          double dx = sample[d] - trueOpt[d];
+          sc -= curvature * dx * dx;
+        }
+        double winProb = sigmoid(sc);
+        double outcome = (uni01(outcomeRng) < winProb) ? 1.0 : 0.0;
+        tuner.addResult(sample, outcome);
+      }
+
+      if(tuner.model().hasConvexDim()) {
+        foundConvex = true;
+        // Invariant: the optimizer's "best" should predict at least as well
+        // as the origin, even when some dimensions are convex-fitted.
+        const QRSModel& model = tuner.model();
+        double origin[3] = {0.0, 0.0, 0.0};
+        double probAtBest = tuner.bestWinProb();
+        double probAtOrigin = model.predict(origin);
+        testAssert(probAtBest >= probAtOrigin);
+      }
+    }
+    // At least one seed should trigger convex fitting in a nearly-flat landscape.
+    testAssert(foundConvex);
+  }
+
+  // Regression test for Newton-Raphson intercept divergence on a flat 2D
+  // landscape (true winrate = 50% everywhere, so correct intercept = 0).
+  // Scans seeds until one triggers the warm-start saturation cascade.
+  {
+    const int D = 2;
+    const int numTrials = 100;
+    // Diverged intercepts are 400-500; non-diverged are < 1.
+    const double DIVERGE_THRESHOLD = 50.0;
+
+    bool diverged = false;
+    uniform_real_distribution<double> uni01(0.0, 1.0);
+
+    for(uint64_t tunerSeed = 0; tunerSeed < 20 && !diverged; tunerSeed++) {
+      mt19937_64 outcomeRng(tunerSeed * 1000 + 7);
+
+      QRSTuner tuner(D, /*seed=*/tunerSeed, numTrials,
+                     /*l2_reg=*/0.1, /*refit_every=*/10, /*prune_every=*/5,
+                     /*sigma_init=*/0.40, /*sigma_fin=*/0.05);
+
+      for(int trial = 0; trial < numTrials; trial++) {
+        vector<double> sample = tuner.nextSample();
+        double outcome = (uni01(outcomeRng) < 0.5) ? 1.0 : 0.0;
+        tuner.addResult(sample, outcome);
+      }
+
+      if(fabs(tuner.model().beta()[0]) > DIVERGE_THRESHOLD)
+        diverged = true;
+    }
+
+    // Fixed: no seed triggers intercept divergence after removing warm-start.
+    testAssert(!diverged);
+  }
+
+  // Test convergence scaling: 1D quadratic landscapes with 100, 1000,
+  // and 10000 trials. More trials should yield tighter standard errors
+  // and a closer estimate of the true optimum.
+  //
+  // Two landscapes are tested:
+  //   Steep: score = 1.5 - 3.0*(x-0.25)^2, peak winrate ~0.818
+  //   Flat:  score = 0.1 - 0.2*(x-0.25)^2, peak winrate ~0.525 (15x weaker curvature)
+  {
+    const double trueOpt = 0.25;
+    const int D = 1;
+    const uint64_t tunerSeed = 77;
+    const double l2_reg = 0.1;
+    const int refit_every = 10;
+    const int prune_every = 5;
+    const double sigma_init = 0.50;
+    const double sigma_fin = 0.15;
+
+    struct TrialResult { double dist, winProb, se; bool seOk; };
+
+    auto runTrials = [&](double intercept, double curvature,
+                         int numTrials, uint64_t outcomeSeed) -> TrialResult {
+      mt19937_64 outcomeRng(outcomeSeed);
+      uniform_real_distribution<double> uni01(0.0, 1.0);
+
+      QRSTuner tuner(D, tunerSeed, numTrials,
+                     l2_reg, refit_every, prune_every,
+                     sigma_init, sigma_fin);
+      for(int trial = 0; trial < numTrials; trial++) {
+        vector<double> sample = tuner.nextSample();
+        double dx = sample[0] - trueOpt;
+        double winProb = sigmoid(intercept - curvature * dx * dx);
+        double outcome = (uni01(outcomeRng) < winProb) ? 1.0 : 0.0;
+        tuner.addResult(sample, outcome);
+      }
+      vector<double> best = tuner.bestCoords();
+      double dist = fabs(best[0] - trueOpt);
+      double wp = tuner.bestWinProb();
+
+      double se[1] = {-1.0};
+      bool clamped[1];
+      bool seOk = tuner.model().computeOptimumSE(tuner.buffer().xs(), se, clamped);
+
+      cout << " Trials=" << numTrials
+           << " best=" << best[0]
+           << " dist=" << dist
+           << " winProb=" << wp
+           << " convex=" << (tuner.model().hasConvexDim() ? "Y" : "N")
+           << " seOk=" << (seOk ? "Y" : "N");
+      if(seOk)
+        cout << " SE=" << se[0]
+             << " 95%CI=[" << (best[0] - 1.96 * se[0]) << ", " << (best[0] + 1.96 * se[0]) << "]";
+      cout << endl;
+
+      return {dist, wp, seOk ? se[0] : -1.0, seOk};
+    };
+
+    // --- Steep landscape: curvature=3.0, peak winrate ~0.818 ---
+    cout << "Convergence scaling (1D quadratic, true optimum at 0.25):" << endl;
+    {
+      TrialResult small = runTrials(1.5, 3.0, 100, /*outcomeSeed=*/1001);
+      TrialResult med = runTrials(1.5, 3.0, 1000, /*outcomeSeed=*/1002);
+      TrialResult large = runTrials(1.5, 3.0, 10000, /*outcomeSeed=*/1003);
+
+      testAssert(small.seOk);
+      testAssert(med.seOk);
+      testAssert(large.seOk);
+
+      // True optimum should fall within the 95% CI
+      testAssert(fabs(small.dist) < 1.96 * small.se);
+      testAssert(fabs(med.dist) < 1.96 * med.se);
+      testAssert(fabs(large.dist) < 1.96 * large.se);
+
+      // 100 trials: rough convergence
+      testAssert(small.dist < 0.30);
+      testAssert(small.winProb > 0.60);
+
+      // 1000 trials: solid convergence
+      testAssert(med.dist < 0.10);
+      testAssert(med.winProb > 0.75);
+
+      // 10000 trials: tight convergence
+      testAssert(large.dist < 0.05);
+      testAssert(large.winProb > 0.75);
+
+      testAssert(large.se < med.se);
+      testAssert(med.se < small.se);
+    }
+
+    // --- Flat landscape: curvature=0.2, peak winrate ~0.525 ---
+    // With weak curvature, adaptive sampling can lead the model to the boundary,
+    // making SE non-monotonic across trial counts.
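+    // (Hence only loose dist/winProb bounds are asserted below; no SE
+    // monotonicity or CI-coverage checks are made on this landscape.)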
+    cout << "Flat convergence scaling (1D, curvature=0.2, true optimum at 0.25):" << endl;
+    {
+      TrialResult small = runTrials(0.10, 0.20, 100, /*outcomeSeed=*/1001);
+      TrialResult med = runTrials(0.10, 0.20, 1000, /*outcomeSeed=*/1002);
+      TrialResult large = runTrials(0.10, 0.20, 10000, /*outcomeSeed=*/1003);
+
+      // 100 trials on a flat function: may not converge at all
+      testAssert(small.winProb > 0.40);
+
+      // 1000 trials: rough convergence (may still be far off on flat landscape)
+      testAssert(med.winProb > 0.45);
+
+      // 10000 trials: moderate convergence (peak winrate is only 0.525)
+      testAssert(large.dist < 0.50);
+      testAssert(large.winProb > 0.48);
+    }
+  }
+}
diff --git a/cpp/qrstune/QRSOptimizer.h b/cpp/qrstune/QRSOptimizer.h
new file mode 100644
index 000000000..9f9c14713
--- /dev/null
+++ b/cpp/qrstune/QRSOptimizer.h
@@ -0,0 +1,188 @@
+// qrstune/QRSOptimizer.h
+
+#ifndef QRSTUNE_QRSOPTIMIZER_H_
+#define QRSTUNE_QRSOPTIMIZER_H_
+
+#include <random>
+#include <string>
+#include <vector>
+
+class Logger;
+
+namespace QRSTune {
+
+// ============================================================
+// Feature map phi(x):
+//   [1, x_0..x_{D-1}, x_0^2..x_{D-1}^2, x_i*x_j for i<j]
+// ============================================================
+int numFeatures(int D);
+void computeFeatures(int D, const double* x, double* phi);
+
+// Logistic function, clamped for numerical safety: returns exactly 0 or 1
+// when |z| exceeds SIGMOID_CLAMP.
+extern const double SIGMOID_CLAMP;
+double sigmoid(double z);
+
+// Solve the n x n linear system A*x = b by Gaussian elimination, writing the
+// solution into b. Returns false if the matrix is singular.
+bool gaussianSolve(int n, std::vector<std::vector<double>>& A, std::vector<double>& b);
+
+// ============================================================
+// QRSModel: quadratic logistic regression with L2 regularization.
+// Provides MAP estimation and win-probability prediction.
+// ============================================================
+class QRSModel {
+  int D_, F_;
+  std::vector<double> beta_;  // F coefficients (intercept, linear, quad, cross)
+  double l2_;                 // L2 regularization strength
+
+  // Build the negative Hessian (Fisher info + L2 prior) at current beta.
+  void buildNegHessian(const std::vector<std::vector<double>>& xs,
+                       std::vector<std::vector<double>>& negH) const;
+
+  // Build the D x D quadratic Hessian M and rhs for the system M*x = -linearCoeffs.
+  void buildQuadHessian(std::vector<std::vector<double>>& M,
+                        std::vector<double>& rhs) const;
+
+ public:
+  QRSModel();
+  QRSModel(int D, double l2_reg = 0.1);
+
+  // Newton-Raphson MAP estimation.
+  // xs: sample coordinates; ys: outcomes in {0.0, 0.5, 1.0}
+  void fit(const std::vector<std::vector<double>>& xs,
+           const std::vector<double>& ys,
+           int max_iter = 30);
+
+  // Win probability at x[0..D-1]
+  double predict(const double* x) const;
+
+  // Linear score phi(x)^T beta (used for MAP maximization)
+  double score(const double* x) const;
+
+  // Find x in [-1,+1]^D that maximizes score(x) = phi(x)^T beta.
+  // For a quadratic, the unconstrained stationary point satisfies:
+  //   M x = -b_lin
+  // where M[i][i] = 2*beta_quad[i], M[i][j]=M[j][i] = beta_cross[i,j],
+  // b_lin[i] = beta_linear[i].
+  // The solution is clamped to [-1,+1]^D.
+  void mapOptimum(double* out_x) const;
+
+  // Returns true if any fitted quadratic coefficient is non-negative (convex),
+  // indicating the fit is unreliable due to noise.
+  bool hasConvexDim() const;
+
+  int dims() const { return D_; }
+  int features() const { return F_; }
+  const std::vector<double>& beta() const { return beta_; }
+
+  // Compute standard errors of the MAP optimum x* via the delta method.
+  // Rebuilds the Fisher information matrix from xs at current beta, inverts
+  // it to get Cov(beta), then propagates through the beta -> x* mapping.
+  // On success, fills se[0..D-1] with SEs in normalized [-1,+1] coords and
+  // sets clamped[0..D-1] to true for dims where x* was clamped to boundary.
+  // Returns false if the Hessian is singular (CIs unavailable).
+  bool computeOptimumSE(const std::vector<std::vector<double>>& xs,
+                        double* se,
+                        bool* clamped) const;
+};
+
+// ============================================================
+// QRSBuffer: sample storage with confidence-based pruning.
+// Samples whose predicted win rate is far below the current
+// MAP estimate are dropped to keep the model locally focused.
+// ============================================================
+class QRSBuffer {
+  std::vector<std::vector<double>> xs_;
+  std::vector<double> ys_;
+  int min_keep_;         // never prune below this count
+  double prune_margin_;  // drop samples where p_pred < p_best - margin
+
+ public:
+  QRSBuffer(int min_keep = 30, double prune_margin = 0.25);
+
+  void add(const std::vector<double>& x, double y);
+
+  // Remove samples significantly below the current MAP win estimate.
+  // The min_keep_ guard retains the oldest samples (in insertion order),
+  // preserving spatial diversity from early uniform exploration.
+  void prune(const QRSModel& model);
+
+  const std::vector<std::vector<double>>& xs() const { return xs_; }
+  const std::vector<double>& ys() const { return ys_; }
+  int size() const { return (int)xs_.size(); }
+};
+
+// ============================================================
+// QRSTuner: top-level interface
+//
+// Usage:
+//   QRSTuner tuner(D, seed, numTrials);
+//   for each trial:
+//     auto x = tuner.nextSample();
+//     ... run game, get win=1.0 / loss=0.0 / draw=0.5 ...
+//     tuner.addResult(x, outcome);
+//   auto best = tuner.bestCoords();
+// ============================================================
+class QRSTuner {
+  int D_;
+  QRSModel model_;
+  QRSBuffer buffer_;
+  std::mt19937_64 rng_;
+  int trial_count_;
+  int total_trials_;
+  int refit_every_;  // refit model after every N trials
+  int prune_every_;  // prune once per this many refits
+
+  // Exploration noise std dev: decays linearly from initial to final
+  double sigma_initial_;
+  double sigma_final_;
+
+  Logger* logger_;             // non-null enables verbose diagnostic logging
+  std::string pendingLogMsg_;  // assembled in nextSample(), flushed in addResult()
+
+ public:
+  // D            : number of dimensions
+  // seed         : RNG seed for reproducibility
+  // total_trials : expected total number of trials (for scheduling)
+  // l2_reg       : L2 regularization for QRSModel (default 0.1)
+  // refit_every  : how often to refit model (default 10 trials)
+  // prune_every  : prune every N-th refit (default 5)
+  QRSTuner(int D, uint64_t seed, int total_trials,
+           double l2_reg = 0.1,
+           int refit_every = 10,
+           int prune_every = 5,
+           double sigma_init = 0.40,
+           double sigma_fin = 0.05);
+
+  // Propose next point to evaluate.
+  // During early exploration (< F+1 samples) or when the model has convex
+  // dimensions (noise-dominated landscape), returns a uniform random point.
+  // Afterwards: MAP optimum + decaying Gaussian noise clamped to [-1,+1]^D.
+  std::vector<double> nextSample();
+
+  // Record the outcome of a trial.
+  // y: 1.0 = win, 0.0 = loss, 0.5 = draw
+  void addResult(const std::vector<double>& x, double y, const std::string& logSuffix = "");
+
+  // Return current MAP optimum in [-1,+1]^D
+  std::vector<double> bestCoords() const;
+
+  // Estimated win probability at the MAP optimum
+  double bestWinProb() const;
+
+  // Enable diagnostic logging (refits, pruning, sample coords).
+  void setLogger(Logger* logger);
+
+  int trialCount() const { return trial_count_; }
+  int dims() const { return D_; }
+  const QRSModel& model() const { return model_; }
+  const QRSBuffer& buffer() const { return buffer_; }
+};
+
+void runTests();
+
+} // namespace QRSTune
+
+#endif // QRSTUNE_QRSOPTIMIZER_H_
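For reviewers, here is a minimal sketch of the intended driving loop, using only the QRSTuner API declared above. The evaluate() helper is hypothetical: it stands in for playing one game at the candidate point and scoring it 1.0/0.5/0.0 for win/draw/loss (the real integration, including the mapping from normalized [-1,+1] coordinates onto actual PUCT parameter ranges, lives in command/tuneparams.cpp):

#include <cmath>
#include <cstdio>
#include <random>
#include <vector>
#include "qrstune/QRSOptimizer.h"

// Hypothetical stand-in for "play one game at these parameters": a noisy
// quadratic winrate landscape peaked at (0.3, -0.1), like the unit tests use.
static double evaluate(const std::vector<double>& x, std::mt19937_64& rng) {
  double score = 1.0
    - 2.0 * (x[0] - 0.3) * (x[0] - 0.3)
    - 2.0 * (x[1] + 0.1) * (x[1] + 0.1);
  double winProb = 1.0 / (1.0 + std::exp(-score));
  std::uniform_real_distribution<double> uni01(0.0, 1.0);
  return (uni01(rng) < winProb) ? 1.0 : 0.0;  // 1.0 = win, 0.0 = loss
}

int main() {
  const int D = 2;             // tuning two normalized parameters
  const int numTrials = 1000;  // total games to play
  QRSTune::QRSTuner tuner(D, /*seed=*/12345, numTrials);
  std::mt19937_64 gameRng(678);

  for(int trial = 0; trial < numTrials; trial++) {
    std::vector<double> x = tuner.nextSample();  // proposal in [-1,+1]^D
    tuner.addResult(x, evaluate(x, gameRng));
  }

  std::vector<double> best = tuner.bestCoords();
  std::printf("best = (%.3f, %.3f), predicted winrate = %.3f\n",
              best[0], best[1], tuner.bestWinProb());
  return 0;
}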