Ensemble
To uruchamia zestaw powiązanych modeli. Poszczególne modele uwzględniają różne ilości historii i mają opcję albo wyboru ruchu, który zoptymalizuje oczekiwaną różnicę wypłat, albo losowego wyboru ruchu proporcjonalnego do oczekiwanej różnicy wypłat.
Każdy członek zespołu głosuje następnie na preferowany ruch. Otrzymują liczbę głosów równą temu, ile więcej wygrywają niż przeciwnik (co oznacza, że okropne modele otrzymają głosy negatywne). Którykolwiek ruch wygrywa, głosowanie jest następnie wybierane.
(Prawdopodobnie powinni podzielić swoje głosy między ruchy proporcjonalnie do tego, na ile faworyzują każdy, ale teraz nie dbam o to wystarczająco dużo).
Pokonuje wszystko, co do tej pory opublikowano, z wyjątkiem EvaluaterBot i PatternFinder. (Jeden na jednego, pokonuje EvaluaterBot i przegrywa z PatternFinder).
from collections import defaultdict
import random
class Number6:
class Choices:
def __init__(self, C = 0, N = 0, D = 0):
self.C = C
self.N = N
self.D = D
def __init__(self, strategy = "maxExpected", markov_order = 3):
self.MARKOV_ORDER = markov_order;
self.my_choices = ""
self.opponent = defaultdict(lambda: self.Choices())
self.choice = None # previous choice
self.payoff = {
"C": { "C": 3-3, "N": 4-1, "D": 0-5 },
"N": { "C": 1-4, "N": 2-2, "D": 3-2 },
"D": { "C": 5-0, "N": 2-3, "D": 1-1 },
}
self.total_payoff = 0
# if random, will choose in proportion to payoff.
# otherwise, will always choose argmax
self.strategy = strategy
# maxExpected: maximize expected relative payoff
# random: like maxExpected, but it chooses in proportion to E[payoff]
# argmax: always choose the option that is optimal for expected opponent choice
def update_opponent_model(self, last):
for i in range(0, self.MARKOV_ORDER):
hist = self.my_choices[i:]
self.opponent[hist].C += ("C" == last)
self.opponent[hist].N += ("N" == last)
self.opponent[hist].D += ("D" == last)
def normalize(self, counts):
sum = float(counts.C + counts.N + counts.D)
if 0 == sum:
return self.Choices(1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0)
return self.Choices(
counts.C / sum, counts.N / sum, counts.D / sum)
def get_distribution(self):
for i in range(0, self.MARKOV_ORDER):
hist = self.my_choices[i:]
#print "check hist = " + hist
if hist in self.opponent:
return self.normalize(self.opponent[hist])
return self.Choices(1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0)
def choose(self, dist):
payoff = self.Choices()
# We're interested in *beating the opponent*, not
# maximizing our score, so we optimize the difference
payoff.C = (3-3) * dist.C + (4-1) * dist.N + (0-5) * dist.D
payoff.N = (1-4) * dist.C + (2-2) * dist.N + (3-2) * dist.D
payoff.D = (5-0) * dist.C + (2-3) * dist.N + (1-1) * dist.D
# D has slightly better payoff on uniform opponent,
# so we select it on ties
if self.strategy == "maxExpected":
if payoff.C > payoff.N:
return "C" if payoff.C > payoff.D else "D"
return "N" if payoff.N > payoff.D else "D"
elif self.strategy == "randomize":
payoff = self.normalize(payoff)
r = random.uniform(0.0, 1.0)
if (r < payoff.C): return "C"
return "N" if (r < payoff.N) else "D"
elif self.strategy == "argMax":
if dist.C > dist.N:
return "D" if dist.C > dist.D else "N"
return "C" if dist.N > dist.D else "N"
assert(0) #, "I am not a number! I am a free man!")
def update_history(self):
self.my_choices += self.choice
if len(self.my_choices) > self.MARKOV_ORDER:
assert(len(self.my_choices) == self.MARKOV_ORDER + 1)
self.my_choices = self.my_choices[1:]
def round(self, last):
if last: self.update_opponent_model(last)
dist = self.get_distribution()
self.choice = self.choose(dist)
self.update_history()
return self.choice
class Ensemble:
def __init__(self):
self.models = []
self.votes = []
self.prev_choice = []
for order in range(0, 6):
self.models.append(Number6("maxExpected", order))
self.models.append(Number6("randomize", order))
#self.models.append(Number6("argMax", order))
for i in range(0, len(self.models)):
self.votes.append(0)
self.prev_choice.append("D")
self.payoff = {
"C": { "C": 3-3, "N": 4-1, "D": 0-5 },
"N": { "C": 1-4, "N": 2-2, "D": 3-2 },
"D": { "C": 5-0, "N": 2-3, "D": 1-1 },
}
def round(self, last):
if last:
for i in range(0, len(self.models)):
self.votes[i] += self.payoff[self.prev_choice[i]][last]
# vote. Sufficiently terrible models get negative votes
C = 0
N = 0
D = 0
for i in range(0, len(self.models)):
choice = self.models[i].round(last)
if "C" == choice: C += self.votes[i]
if "N" == choice: N += self.votes[i]
if "D" == choice: D += self.votes[i]
self.prev_choice[i] = choice
if C > D and C > N: return "C"
elif N > D: return "N"
else: return "D"
Framework testowy
W przypadku, gdy ktokolwiek uzna to za przydatne, oto struktura testowa do wyszukiwania pojedynczych pojedynków. Python2. Po prostu umieść wszystkich przeciwników, których jesteś zainteresowany, w opponents.py i zmień odniesienia do Ensemble na własne.
import sys, inspect
import opponents
from ensemble import Ensemble
def count_payoff(label, them):
if None == them: return
me = choices[label]
payoff = {
"C": { "C": 3-3, "N": 4-1, "D": 0-5 },
"N": { "C": 1-4, "N": 2-2, "D": 3-2 },
"D": { "C": 5-0, "N": 2-3, "D": 1-1 },
}
if label not in total_payoff: total_payoff[label] = 0
total_payoff[label] += payoff[me][them]
def update_hist(label, choice):
choices[label] = choice
opponents = [ x[1] for x
in inspect.getmembers(sys.modules['opponents'], inspect.isclass)]
for k in opponents:
total_payoff = {}
for j in range(0, 100):
A = Ensemble()
B = k()
choices = {}
aChoice = None
bChoice = None
for i in range(0, 100):
count_payoff(A.__class__.__name__, bChoice)
a = A.round(bChoice)
update_hist(A.__class__.__name__, a)
count_payoff(B.__class__.__name__, aChoice)
b = B.round(aChoice)
update_hist(B.__class__.__name__, b)
aChoice = a
bChoice = b
print total_payoff