常数时间复杂度的带权值随机选择算法

算法&代码来源：https://www.peterstefek.me/alias-method.html
# 已知数据：N个可能的选项和它们的权值
# 目标：根据权值在N个选项中进行随机选择
 
# 思路：
 
'''
先预处理，将总权值1，均分为N个格子，将N个原本的选项，放置在N个格子中
保证每个格子中的权值，来源于原本的1或2个选项
这样每次随机选择就变成了：在N个等可能的选项中随机选择，然后在2个带权值的选中随机选择
'''
 
import numpy as np
from bintrees import AVLTree
import random
 
def partitionWeights(weights):
    """
    The preprocessing step.
    :param weights: A dictionary of weights which sum to one.
    :return: A partition used to draw quickly from the distribution
    """
    epsilon = 0.00001 # for floating point precision issues
    boxes = []
    numWeights = len(weights)
    # We use a AVLTree to make our pull/push operations O(log n)
    tree = AVLTree(weights)
    for i in xrange(numWeights):
        smallestValue, smallestColor = tree.pop_min() # O(log n)
        overfill = 1.0 / numWeights - smallestValue
        if overfill > epsilon:
            largestValue, largestColor = tree.pop_max() # O(log n)
            largestValue -= overfill
            if largestValue > epsilon:
                tree.insert(largestValue, largestColor) # O(log n)
            boxes.append((smallestValue, smallestColor, largestColor))
        else:
            boxes.append((smallestValue, smallestColor, "none"))
    return boxes
 
def drawFromPartition(partition):
    """
    The draw step.
    :param partition: partition A partition of a distribution into boxes.
    :return: A sample from the distribution represented by the partition.
    """
    numBoxes = len(partition)
    i = random.randint(0, numBoxes - 1)
    value, color1, color2 = partition[i]
    if random.random() / numBoxes <= value:
        return color1
    else:
        return color2
 
weights = {3.0/6.0 : "orange", 1.0/6.0 : "blue", 2.0/6.0 : "green"}
 
partition = partitionWeights(weights)
 
counts = {"orange" : 0, "blue" : 0, "green" : 0}
for i in xrange(600000):
    counts[drawFromPartition(partition)] += 1
 
print counts