



# First component (the "A" value) of every entry in tuple_list.
A_vals = [pair[0] for pair in tuple_list]
# Lower bin edges, starting at the smallest A value and advancing by
# bin_width.  The stop is max + bin_width, so the last edge produced is at or
# below the largest A value and that value still falls inside a bin.
# NOTE(review): range() only accepts integers, so this assumes integer A
# values and an integer bin_width -- confirm against the caller.
smallest, largest = min(A_vals), max(A_vals)
bins = range(smallest, largest + bin_width, bin_width)


import numpy as np
# Each row is an (A, B) pair.  Rows are grouped by A into fixed-width
# intervals and the mean of B is computed for every interval.
data = [(6.5, 2), (3, 3), (4, 4), (5, 6.5), (7, 1), (11, 5.5)]
data = np.array(data)
width = 5
# Lower edge of the interval that contains the largest A value.
edge = data[:, 0].max() // width * width
# bins contains just the lower edges.  The stop is edge + width so that
# `edge` itself is part of the result; without it the largest A values would
# be counted in the interval below their own.
bins = np.arange(0, edge + width, width)
# digitize returns i with bins[i-1] <= x < bins[i]; 0 means x < bins[0] and
# len(bins) means x >= bins[-1], i.e. the last interval.
indices = np.digitize(data[:, 0], bins)
# One slot per interval; an interval without any row keeps NaN.
means = np.full(len(bins), np.nan)
for index in set(indices):
    if index == 0:
        # A value below the first lower edge belongs to no interval.
        continue
    means[index - 1] = data[indices == index, 1].mean()
print("lower edges:", bins, "upper edges:", bins + width)
print("means for each interval:", means)
# Expected output (exact spacing depends on NumPy's print options):
# lower edges: [ 0.  5. 10.] upper edges: [ 5. 10. 15.]
# means for each interval: [3.5        3.16666667 5.5       ]


import numpy as np 
import matplotlib.pyplot as plt 
# 100 random (A, B) rows; np.random.rand draws from [0, 1), so after scaling
# both columns lie in [0, 100).
data = np.random.rand(100, 2) * 100
# Visit the lower edges 0, 5, ..., 95 of the width-5 buckets on column 0.
for bucket in range(0, 100, 5):
    # Boolean mask of the rows whose column-0 value lies strictly between
    # bucket and bucket + 5.  Both comparisons are strict, so a value exactly
    # equal to a bucket boundary is not counted in any bucket.
    in_bucket = (data[:, 0] > bucket) & (data[:, 0] < (bucket + 5))
    # Column-1 values of the selected rows.
    b = data[in_bucket, 1]
    # An empty bucket has no mean: report NaN instead of dividing by zero.
    mean = b.sum() / b.shape[0] if b.shape[0] else float("nan")
    # Columns: bucket lower edge, shape of the selection, sum, mean.
    print(bucket, b.shape, b.sum(), mean)

让我来解释一下。`data[:,0] > bucket` 会生成一个布尔数组，其中第 0 列的元素大于 bucket 的位置为 True。`data[:,0] < (bucket+5)` 对区间的另一端做同样的事情。通过使用 `&`，我们得到一个布尔数组，只有第 0 列的值落在该范围内的位置才为 True。



0 (7,) 486.0666260029982 69.43808942899975
5 (2,) 162.75938362848922 81.37969181424461
10 (6,) 404.65207867981894 67.44201311330316
15 (4,) 216.561058153033 54.14026453825825
20 (4,) 158.6933597307469 39.67333993268672
25 (10,) 590.6337605765742 59.06337605765742
30 (8,) 499.48740241702797 62.435925302128496
35 (8,) 420.51694802312426 52.56461850289053
40 (4,) 142.7888162321809 35.69720405804522
45 (5,) 257.7772788672603 51.55545577345206
50 (3,) 143.1221738596666 47.70739128655553
55 (4,) 305.4444504680107 76.36111261700268
60 (3,) 101.81220129227752 33.937400430759176
65 (5,) 230.0416201277115 46.0083240255423
70 (4,) 207.47647619960338 51.869119049900846
75 (6,) 389.7446815957917 64.95744693263195
80 (2,) 148.1140683360823 74.05703416804116
85 (5,) 338.6856269475073 67.73712538950146
90 (4,) 211.83179680175812 52.95794920043953
95 (6,) 253.24855615591525 42.20809269265254

您使用"分箱范围"（bin range）的思路是对的，所以我将提供一个示例实现来帮助您。我认为您缺少的是一种把值归入相应范围的更好方法（这样所有 A 值都会对应到 0-5、5-10 等范围）。


from collections import defaultdict
from random import random

# Generate data to work on: 100 (A, B) pairs with A in [0, 20) and B in
# [-50, 50).
data = [(random() * 20, random() * 100 - 50) for _ in range(100)]
# Based on your question it seems the "bins" accept values differing by 5
classifier_difference = 5
# This is where the mapping comes into place, using defaultdict to avoid
# explicit initialisation for each key
bins = defaultdict(list)
# Bins will now map keys 0, 5, 10, ..., to a list of values exactly as they
# were in B.  You can modify key creation however you want really, but
# classifier // classifier_difference is the key point
for classifier, value in data:
    # Round A down to the lower edge of its bin: 0, 5, 10, ...
    key = int(classifier // classifier_difference) * classifier_difference
    # Store the B value under that bin; without this the mapping stays empty.
    bins[key].append(value)

# Show every bin as "lower edge : [values]" in ascending order of lower edge.
for key in sorted(bins):
    print(key, ":", bins[key])


0 : [-32.67530553821753, 24.35350483989876, 12.01416796389043]
5 : [8.908191728237938, -28.726927969770234, 3.3525305442688946, -28.831840180707637]
10 : [-48.54368729435071]
15 : [15.648424404193577, 33.47713312289213]

