在 javascript 中对数组进行分箱以获得直方图



我在 Javascript 中有以下数组,我需要将其装箱到 20 个存储桶中。数据值介于 0 和 1 之间,因此箱大小为 .05。我觉得应该有一个函数需要两个参数,一个数组和一个箱大小,但我找不到一个。我知道 D3.js 具有一些有助于构建此类数组的功能,但我无法弄清楚哪个函数可能会有所帮助。

var arr = [
  "0.362743", "0.357969", "0.356322", "0.355757", "0.358511",
  "0.357218", "0.356696", "0.354579", "0.828295", "0.391186",
  "0.378577", "0.39372", "0.396416", "0.395641", "0.37573",
  "0.379666", "0.377443", "0.391842", "0.402021", "0.377516",
  "0.38936", "0.38936", "0.400883", "0.393171", "0.374419",
  "0.400821", "0.380502", "0.396098", "0.388256", "0.398968",
  "0.392525", "0.401858", "0.387297", "0.376471", "0.378183",
  "0.379787", "0.382024", "0.387928", "0.395367", "0.391972",
  "0.381295", "0.391183", "0.383598", "0.386424", "0.384338",
  "0.401834", "0.406253", "0.392854", "0.399266", "0.400804",
  "0.391146", "0.395441", "0.396265", "0.397894", "0.384822",
  "0.385181", "0.395443", "0.400981", "0.401716", "0.406633",
  "0.406887", "0.40694", "0.391219", "0.387946", "0.398858",
  "0.402233", "0.388583", "0.389772", "0.397084", "0.711566",
  "0.954557", "0.524007", "0.672288", "0.668441", "0.421726",
  "0.549536", "0.932952", "0.397851", "0.395536", "0.354818",
  "0.374355", "0.375257", "0.362613", "0.391271", "0.379219",
  "0.363316", "0.866006", "0.862254", "0.864403", "0.861346",
  "0.845225", "0.784467", "0.801275", "0.638579", "0.847282",
  "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"
]

随着 D3 的发布.js v6 d3.layout.histogram已被现在属于 d3-array 模块的 d3.bin() 取代。

要对数组进行装箱,请创建一个直方图生成器:

var histGenerator = d3.bin()
  .domain([0,1])    // Set the domain to cover the entire intervall [0,1]
  .thresholds(19);  // number of thresholds; this will create 19+1 bins

还有更多选项可用于配置阈值,从而配置箱,但此生成器将完全按照您的要求进行操作。通过使用值数组调用生成器,将计算的箱作为数组检索:

var bins = histGenerator(arr);

看看这个工作示例:

var arr = [
  "0.362743", "0.357969", "0.356322", "0.355757", "0.358511",
  "0.357218", "0.356696", "0.354579", "0.828295", "0.391186",
  "0.378577", "0.39372", "0.396416", "0.395641", "0.37573",
  "0.379666", "0.377443", "0.391842", "0.402021", "0.377516",
  "0.38936", "0.38936", "0.400883", "0.393171", "0.374419",
  "0.400821", "0.380502", "0.396098", "0.388256", "0.398968",
  "0.392525", "0.401858", "0.387297", "0.376471", "0.378183",
  "0.379787", "0.382024", "0.387928", "0.395367", "0.391972",
  "0.381295", "0.391183", "0.383598", "0.386424", "0.384338",
  "0.401834", "0.406253", "0.392854", "0.399266", "0.400804",
  "0.391146", "0.395441", "0.396265", "0.397894", "0.384822",
  "0.385181", "0.395443", "0.400981", "0.401716", "0.406633",
  "0.406887", "0.40694", "0.391219", "0.387946", "0.398858",
  "0.402233", "0.388583", "0.389772", "0.397084", "0.711566",
  "0.954557", "0.524007", "0.672288", "0.668441", "0.421726",
  "0.549536", "0.932952", "0.397851", "0.395536", "0.354818",
  "0.374355", "0.375257", "0.362613", "0.391271", "0.379219",
  "0.363316", "0.866006", "0.862254", "0.864403", "0.861346",
  "0.845225", "0.784467", "0.801275", "0.638579", "0.847282",
  "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"
];
var histGenerator = d3.bin()
  .domain([0,1])    // Set the domain to cover the entire intervall [0;]
  .thresholds(19);  // number of thresholds; this will create 19+1 bins
var bins = histGenerator(arr);
console.log(bins);
<script src="http://d3js.org/d3.v6.js"></script>

您可以使用一些JS自己制作箱:

var arr = [
  "0.362743", "0.357969", "0.356322", "0.355757", "0.358511",
  "0.357218", "0.356696", "0.354579", "0.828295", "0.391186",
  "0.378577", "0.39372", "0.396416", "0.395641", "0.37573",
  "0.379666", "0.377443", "0.391842", "0.402021", "0.377516",
  "0.38936", "0.38936", "0.400883", "0.393171", "0.374419",
  "0.400821", "0.380502", "0.396098", "0.388256", "0.398968",
  "0.392525", "0.401858", "0.387297", "0.376471", "0.378183",
  "0.379787", "0.382024", "0.387928", "0.395367", "0.391972",
  "0.381295", "0.391183", "0.383598", "0.386424", "0.384338",
  "0.401834", "0.406253", "0.392854", "0.399266", "0.400804",
  "0.391146", "0.395441", "0.396265", "0.397894", "0.384822",
  "0.385181", "0.395443", "0.400981", "0.401716", "0.406633",
  "0.406887", "0.40694", "0.391219", "0.387946", "0.398858",
  "0.402233", "0.388583", "0.389772", "0.397084", "0.711566",
  "0.954557", "0.524007", "0.672288", "0.668441", "0.421726",
  "0.549536", "0.932952", "0.397851", "0.395536", "0.354818",
  "0.374355", "0.375257", "0.362613", "0.391271", "0.379219",
  "0.363316", "0.866006", "0.862254", "0.864403", "0.861346",
  "0.845225", "0.784467", "0.801275", "0.638579", "0.847282",
  "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"
]
var bins = [];
var binCount = 0;
var interval = .05;
var numOfBuckets = 1;
//Setup Bins
for(var i = 0; i < numOfBuckets; i += interval){
  bins.push({
    binNum: binCount,
    minNum: i,
    maxNum: i + interval,
    count: 0
  })
  binCount++;
}
//Loop through data and add to bin's count
for (var i = 0; i < arr.length; i++){
  var item = arr[i];
  for (var j = 0; j < bins.length; j++){
    var bin = bins[j];
    if(item > bin.minNum && item <= bin.maxNum){
      bin.count++;
      break;  // An item can only be in one bin.
    }
  }  
}

https://jsbin.com/keropoyadu/edit?js,output

d3js 库有一个 d3.layout.histogram() 函数,该函数返回直方图布局对象,用于将数据分组到箱中。布局对象既是对象又是函数。可以对布局对象调用方法来设置布局的所需行为。然后,可以调用布局对象将数据分组到箱数组中。每个箱都是一个值数组。每个箱都有 x、dx、dy 的加法属性。

例如,以下代码会将数据分组到 20 个箱中,涵盖从 0 到 1 的范围。

var arr = ["0.362743", "0.357969", "0.356322", "0.355757", "0.358511", "0.357218", "0.356696", "0.354579", "0.828295", "0.391186", "0.378577", "0.39372", "0.396416", "0.395641", "0.37573", "0.379666", "0.377443", "0.391842", "0.402021", "0.377516", "0.38936", "0.38936", "0.400883", "0.393171", "0.374419", "0.400821", "0.380502", "0.396098", "0.388256", "0.398968", "0.392525", "0.401858", "0.387297", "0.376471", "0.378183", "0.379787", "0.382024", "0.387928", "0.395367", "0.391972", "0.381295", "0.391183", "0.383598", "0.386424", "0.384338", "0.401834", "0.406253", "0.392854", "0.399266", "0.400804", "0.391146", "0.395441", "0.396265", "0.397894", "0.384822", "0.385181", "0.395443", "0.400981", "0.401716", "0.406633", "0.406887", "0.40694", "0.391219", "0.387946", "0.398858", "0.402233", "0.388583", "0.389772", "0.397084", "0.711566", "0.954557", "0.524007", "0.672288", "0.668441", "0.421726", "0.549536", "0.932952", "0.397851", "0.395536", "0.354818", "0.374355", "0.375257", "0.362613", "0.391271", "0.379219", "0.363316", "0.866006", "0.862254", "0.864403", "0.861346", "0.845225", "0.784467", "0.801275", "0.638579", "0.847282", "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"];
var bins = d3.layout.histogram()  // create layout object
    .bins(20)       // to use 20 bins
    .range([0, 1])  // to cover range from 0 to 1
    (arr);          // group the data into the bins

代码运行后...

bins[i] is an array of values in the ith bin
bins[i].x is the lower bounds of the ith bin
bins[i].dx is the width of the ith bin
bins[i].x + bins[i].dx is the upper bounds of the ith bin
bins[i].y is the number of values in the ith bin

直方图布局对象的文档位于...

https://github.com/d3/d3/wiki/Histogram-Layout

注: 默认情况下,布局对象将字符串值转换为数值。因此,布局函数将处理您的字符串值。

您想要的功能是直方图布局。你可以做这样的事情:

var data = d3.layout.histogram()
    .bins(20)
    (arr);

这只是一个一般示例,您必须调整值。检查文档:https://github.com/d3/d3/wiki/Histogram-Layout

最新更新