Rust itertools与可变长度计数的组合



我想计算一个向量的组合。

我可以使用itertools::Itertools:combinations这样的特性轻松地做到这一点:

vec![1, 2, 3].iter().combinations(2).for_each(|x| {
println!("{:?}", x);
});

但我想指定组合长度以及这些长度的计数。例如:

values = [0, 1, 2, 3, 4]
# 1 group with a length of 3 and 1 group with a length of 2
len_counts = { 3: 1, 2: 1 }
combinations = [
[{0, 1, 2}, {3, 4}]
[{0, 1, 3}, {2, 4}]
[{0, 1, 4}, {2, 3}]
[{0, 2, 3}, {1, 4}]
[{0, 2, 4}, {1, 3}]
[{0, 3, 4}, {1, 2}]
[{1, 2, 3}, {0, 4}]
[{1, 2, 4}, {0, 3}]
[{1, 3, 4}, {0, 2}]
[{2, 3, 4}, {0, 1}]
]

我希望它是懒惰加载和尽可能干净。我试着得到这个输出有一段时间了,但没能成功。感谢您的帮助。

编辑:用于表示变量的组合顺序和数据结构并不重要。

经过一番思考,我很遗憾没能想出一个干净简单的解决方案。

尽管如此,我还是想出了一个解决方案:(虽然很乱,但恐怕:D

use std::{collections::HashSet, iter::Peekable};
use itertools::{Combinations, Itertools};
// This struct is so we can `HashSet` by reference address.
// This prevents that `T` needs to be hashable.
struct GroupedCombinationsValue<'a, T>(&'a T);
impl<'a, T> GroupedCombinationsValue<'a, T> {
fn new(val: &'a T) -> Self {
Self(val)
}
}
impl<'a, T> std::hash::Hash for GroupedCombinationsValue<'a, T> {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
std::ptr::hash(self.0, state);
}
}
impl<'a, T> PartialEq for GroupedCombinationsValue<'a, T> {
fn eq(&self, other: &Self) -> bool {
std::ptr::eq(self.0, other.0)
}
}
impl<'a, T> Clone for GroupedCombinationsValue<'a, T> {
fn clone(&self) -> Self {
Self(self.0)
}
}
impl<'a, T> Eq for GroupedCombinationsValue<'a, T> {}
struct GroupedCombinations<'a, T> {
values: HashSet<GroupedCombinationsValue<'a, T>>,
leftover_counts: &'a [usize],
iter: Peekable<Combinations<std::vec::IntoIter<&'a T>>>,
child_iter: Option<Box<GroupedCombinations<'a, T>>>,
}
impl<'a, T> GroupedCombinations<'a, T> {
fn new(values: Vec<&'a T>, counts: &'a [usize]) -> Self {
let count;
let leftover_counts;
if counts.len() == 0 {
count = 0;
leftover_counts = counts;
} else {
count = counts[0];
leftover_counts = &counts[1..];
}
let iter = values.clone().into_iter().combinations(count).peekable();
let values = values
.into_iter()
.map(GroupedCombinationsValue::new)
.collect::<HashSet<_>>();
Self {
values,
leftover_counts,
iter,
child_iter: None,
}
}
}
impl<'a, T> Iterator for GroupedCombinations<'a, T> {
type Item = Vec<Vec<&'a T>>;
fn next(&mut self) -> Option<Self::Item> {
let local_value = self.iter.peek()?;
if self.child_iter.is_none() && !self.leftover_counts.is_empty() {
let child_values = self
.values
.difference(
&local_value
.iter()
.cloned()
.map(GroupedCombinationsValue::new)
.collect(),
)
.map(|e| e.0)
.collect::<Vec<_>>();
self.child_iter = Some(Box::new(Self::new(child_values, self.leftover_counts)));
}
let mut result = vec![];
if !local_value.is_empty() {
result.extend_from_slice(&[local_value.clone()]);
}
if let Some(child_iter) = &mut self.child_iter {
match child_iter.next() {
Some(child_value) => {
result.extend(child_value);
Some(result)
}
None => {
self.child_iter = None;
self.iter.next();
self.next()
}
}
} else {
self.iter.next();
Some(result)
}
}
}
fn grouped_combinations<'a, T>(values: &'a [T], counts: &'a [usize]) -> GroupedCombinations<'a, T> {
GroupedCombinations::new(values.iter().collect(), counts)
}
fn main() {
let values = [0, 1, 2, 3, 4];
let counts = [3, 2];
let combinations = grouped_combinations(&values, &counts);
for combination in combinations {
println!("{:?}", combination);
}
}
[[0, 1, 2], [3, 4]]
[[0, 1, 3], [2, 4]]
[[0, 1, 4], [2, 3]]
[[0, 2, 3], [1, 4]]
[[0, 2, 4], [1, 3]]
[[0, 3, 4], [1, 2]]
[[1, 2, 3], [4, 0]]
[[1, 2, 4], [3, 0]]
[[1, 3, 4], [2, 0]]
[[2, 3, 4], [1, 0]]

听起来您想要做的是将一系列n项划分为m集,其中每个集都有预定义的长度?您可以使用递归方法:

给定一系列长度lengths和所需的事物列表items:

  • 首先检查lengths是否为空,如果为空,则生成一个空列表并停止
  • lengths弹出第一个长度并将其存储在current_length
  • 对于长度为CCD_ 11的CCD_ 10的每个组合CCD_
    1. 使用items中未包含在combination中的所有项目生成新列表remaining_items
    2. 使用参数lengthsremaining_items对该函数进行递归调用,并且对于每个结果rest执行:
      • 预处理combination得到rest

这将为您提供一个生成器,该生成器将在没有任何重复的情况下生成所需的结果。

如果您可以在夜间使用rust和itertools库,则实现是:

#![feature(generators, generator_trait)]
use std::{ops::Generator, iter::FromIterator, pin::Pin};
use std::ops::GeneratorState;
use itertools::Itertools;
fn partition_series(sizes: Vec<usize>, items: Vec<u64>) -> impl Iterator<Item = Vec<Vec<u64>>> {
GeneratorToIterator(move || {
if sizes.len() == 0 {
yield vec![];
return;
}

let current_size = sizes[0];
for combination in items.clone().into_iter().combinations(current_size) {
let remaining_items: Vec<u64> = items
.clone()
.into_iter()
.filter(|n| !combination.contains(n))
.collect();
let inner_generator: Box<dyn Iterator<Item = Vec<Vec<u64>>>> = Box::new(partition_series(sizes[1..].into(), remaining_items));
for mut rest in inner_generator {
rest.insert(0, combination.clone());
yield rest;
} 
}
})
}
struct GeneratorToIterator<G>(G);
impl<G> Iterator for GeneratorToIterator<G>
where
G: Generator<Return = ()>,
{
type Item = G::Yield;
fn next(&mut self) -> Option<Self::Item> {
let me = unsafe { Pin::new_unchecked(&mut self.0) };
match me.resume(()) {
GeneratorState::Yielded(x) => Some(x),
GeneratorState::Complete(_) => None,
}
}
}

您可以通过:调用从0到n的一系列数字

fn main() {
let sizes = vec![3, 2];
let total_size = sizes.iter().sum::<usize>() as u64;
let numbers = Vec::from_iter(0..total_size);
for combination in partition_series(sizes, numbers) {
println!("{:?}", combination);
}
}

这将产生以下输出:

[[0, 1, 2], [3, 4]]
[[0, 1, 3], [2, 4]]
[[0, 1, 4], [2, 3]]
[[0, 2, 3], [1, 4]]
[[0, 2, 4], [1, 3]]
[[0, 3, 4], [1, 2]]
[[1, 2, 3], [0, 4]]
[[1, 2, 4], [0, 3]]
[[1, 3, 4], [0, 2]]
[[2, 3, 4], [0, 1]]

由于rust实现可能有点难以理解,因为生成器的人机工程学有些笨拙,这里还有一个python实现:

from typing import Iterable, List
from itertools import combinations

def main():
for combination in generate_next([2, 2, 1]):
print(combination)
def generate_next(sizes: List[int]) -> Iterable[List[List[int]]]:
total_size = sum(sizes)
numbers = list(range(total_size))
yield from generate_next_internal(sizes, numbers)
def generate_next_internal(sizes: List[int], remaining: List[int]) -> Iterable[List[List[int]]]:
if len(sizes) == 0:
yield []
return
current_size = sizes.pop(0)
for combination in combinations(list(remaining), current_size):
new_remaining = [i for i in remaining if i not in combination]
for rest in generate_next_internal(list(sizes), new_remaining):
rest.insert(0, list(combination))
yield rest

if __name__ == '__main__':
main()

相关内容

  • 没有找到相关文章

最新更新