r-使用rcpp::cppFunction编译许多嵌套循环



R-用户,

我试图用蛮力来解决一个组合问题,以评估某种近似计算的质量。为此,我编写了一个小的R函数,它针对变量r输出一个极其丑陋的Rcpp函数,其中包含r个嵌套循环和一些break条件。然而,当r达到20以上时,用Rcpp::cppFunction编译该函数需要非常长的时间。

有谁能解释一下,为什么Rcpp的编译会因为大量嵌套循环而无法完成?当我用g++把该函数作为普通C++程序编译时,不到一秒钟就能编译完成,并且运行完美(使用cout而不是Rcout)。

我可能遗漏了一些明显的东西,因为当我删除除最内层break条件之外的所有条件时,用Rcpp就能顺利编译。然而,当我把这最后一个break条件也删除时,编译又无法完成了……有什么建议吗?

附言:这是一个r=20的示例程序,我仍在等待完成编译。警告:它很难看,但是自动生成的。

# Auto-generated reproduction case: compiles, via cppFunction(), a C++ function
# with 20 nested loops (counters t20 ... t1, outermost to innermost) and r fixed
# at 20.
#   * The innermost body prints each tuple t1,...,t20 that satisfies
#     1*t1 + 2*t2 + ... + 20*t20 == r.
#   * After each loop body, a check breaks out of that loop as soon as the
#     weighted sum of its own counter and all enclosing counters exceeds r.
#   * r and the divisors are ints, so r/k is already an integer division;
#     floor() does not change the bound.
#   * NOTE(review): the leading blank line is written to std::cout while the
#     tuples are written to Rcout.
#   * The compiled function always returns 0.
cppFunction('
int make_tList_rcpp() {
int r = 20;
std::cout << std::endl;
for (int t20=0; t20 <= floor(r/20); t20++) {
for (int t19=0; t19 <= floor(r/19); t19++) {
for (int t18=0; t18 <= floor(r/18); t18++) {
for (int t17=0; t17 <= floor(r/17); t17++) {
for (int t16=0; t16 <= floor(r/16); t16++) {
for (int t15=0; t15 <= floor(r/15); t15++) {
for (int t14=0; t14 <= floor(r/14); t14++) {
for (int t13=0; t13 <= floor(r/13); t13++) {
for (int t12=0; t12 <= floor(r/12); t12++) {
for (int t11=0; t11 <= floor(r/11); t11++) {
for (int t10=0; t10 <= floor(r/10); t10++) {
for (int t9=0; t9 <= floor(r/9); t9++) {
for (int t8=0; t8 <= floor(r/8); t8++) {
for (int t7=0; t7 <= floor(r/7); t7++) {
for (int t6=0; t6 <= floor(r/6); t6++) {
for (int t5=0; t5 <= floor(r/5); t5++) {
for (int t4=0; t4 <= floor(r/4); t4++) {
for (int t3=0; t3 <= floor(r/3); t3++) {
for (int t2=0; t2 <= floor(r/2); t2++) {
for (int t1=0; t1 <= floor(r/1); t1++) {
if ((1*t1+2*t2+3*t3+4*t4+5*t5+6*t6+7*t7+8*t8+9*t9+10*t10+11*t11+12*t12+13*t13+14*t14+15*t15+16*t16+17*t17+18*t18+19*t19+20*t20) == r) {
Rcout << t1 << "," << t2 << "," << t3 << "," << t4 << "," << t5 << "," << t6 << "," << t7 << "," << t8 << "," << t9 << "," << t10 << "," << t11 << "," << t12 << "," << t13 << "," << t14 << "," << t15 << "," << t16 << "," << t17 << "," << t18 << "," << t19 << "," << t20 << std::endl;
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4+3*t3+2*t2+1*t1) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4+3*t3+2*t2) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4+3*t3) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18) > r) {
break;
}
}
if ((20*t20+19*t19) > r) {
break;
}
}
if ((20*t20) > r) {
break;
}
}
return(0);
}')

正如@spacedman所建议的,当使用sourceCpp时,这里有更多的调试信息。正如在对sourceCpp建议的进一步评论中所写的,它似乎在Linux上工作,因此可能是与Mac相关的问题…:

> sourceCpp(file="foobar.cpp",verbose=TRUE, rebuild=TRUE)
Generated extern "C" functions 
--------------------------------------------------------

#include <Rcpp.h>
// make_tList_rcpp
void make_tList_rcpp();
RcppExport SEXP sourceCpp_1_make_tList_rcpp() {
BEGIN_RCPP
Rcpp::RNGScope rcpp_rngScope_gen;
make_tList_rcpp();
return R_NilValue;
END_RCPP
}
Generated R functions 
-------------------------------------------------------
`.sourceCpp_1_DLLInfo` <- dyn.load('/private/var/folders/bj/k_b2brs5443bmm8699v5fvxw0000gn/T/RtmpDSa3m8/sourceCpp-x86_64-apple-darwin13.4.0-0.12.9/sourcecpp_a7c1e15e92a/sourceCpp_8.so')
make_tList_rcpp <- Rcpp:::sourceCppFunction(function() {}, TRUE, `.sourceCpp_1_DLLInfo`, 'sourceCpp_1_make_tList_rcpp')
rm(`.sourceCpp_1_DLLInfo`)
Building shared library
--------------------------------------------------------
DIR: /private/var/folders/bj/k_b2brs5443bmm8699v5fvxw0000gn/T/RtmpDSa3m8/sourceCpp-x86_64-apple-darwin13.4.0-0.12.9/sourcecpp_a7c1e15e92a
/Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB -o 'sourceCpp_8.so' --preclean  'foobar.cpp'  
clang++ -I/Library/Frameworks/R.framework/Resources/include -DNDEBUG  -I/usr/local/include -I/usr/local/include/freetype2 -I/opt/X11/include  -I"/Users/hoehle/Library/R/3.3/library/Rcpp/include" -I"/Users/hoehle/Sandbox/Blog/_source"   -fPIC  -Wall -mtune=core2 -g -O2  -c foobar.cpp -o foobar.o

(这就是它挂的地方…)

附:这是sessionInfo()的输出

R version 3.3.2 (2016-10-31)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: macOS Sierra 10.12.2
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     
other attached packages:
[1] Rcpp_0.12.9
loaded via a namespace (and not attached):
[1] compiler_3.3.2 tools_3.3.2   

除macOS之外,它在所有平台上都能编译并运行。

编辑后,我们有一些有用的调试信息。特别是使用的标志:

clang++ -I/Library/Frameworks/R.framework/Resources/include -DNDEBUG  -I/usr/local/include -I/usr/local/include/freetype2 -I/opt/X11/include  -I"/Users/hoehle/Library/R/3.3/library/Rcpp/include" -I"/Users/hoehle/Sandbox/Blog/_source"   -fPIC  -Wall -mtune=core2 -g -O2  -c foobar.cpp -o foobar.o

macOS上嵌套for循环编译出问题的原因,与clang在-O2、-Os下如何优化嵌套循环有关。具体来说,clang v3.0中的一个回归(regression)直接影响了对这类循环的优化能力。特别是,请参阅:

https://llvm.org/bugs/show_bug.cgi?id=16196

这个问题看起来已在3.8中修复。缺点是,您必须手动升级到该版本的编译器,因为所有macOS机器自带的clang版本都带有这个回归。您也可以直接在macOS上改用gcc。无论如何,下面的帖子应该有助于通过Homebrew和~/.R/Makevars设置合适的编译器

http://thecoatlessprofessor.com/programming/openmp-in-r-on-os-x/

您可能希望使用sourceCpp()而不是cppFunction(),因为后者用于更简单的函数。sourceCpp()函数适用于更复杂的情况。此外,我可能会选择避免指定int返回类型。

sourceCpp()的使用示例

# In R
sourceCpp("path_to/example_comb.cpp")

文件:example_comb.cpp

#include <Rcpp.h>
// Brute-force enumeration, kept in its auto-generated 20-deep nested-loop form.
// With r fixed at 20, prints every tuple (t1, ..., t20) of non-negative
// integers satisfying 1*t1 + 2*t2 + ... + 20*t20 == r, one comma-separated
// line per tuple. Takes no arguments and returns nothing.
// Each loop bound is floor(r/k); r and k are ints, so r/k is already an
// integer division and floor() does not change the value.
// [[Rcpp::export]]
void make_tList_rcpp() {
int r = 20;
// NOTE(review): this blank line goes to std::cout, whereas the tuples below
// are written to Rcpp::Rcout.
std::cout << std::endl;
for (int t20=0; t20 <= floor(r/20); t20++) {
for (int t19=0; t19 <= floor(r/19); t19++) {
for (int t18=0; t18 <= floor(r/18); t18++) {
for (int t17=0; t17 <= floor(r/17); t17++) {
for (int t16=0; t16 <= floor(r/16); t16++) {
for (int t15=0; t15 <= floor(r/15); t15++) {
for (int t14=0; t14 <= floor(r/14); t14++) {
for (int t13=0; t13 <= floor(r/13); t13++) {
for (int t12=0; t12 <= floor(r/12); t12++) {
for (int t11=0; t11 <= floor(r/11); t11++) {
for (int t10=0; t10 <= floor(r/10); t10++) {
for (int t9=0; t9 <= floor(r/9); t9++) {
for (int t8=0; t8 <= floor(r/8); t8++) {
for (int t7=0; t7 <= floor(r/7); t7++) {
for (int t6=0; t6 <= floor(r/6); t6++) {
for (int t5=0; t5 <= floor(r/5); t5++) {
for (int t4=0; t4 <= floor(r/4); t4++) {
  for (int t3=0; t3 <= floor(r/3); t3++) {
      for (int t2=0; t2 <= floor(r/2); t2++) {
          for (int t1=0; t1 <= floor(r/1); t1++) {
              // Emit the tuple when the full weighted sum hits r exactly.
              if ((1*t1+2*t2+3*t3+4*t4+5*t5+6*t6+7*t7+8*t8+9*t9+10*t10+11*t11+12*t12+13*t13+14*t14+15*t15+16*t16+17*t17+18*t18+19*t19+20*t20) == r) {
                  Rcpp::Rcout << t1 << "," << t2 << "," << t3 << "," << t4 << "," << t5 << "," << t6 << "," << t7 << "," << t8 << "," << t9 << "," << t10 << "," << t11 << "," << t12 << "," << t13 << "," << t14 << "," << t15 << "," << t16 << "," << t17 << "," << t18 << "," << t19 << "," << t20 << std::endl;
              }
              // Pruning: the counter only grows, so once the sum exceeds r
              // no later value of t1 can match; leave this loop.
              if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4+3*t3+2*t2+1*t1) > r) {
                  break;
              }
          }
          // The same pruning pattern repeats at every enclosing level, each
          // time over the weighted sum of that loop's counter and the
          // counters of the loops around it.
          if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4+3*t3+2*t2) > r) {
              break;
          }
      }
      if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4+3*t3) > r) {
          break;
      }
  }
  if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4) > r) {
      break;
  }
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5) > r) {
  break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18) > r) {
break;
}
}
if ((20*t20+19*t19) > r) {
break;
}
}
if ((20*t20) > r) {
break;
}
}
}

/*** R
# Runs automatically in R after compile.
# make_tList_rcpp() is exported with no parameters, so it must be called
# without arguments (passing one fails with "unused argument").
make_tList_rcpp()
*/

功能输出:

https://gist.github.com/coatless/aa51267dcda82b42622fdc8e6e566ab7

最新更新