mac osx 10.8.5、gcc4.9针对AVX支持的编译问题



我的系统是 Mac OS X 10.8.5。此机器上的默认 gcc 为 4.2(i686-apple-darwin11-llvm-gcc-4.2 (GCC) 4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2335.15.00))。我已经在 /usr/local 下安装了 gcc 4.9。

我有一段使用 AVX2 内联函数(intrinsics)的代码。代码和编译时的错误消息附在下面。

#include <stdio.h>
#include <stdlib.h>
#include <immintrin.h>
#include <sys/time.h>
#define SIZE 4
#define TIMES 1
/*
 * Walks every element position i of a SIZE x SIZE matrix and, for each one,
 * builds two 4-lane index vectors:
 *   - a_vindex1: four consecutive indices starting at (i / SIZE) * 4
 *   - b_vindex1: the indices i % SIZE + SIZE * j for j = 0..3
 * The four floats of `a` selected by a_vindex1 are gathered with the AVX2
 * gather intrinsic, and both index sets are printed to stdout.
 *
 * a: array the gather reads from; must be valid for every index
 *    (i / SIZE) * 4 + j with i in [0, SIZE*SIZE) and j in [0, 4).
 * b, c: not used yet; kept so the signature stays unchanged.
 *
 * Requires AVX2 code generation (e.g. -mavx2) and an AVX2-capable CPU.
 */
void mmul(const float *a, const float *b, float *c)
{
    int a_vindex1[4] = {0, 0, 0, 0};
    int b_vindex1[4] = {0, 0, 0, 0};
    int m, i, j;
    __m128i a_index, b_index;
    __m128 a1;

    (void)b;
    (void)c;

    for (i = 0; i < SIZE * SIZE; i += 1) {
        /* NOTE(review): the row offset uses a literal 4, which equals SIZE
         * only for the current SIZE of 4 - confirm whether SIZE was meant. */
        m = (i / SIZE) * 4;
        for (j = 0; j < 4; j++) {
            b_vindex1[j] = i % SIZE + SIZE * j;
            a_vindex1[j] = m + j;
        }

        /* Plain int arrays are not guaranteed to be 16-byte aligned, so use
         * the unaligned load instead of dereferencing a casted __m128i*. */
        a_index = _mm_loadu_si128((const __m128i *)a_vindex1);
        b_index = _mm_loadu_si128((const __m128i *)b_vindex1);

        /* The scale is a byte multiplier applied to each index. The indices
         * are float element indices, so it must be 4 (sizeof(float)); a
         * scale of 1 would read from byte offsets inside the first floats. */
        a1 = _mm_i32gather_ps(a, a_index, 4);

        /* The gathered values and b_index are not consumed yet. */
        (void)a1;
        (void)b_index;

        /* Leading "\n" restored: without the backslash every record was
         * printed on one line with a stray 'n' in front of it. */
        printf("\nBINDEX %d,%d,%d,%d", b_vindex1[0], b_vindex1[1], b_vindex1[2], b_vindex1[3]);
        printf("\nAINDEX %d,%d,%d,%d", a_vindex1[0], a_vindex1[1], a_vindex1[2], a_vindex1[3]);
    }
}
/*
 * Allocates three zero-initialised SIZE x SIZE float matrices, fills
 * a[i] = i and b[i] = 0.5 * i, then calls mmul() TIMES times while
 * accumulating the elapsed wall-clock time in seconds, measured with
 * gettimeofday().
 *
 * Returns 0 on success, EXIT_FAILURE if an allocation fails.
 */
int main(void)
{
    float *a = calloc(SIZE * SIZE, sizeof *a);
    float *b = calloc(SIZE * SIZE, sizeof *b);
    float *c = calloc(SIZE * SIZE, sizeof *c);
    double timetotal = 0.0;
    struct timeval start, stop;
    int i, j;

    /* The buffers are written to immediately below, so bail out if any
     * allocation failed. free(NULL) is a no-op, so freeing all three is safe. */
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "mmul: out of memory\n");
        free(a);
        free(b);
        free(c);
        return EXIT_FAILURE;
    }

    for (i = 0; i < SIZE * SIZE; i++) {
        a[i] = (float)i;
        b[i] = (float)(0.5 * i);
    }

    for (j = 0; j < TIMES; j++) {
        gettimeofday(&start, NULL);
        mmul(a, b, c);
        gettimeofday(&stop, NULL);
        /* Convert to double before scaling so the seconds difference is
         * never multiplied by 1000000 in integer arithmetic. */
        timetotal += (double)(stop.tv_sec - start.tv_sec)
                   + (double)(stop.tv_usec - start.tv_usec) / 1000000.0;
    }

    /* Reporting of the average time (timetotal / TIMES) is disabled. */
    (void)timetotal;

    free(a);
    free(b);
    free(c);
    return 0;
}

现在,如果我用gcc 4.9编译这段代码

gcc-4.9 -O3 -march=core-avx2 a7.c,我得到以下错误消息:

/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:141:no such instruction: `vmovd %r8d, %xmm7'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:143:no such instruction: `vmovapd LC15(%rip), %ymm3'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:145:no such instruction: `vbroadcastss %xmm7, %ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:147:no such instruction: `vpaddd LC13(%rip), %ymm0,%ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:148:no such instruction: `vpaddd LC14(%rip), %ymm0,%ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:149:no such instruction: `vcvtdq2ps %ymm1, %ymm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:150:no such instruction: `vmovups %ymm2, (%rcx)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:151:no such instruction: `vcvtdq2pd %xmm1, %ymm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:152:no such instruction: `vmulpd %ymm3, %ymm2,%ymm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:153:no such instruction: `vextracti128 $0x1, %ymm1,%xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:154:no such instruction: `vcvtpd2psy %ymm2, %xmm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:155:no such instruction: `vcvtdq2pd %xmm1, %ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:156:no such instruction: `vmulpd %ymm3, %ymm1,%ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:157:no such instruction: `vcvtpd2psy %ymm1, %xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:158:no such instruction: `vinsertf128 $0x1, %xmm1,%ymm2,%ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:159:no such instruction: `vmovups %ymm1, (%rax)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:162:no such instruction: `vcvtdq2ps %ymm0, %ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:163:no such instruction: `vmovups %ymm1, 32(%rcx)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:164:no such instruction: `vcvtdq2pd %xmm0, %ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:165:no such instruction: `vextracti128 $0x1, %ymm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:166:no such instruction: `vmulpd %ymm3, %ymm1,%ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:167:no such instruction: `vcvtdq2pd %xmm0, %ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:168:no such instruction: `vcvtpd2psy %ymm1, %xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:169:no such instruction: `vmulpd %ymm3, %ymm0,%ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:170:no such instruction: `vcvtpd2psy %ymm0, %xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:171:no such instruction: `vinsertf128 $0x1, %xmm0,%ymm1,%ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:172:no such instruction: `vmovups %ymm0, 32(%rax)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:178:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:179:no such instruction: `vcvtsi2ss %ecx, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:181:no such instruction: `vxorps %xmm5, %xmm5,%xmm5'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:182:no such instruction: `vmovsd LC16(%rip), %xmm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:183:no such instruction: `vmovss %xmm0, (%rbx,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:184:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:185:no such instruction: `vcvtsi2sd %ecx, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:186:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:187:no such instruction: `vcvtsd2ss %xmm0, %xmm5,%xmm5'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:188:no such instruction: `vmovss %xmm5, (%r12,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:192:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:193:no such instruction: `vcvtsi2ss %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:195:no such instruction: `vxorps %xmm6, %xmm6,%xmm6'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:196:no such instruction: `vmovss %xmm0, (%rbx,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:197:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:198:no such instruction: `vcvtsi2sd %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:200:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:201:no such instruction: `vcvtsd2ss %xmm0, %xmm6,%xmm6'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:202:no such instruction: `vmovss %xmm6, (%r12,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:205:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:206:no such instruction: `vcvtsi2ss %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:208:no such instruction: `vxorps %xmm7, %xmm7,%xmm7'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:209:no such instruction: `vmovss %xmm0, (%rbx,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:210:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:211:no such instruction: `vcvtsi2sd %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:213:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:214:no such instruction: `vcvtsd2ss %xmm0, %xmm7,%xmm7'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:215:no such instruction: `vmovss %xmm7, (%r12,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:218:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:219:no such instruction: `vcvtsi2ss %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:221:no such instruction: `vxorps %xmm4, %xmm4,%xmm4'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:222:no such instruction: `vmovss %xmm0, (%rbx,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:223:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:224:no such instruction: `vcvtsi2sd %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:226:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:227:no such instruction: `vcvtsd2ss %xmm0, %xmm4,%xmm4'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:228:no such instruction: `vmovss %xmm4, (%r12,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:231:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:232:no such instruction: `vcvtsi2ss %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:234:no such instruction: `vxorps %xmm5, %xmm5,%xmm5'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:236:no such instruction: `vmovss %xmm0, (%rbx,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:237:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:238:no such instruction: `vcvtsi2sd %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:239:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:240:no such instruction: `vcvtsd2ss %xmm0, %xmm5,%xmm5'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:241:no such instruction: `vmovss %xmm5, (%r12,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:244:no such instruction: `vxorpd %xmm1, %xmm1,%xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:245:no such instruction: `vcvtsi2sd %edi, %xmm1,%xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:246:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:248:no such instruction: `vcvtsi2ss %edi, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:249:no such instruction: `vxorps %xmm6, %xmm6,%xmm6'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:251:no such instruction: `vmulsd %xmm2, %xmm1,%xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:252:no such instruction: `vmovss %xmm0, (%rbx,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:253:no such instruction: `vcvtsd2ss %xmm1, %xmm6,%xmm6'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:254:no such instruction: `vmovss %xmm6, (%r12,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:257:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:258:no such instruction: `vcvtsi2ss %ecx, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:260:no such instruction: `vxorps %xmm4, %xmm4,%xmm4'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:261:no such instruction: `vmovss %xmm0, (%rbx,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:262:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:263:no such instruction: `vcvtsi2sd %ecx, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:264:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:265:no such instruction: `vcvtsd2ss %xmm0, %xmm4,%xmm4'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:266:no such instruction: `vmovss %xmm4, (%r12,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:270:no such instruction: `vzeroupper'

我猜是因为我使用了 gcc-4.9,所以汇编器也需要更新?还是说这看起来像是其他问题……任何提示都会有所帮助。

添加选项:

-Wa,-q

到您的编译。

这个选项的含义可以从两处文档中查到。首先是GCC手册:

-Wa,选项

将选项作为选项传递给汇编程序。若选项包含逗号,则在逗号处将其拆分为多个选项。

然后是AS(基于GNU的Mac OS X Mach-O汇编程序)手册:

-q使用clang(1)集成汇编程序,而不是基于GNU的系统汇编程序。这是x86和arm体系结构的默认设置。

请注意,此选项在Darwin和GCC的更高版本上也是必要的(例如,在macOS 10.12和GCC 6下测试)。

最新更新