我在编译时遇到了一个奇怪的错误:
#include <stdio.h>
#include <stdlib.h>
#include <mach/mach_time.h>
#include <mm_malloc.h>
#ifdef SSE
#include <x86intrin.h>
#define ALIGN 16
/*
 * Element-wise sum of two double arrays: c[k] = a[k] + b[k] for 0 <= k < size.
 *
 * size  number of elements to process; values <= 0 leave c untouched
 * a, b  input arrays holding at least `size` doubles each
 * c     output array with room for at least `size` doubles
 *
 * Elements are processed two at a time with 128-bit SSE2 operations. The
 * unaligned load/store intrinsics are used, so the arrays need no particular
 * alignment. When `size` is odd the last element is added with scalar code;
 * handling it as a pair would read and write one element past the end of
 * each array.
 */
void addition_tab(int size, double *a, double *b, double *c)
{
    int i;

    // Main loop: only full pairs, 128 bits = 16 bytes = 2*sizeof(double)
    for (i = 0; i + 1 < size; i += 2)
    {
        const __m128d x = _mm_loadu_pd(a + i);   // Load two x elements
        const __m128d y = _mm_loadu_pd(b + i);   // Load two y elements
        const __m128d sum = _mm_add_pd(x, y);    // Compute two sum elements
        _mm_storeu_pd(c + i, sum);               // Store two sum elements
    }

    // Scalar tail: the single leftover element when size is odd
    if (i < size)
    {
        c[i] = a[i] + b[i];
    }
}
#endif
/*
 * Benchmark driver: reads the array size from the first command-line
 * argument and allocates the two input arrays and the output array.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE when the size argument is
 * missing or not a positive integer, or when an allocation fails.
 */
int main(int argc, char *argv[])
{
    // Array index (not used in this excerpt)
    int i;
    // Array size as argument
    int size;
    // Time elapsed (not used in this excerpt)
    uint64_t t1, t2;
    float duration;
    // Two input arrays and one output array
    double *tab_x;
    double *tab_y;
    double *tab_z;
    // Get the timebase info
    mach_timebase_info_data_t info;
    int status = EXIT_SUCCESS;
    size_t bytes;

    // argv[1] must exist before it is parsed
    if (argc < 2)
    {
        fprintf(stderr, "usage: %s <size>\n", argc > 0 ? argv[0] : "exe");
        return EXIT_FAILURE;
    }

    // A zero or negative size would turn into a bogus allocation request
    size = atoi(argv[1]);
    if (size <= 0)
    {
        fprintf(stderr, "size must be a positive integer\n");
        return EXIT_FAILURE;
    }
    bytes = (size_t)size * sizeof(double);

    mach_timebase_info(&info);
#ifdef NOVEC
    // Allocation
    tab_x = malloc(bytes);
    tab_y = malloc(bytes);
    tab_z = malloc(bytes);
#else
    // Allocation
    // ALIGN is #defined only inside the `#ifdef SSE` section, so this branch
    // compiles only when SSE is defined (and NOVEC is not).
    tab_x = _mm_malloc(bytes, ALIGN);
    tab_y = _mm_malloc(bytes, ALIGN);
    tab_z = _mm_malloc(bytes, ALIGN);
#endif
    if (tab_x == NULL || tab_y == NULL || tab_z == NULL)
    {
        fprintf(stderr, "allocation of %zu bytes failed\n", bytes);
        status = EXIT_FAILURE;
    }

    // Release with the function that matches the allocator used above
#ifdef NOVEC
    free(tab_x);
    free(tab_y);
    free(tab_z);
#else
    _mm_free(tab_x);
    _mm_free(tab_y);
    _mm_free(tab_z);
#endif
    return status;
}
如果我使用:
gcc-mp-4.9 -DNOVEC -O0 main.c -o exe
则编译可以顺利完成。但如果使用:
gcc-mp-4.9 -DSSE -O3 -msse main.c -o exe
就会出现以下错误:
main.c: In function 'main':
main.c:96:52: error: 'ALIGN' undeclared (first use in this function)
tab_x = (double*) _mm_malloc(size*sizeof(double),ALIGN);
但是,既然我已经通过 gcc-mp-4.9 -DSSE 传入了 SSE 宏,ALIGN 就应该已经被定义了,不是吗?
我在您的脚本中找到了根本原因:您没有把 novec 的情况单独隔离出来,因此使用 NOVEC 宏的那次编译总是会被执行。您可以改用:
# Build only the variant named by the first argument: novec, sse or avx.
# Each variant produces an executable and the matching assembly listing (-S).
case "$1" in
novec)
    # Non-vectorized build
    $GCC -DNOVEC -O0 main_benchmark.c -o noVectorizedExe
    $GCC -DNOVEC -O0 main_benchmark.c -S -o noVectorizedExe.s
    ;;
sse)
    # SSE build
    $GCC -DSSE -O3 -msse main_benchmark.c -o vectorizedExe
    $GCC -DSSE -O3 -msse main_benchmark.c -S -o vectorizedExe.s
    echo "Test"
    ;;
avx)
    # AVX256 build
    $GCC -DAVX256 -O3 -mavx main_benchmark.c -o vectorizedExe
    $GCC -DAVX256 -O3 -mavx main_benchmark.c -S -o vectorizedExe.s
    ;;
esac
编辑
我找到原因了,你的命令里有一个拼写错误!
$GCC -DNOVEV -O0 main_benchmark.c -S -o noVectorizedExe.s
应该是
$GCC -DNOVEC -O0 main_benchmark.c -S -o noVectorizedExe.s