感谢您在这件事上花大量时间帮助我。我正试图用pthread计算平方数的和。然而,它似乎比串行实现还要慢。此外,当我增加线程数量时,程序会变得更慢。我确保每个线程都在不同的内核上运行(我为虚拟机分配了6个内核(
这是串行程序:
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/time.h>
#include <time.h>
int main(int argc, char *argv[]) {
struct timeval start, end;
gettimeofday(&start, NULL); //start time of calculation
int n = atoi(argv[1]);
long int sum = 0;
for (int i = 1; i < n; i++){
sum += (i * i);
}
gettimeofday(&end, NULL); //end time of calculation
printf("The sum of squares in [1,%d): %ld | Time Taken: %ld mirco seconds n",n,sum,
((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)));
return 0;
}
这是Pthreads程序:
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/time.h>
#include <time.h>
void *Sum(void *param);
// structure for thread arguments
struct thread_args {
int tid;
int a; //start
int b; //end
long int result; // partial results
};
int main(int argc, char *argv[])
{
struct timeval start, end;
gettimeofday(&start, NULL); //start time of calculation
int numthreads;
int number;
double totalSum=0;
if(argc < 3 ){
printf("Usage: ./sum_pthreads <numthreads> <number> ");
return 1;
}
numthreads = atoi(argv[1]);
number = atoi(argv[2]);;
pthread_t tid[numthreads];
struct thread_args targs[numthreads];
printf("I am Process | range: [%d,%d)n",1,number);
printf("Running Threads...nn");
for(int i=0; i<numthreads;i++ ){
//Setting up the args
targs[i].tid = i;
targs[i].a = (number)*(targs[i].tid)/(numthreads);
targs[i].b = (number)*(targs[i].tid+1)/(numthreads);
if(i == numthreads-1 ){
targs[i].b = number;
}
pthread_create(&tid[i],NULL,Sum, &targs[i]);
}
for(int i=0; i< numthreads; i++){
pthread_join(tid[i],NULL);
}
printf("Threads Exited!n");
printf("Process collecting information...n");
for(int i=0; i<numthreads;i++ ){
totalSum += targs[i].result;
}
gettimeofday(&end, NULL); //end time of calculation
printf("Total Sum is: %.2f | Taken Time: %ld mirco seconds n",totalSum,
((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)));
return 0;
}
void *Sum(void *param) {
int start = (*(( struct thread_args*) param)).a;
int end = (*((struct thread_args*) param)).b;
int id = (*((struct thread_args*)param)).tid;
long int sum =0;
printf("I am thread %d | range: [%d,%d)n",id,start,end);
for (int i = start; i < end; i++){
sum += (i * i);
}
(*((struct thread_args*)param)).result = sum;
printf("I am thread %d | Sum: %ldnn", id ,(*((struct thread_args*)param)).result );
pthread_exit(0);
}
结果:
hamza@hamza:~/Desktop/lab4$ ./sum_serial 10
The sum of squares in [1,10): 285 | Time Taken: 7 mirco seconds
hamza@hamza:~/Desktop/lab4$ ./sol 2 10
I am Process | range: [1,10)
Running Threads...
I am thread 0 | range: [0,5)
I am thread 0 | Sum: 30
I am thread 1 | range: [5,10)
I am thread 1 | Sum: 255
Threads Exited!
Process collecting information...
Total Sum is: 285.00 | Taken Time: 670 mirco seconds
hamza@hamza:~/Desktop/lab4$ ./sol 3 10
I am Process | range: [1,10)
Running Threads...
I am thread 0 | range: [0,3)
I am thread 0 | Sum: 5
I am thread 1 | range: [3,6)
I am thread 1 | Sum: 50
I am thread 2 | range: [6,10)
I am thread 2 | Sum: 230
Threads Exited!
Process collecting information...
Total Sum is: 285.00 | Taken Time: 775 mirco seconds
hamza@hamza:~/Desktop/lab4$
这两个程序做的事情非常不同。例如,线程化程序产生更多的文本输出并创建一堆线程。您正在比较非常的短距离运行(不到千分之一秒(,因此这些额外的开销是显著的。
您必须使用更长的运行时间进行测试,这样就失去了产生额外输出以及创建和同步线程的成本。
打个比方,一个人拧紧三个螺丝的速度比三个人快,因为每个人都要拿一个工具,决定谁来拧紧哪个螺丝,等等都会带来开销。但如果你有500个螺丝要拧紧,那么三个人会更快地完成。