我目前正在使用MPI创建一个并行化代码,以找出任何给定图形中有多少个三角形。到目前为止,我的代码能够成功地获得正确数量的三角形(我知道,因为我有相同代码的序列化版本,运行速度要慢得多),直到某一点。我想说,在大约6000个节点之后,我的线程不再在函数中被读取(我通过查看主函数中的线程,而不是计算线程是否进入函数中,发现了这一点)。
作为参考,代码本身包含许多节点、边、种子和度。
为此,我们将忽略种子和程度,因为当一切正常时,它们工作得非常好。
编辑:我将快速解释那些丢失的变量的命名约定。本质上,我们得到了一个由多个节点以及连接它们的边组成的图(想想邻接列表)。现在,程序的任务是遍历每个顶点u,并在图中取另一个顶点v,以确定它们之间是否有相应的顶点w。在这种情况下,由于C中没有布尔值,我们将对uv、uw和vw边使用ints。这样,如果有一个连接边,那么我们可以将其转换为1。如果它们都是1,那么我们找到了一个三角形,现在可以将其添加到全局变量中。让我们知道,在for循环中计算三角形的代码是百分之百正确的,而不是问题所在。相反,这个问题涉及到在更高节点上使用pthread的问题。
下面是代码:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include "graph.h"
#define MAX_N 1000000
#define MAX_E 2*MAX_N // must be at least twice MAX_N
GRAPH_t * G;
#define MAX_THREADS 65536
#include <pthread.h>
int thread_id[MAX_THREADS]; // User defined id for thread
pthread_t p_threads[MAX_THREADS];// Threads
pthread_attr_t attr; // Thread attributes
pthread_mutex_t lock_count; // Protects minimum, count
unsigned int parallelCount = 0;
unsigned int threadCount = 0;
void *count_ParallelTriangles(void *threadID) {
int u = *((int *)threadID);
int counter = 0;
unsigned int v, w, e, uv, uw, vw;
for (v = u + 1; v < G->n; v++) {
uv = 0;
for (e = G->V_ptr[v]; e < G->V_ptr[v + 1]; e++) {
if (G->E_v[e] == u) {
uv = 1; // Edge (u,v) exists
}
}
if (uv == 1) {
for (w = v + 1; w < G->n; w++) {
uw = 0; vw = 0;
for (e = G->V_ptr[w]; e < G->V_ptr[w + 1]; e++) {
if (G->E_v[e] == u) uw = 1; // Edge (u,w) exists
if (G->E_v[e] == v) vw = 1; // Edge (v,w) exists
}
if ((uv == 1) && (vw == 1) && (uw == 1)) {
counter += 1;
}
}
}
}
//if (counter > 0) {
pthread_mutex_lock(&lock_count);
threadCount += 1;
parallelCount += counter;
pthread_mutex_unlock(&lock_count);
//}
pthread_exit(NULL);
}
下面是主要的调用位置
int main(int argc, char *argv[]) {
struct timespec start, stop;
float time_serial;
unsigned int num_nodes, num_edges, seed, num_triangles, max_degree;
if (argc != 5) {
printf("Use: <executable_name> <num_nodes> <num_edges> <seed>
<max_degree>\n");
exit(0);
}
if ((num_nodes = atoi(argv[argc-4])) > MAX_N) {
printf("Maximum number of nodes allowed: %u\n", MAX_N);
exit(0);
};
if ((num_edges = atoi(argv[argc-3])) > MAX_E) {
printf("Maximum number of edges allowed: %u\n", MAX_E);
exit(0);
};
if (num_edges < 2*num_nodes) {
num_edges = 2*num_nodes;
printf("Number of edges must be at least twice the number of nodes: changing
num_edges to %u\n", num_edges);
exit(0);
};
seed = atoi(argv[argc-2]);
max_degree = atoi(argv[argc-1]);
// Initialize graph
G = init_graph ( num_nodes, num_edges, seed, max_degree );
float time_parallel;
//Initialize Pthread
pthread_mutex_init(&lock_count, NULL);
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
clock_gettime(CLOCK_REALTIME, &start);
for (int u = 0; u < num_nodes; u++) {
thread_id[u] = u;
pthread_create(&p_threads[u], &attr, count_ParallelTriangles, (void
*)&thread_id[u]);
}
for (int u = 0; u < num_nodes; u++) {
pthread_join(p_threads[u], NULL);
}
clock_gettime(CLOCK_REALTIME, &stop);
time_parallel = (stop.tv_sec - start.tv_sec)
+ 0.000000001*(stop.tv_nsec - start.tv_nsec);
printf("Thread active: %d\n", threadCount);
printf("Parallel execution time = %f s\n", time_parallel);
// Print results
printf("G: Nodes = %u, Edges = %u, Triangles = %u\n", G->n, G->m, parallelCount);
pthread_attr_destroy(&attr);
pthread_mutex_destroy(&lock_count);
return 0;
}
最后,这是正确运行时的输出。将边设置为最大1000000,以便计算其平均度数(在本例中为234110)内可容纳的最大边数。
./triangles.exe 4096 1000000 0 0
Serial execution time = 16.181034 s
G: Nodes = 4096, Edges = 234110, Triangles = 651015
Thread active: 4096
Parallel execution time = 0.843587 s
G: Nodes = 4096, Edges = 234110, Triangles = 651015
我们可以看到,由于线程数量等于声明的节点数量,因此上述方法工作正常。然而,如果我们只增加几千个节点,我们会发现输出不再正常运行,尽管速度仍然很快:
./triangles.exe 6000 1000000 0 0
Serial execution time = 48.326824 s
G: Nodes = 6000, Edges = 413845, Triangles = 1207058
Thread active: 2061
Parallel execution time = 1.471421 s
G: Nodes = 6000, Edges = 413845, Triangles = 1079834
在上面的例子中,如果我们再运行这个示例几次,那么每次调用之间的线程数量都会发生变化,并且它计算的三角形也会随之发生变化(因为每个三角形计数都取决于将其正确传递给全局变量的线程)。序列化的计数和时间将保持相对一致,因为它已经是正确的。
编辑:
为主文件添加了更多代码。
下面是用于创建图形的头文件
typedef struct _graph {
unsigned int n; // Number of vertices in the graph
unsigned int m; // Number of edges in the graph
unsigned int * E_u; // Edge i = (E_u[i],E_v[i])
unsigned int * E_v; //
unsigned int * V_ptr; // Edges incident on vertex u
// have ids V_ptr[u] ... V_ptr[u+1]-1
} GRAPH_t;
GRAPH_t * init_graph ( unsigned int n, unsigned int m, unsigned int seed,
unsigned int max_degree ) {
GRAPH_t * G = (GRAPH_t *) calloc(1, sizeof(GRAPH_t));
unsigned u, v, e, nbrs, first, lastplus1, maxvalue;
double fraction;
G->n = n;
G->E_u = (unsigned int *) calloc(m, sizeof(unsigned int));
G->E_v = (unsigned int *) calloc(m, sizeof(unsigned int));
G->V_ptr = (unsigned int *) calloc((G->n+1), sizeof(unsigned int));
srand48(seed);
unsigned int count = 0;
// Generate edges
G->V_ptr[0] = count;
for (u = 1; u < G->n; u++) {
G->V_ptr[u] = count;
switch (max_degree) {
case 0: // max_degree = 0 => max_degree = sqrt(n)
nbrs = sqrt(G->n); if (nbrs > u) nbrs = u;
break;
default:
nbrs = max_degree; if (nbrs > u) nbrs = u;
break;
}
first = G->V_ptr[u];
lastplus1 = first + nbrs; if (lastplus1 > m) lastplus1 = m;
if (first < lastplus1) {
for (e = first; e < lastplus1; e++)
G->E_v[e] = ((unsigned int) lrand48()) % G->n;
maxvalue = G->E_v[first];
for (e = first+1; e < lastplus1; e++) {
G->E_v[e] += G->E_v[e-1];
maxvalue = G->E_v[e];
}
for (e = first; e < lastplus1; e++) {
fraction = ((double) G->E_v[e])/(maxvalue+1+(lrand48()%G->n));
G->E_v[e] = (unsigned int) (fraction * u);
}
// Generate edges incident at u
G->E_u[count] = u;
G->E_v[count] = G->E_v[count];
count++;
for (e = first+1; e < lastplus1; e++) {
if (G->E_v[count-1] < G->E_v[e]) {
G->E_u[count] = u;
G->E_v[count] = G->E_v[e];
count++;
}
}
}
}
G->V_ptr[n] = count;
G->m = count-1; // Initialize number of edges
// Check graph
for (u = 0; u < G->n; u++) {
if (G->V_ptr[u] > G->V_ptr[u+1]) {
printf("Graph generation problem - 1!!!\n");
exit(0);
}
for (e = G->V_ptr[u]; e < G->V_ptr[u+1]; e++) {
if (G->E_u[e] != u) {
printf("Graph generation problem - 2!!!\n");
exit(0);
}
if (G->E_v[e] >= u) {
printf("Graph generation problem - 3!!!\n");
exit(0);
}
if ((e > G->V_ptr[u]) && (G->E_v[e] <= G->E_v[e-1])) {
printf("Graph generation problem - 4!!!\n");
exit(0);
}
}
}
return G;
}