// RUN: %libomp-compile && env LIBOMP_NUM_HIDDEN_HELPER_THREADS=0 OMP_PROC_BIND=close OMP_PLACES=cores KMP_AFFINITY=verbose %libomp-run 8 1 4 // REQUIRES: linux // // This test pthread_creates 8 root threads before any OpenMP // runtime entry is ever called. We have all the root threads // register with the runtime by calling omp_set_num_threads(), // but this does not initialize their affinity. The fourth root thread // then calls a parallel region and we make sure its affinity // is correct. We also make sure all the other root threads are // free-floating since they have not called into a parallel region. #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include "libomp_test_affinity.h" volatile int entry_flag = 0; volatile int flag = 0; volatile int num_roots_arrived = 0; int num_roots; int spawner = 0; pthread_mutex_t lock; int register_workers = 0; // boolean affinity_mask_t *full_mask; int __kmpc_global_thread_num(void*); int get_os_thread_id() { return (int)syscall(SYS_gettid); } int place_and_affinity_match() { int i, max_cpu; char buf[512]; affinity_mask_t *mask = affinity_mask_alloc(); int place = omp_get_place_num(); int num_procs = omp_get_place_num_procs(place); int *ids = (int*)malloc(sizeof(int) * num_procs); omp_get_place_proc_ids(place, ids); get_thread_affinity(mask); affinity_mask_snprintf(buf, sizeof(buf), mask); printf("Primary Thread Place: %d\n", place); printf("Primary Thread mask: %s\n", buf); for (i = 0; i < num_procs; ++i) { int cpu = ids[i]; if (!affinity_mask_isset(mask, cpu)) return 0; } max_cpu = AFFINITY_MAX_CPUS; for (i = 0; i < max_cpu; ++i) { int cpu = i; if (affinity_mask_isset(mask, cpu)) { int j, found = 0; for (j = 0; j < num_procs; ++j) { if (ids[j] == cpu) { found = 1; break; } } if (!found) return 0; } } affinity_mask_free(mask); free(ids); return 1; } void* thread_func(void *arg) { int place, nplaces; int root_id = *((int*)arg); int pid = getpid(); int tid = get_os_thread_id(); // Order how the root threads are assigned a gtid in the runtime // i.e., root_id = gtid while (1) { int v = entry_flag; if (v == root_id) break; } // If main root thread if (root_id == spawner) { printf("Initial application thread (pid=%d, tid=%d, spawner=%d) reached thread_func (will call OpenMP)\n", pid, tid, spawner); omp_set_num_threads(4); #pragma omp atomic entry_flag++; // Wait for the workers to signal their arrival before #pragma omp parallel while (num_roots_arrived < num_roots - 1) {} // This will trigger the output for KMP_AFFINITY in this case #pragma omp parallel { int gtid = __kmpc_global_thread_num(NULL); #pragma omp single { printf("Exactly %d threads in the #pragma omp parallel\n", omp_get_num_threads()); } #pragma omp critical { printf("OpenMP thread %d: gtid=%d\n", omp_get_thread_num(), gtid); } } flag = 1; if (!place_and_affinity_match()) { fprintf(stderr, "error: place and affinity mask do not match for primary thread\n"); exit (EXIT_FAILURE); } } else { // If worker root thread // Worker root threads, register with OpenMP through omp_set_num_threads() // if designated to, signal their arrival and then wait for the main root // thread to signal them to exit. printf("New root pthread (pid=%d, tid=%d) reached thread_func\n", pid, tid); if (register_workers) omp_set_num_threads(4); #pragma omp atomic entry_flag++; pthread_mutex_lock(&lock); num_roots_arrived++; pthread_mutex_unlock(&lock); while (flag == 0) {} // Main check whether root threads' mask is equal to the // initial affinity mask affinity_mask_t *mask = affinity_mask_alloc(); get_thread_affinity(mask); if (!affinity_mask_equal(mask, full_mask)) { char buf[1024]; printf("root thread %d mask: ", root_id); affinity_mask_snprintf(buf, sizeof(buf), mask); printf("initial affinity mask: %s\n", buf); fprintf(stderr, "error: root thread %d affinity mask not equal" " to initial full mask\n", root_id); affinity_mask_free(mask); exit(EXIT_FAILURE); } affinity_mask_free(mask); } return NULL; } int main(int argc, char** argv) { int i; if (argc != 3 && argc != 4) { fprintf(stderr, "usage: %s []\n", argv[0]); exit(EXIT_FAILURE); } // Initialize pthread mutex pthread_mutex_init(&lock, NULL); // Get initial full mask full_mask = affinity_mask_alloc(); get_thread_affinity(full_mask); // Get the number of root pthreads to create and allocate resources for them num_roots = atoi(argv[1]); pthread_t *roots = (pthread_t*)malloc(sizeof(pthread_t) * num_roots); int *root_ids = (int*)malloc(sizeof(int) * num_roots); // Get the flag indicating whether to have root pthreads call omp_set_num_threads() or not register_workers = atoi(argv[2]); if (argc == 4) spawner = atoi(argv[3]); // Spawn worker root threads for (i = 1; i < num_roots; ++i) { *(root_ids + i) = i; pthread_create(roots + i, NULL, thread_func, root_ids + i); } // Have main root thread (root 0) go into thread_func *root_ids = 0; thread_func(root_ids); // Cleanup all resources for (i = 1; i < num_roots; ++i) { void *status; pthread_join(roots[i], &status); } free(roots); free(root_ids); pthread_mutex_destroy(&lock); return EXIT_SUCCESS; }