#include <stdlib.h>
#include <m4ri/config.h>
#include <m4ri/m4ri.h>
#include "cpucycles.h"
#include "benchmarking.h"
#ifdef HAVE_LIBPAPI
#define _GNU_SOURCE
#include <sys/types.h>
#include <papi.h>
#include <errno.h>
#endif
/*
 * Parameters of one elimination benchmark, handed to run() and
 * run_nothing() through their void* argument.
 *
 * The dimension/rank members were missing from the declaration although
 * main() assigns them from atoi() and prints them with "%5d", and the
 * benchmark bodies read p->m and p->r; they are declared as int to match
 * those uses. `algorithm` stays the first member so the existing layout
 * is unchanged.
 */
struct elim_params {
  char const *algorithm; /* "m4ri", "pluq", "mmpf" or "naive" (compared in run()) */
  int m;                 /* first dimension argument (argv[1]) */
  int n;                 /* second dimension argument; main() defaults it to m */
  int r;                 /* target rank; 0 selects the alternative branch in run() */
};
/*
 * Per-slot measurement baseline used only in HAVE_LIBPAPI builds:
 * main() seeds every used slot with 100000000, run_nothing() lowers a
 * slot whenever it observes a smaller value, and run() subtracts the
 * slot from the corresponding entry of its result array.
 * Slot 0 pairs with data[0], slots 1.. with the counter values.
 */
static unsigned long long loop_calibration[32];
/*
 * Calibration pass with the same signature as run().
 *
 * It brackets a measurement region exactly like run() does, but in the
 * code visible here no work is performed between starting and stopping
 * the measurement. In HAVE_LIBPAPI builds the measured values are used
 * to lower the entries of loop_calibration[].
 *
 * _p       -- pointer to a struct elim_params
 * data     -- output array; data[0] and data[1] are written without PAPI,
 *             data[0..*data_len-1] with PAPI
 * data_len -- in/out: set to 2 without PAPI, otherwise clamped to
 *             MIN(papi_array_len + 1, *data_len)
 *
 * Returns 0.
 */
int run_nothing(void *_p, unsigned long long *data, int *data_len) {
struct elim_params *p = (struct elim_params *)_p;
/*
 * NOTE(review): the braces in the following section do not balance and
 * every branch body is empty; `i` is also used outside the for loop that
 * declares it. Statements appear to be missing here -- restore them from
 * the upstream file before building.
 */
if(p->r != 0) {
for (
rci_t i = 0; i < p->m; ++i) {
}
}
if(i < p->r) {
} else {
}
}
}
} else {
}
/* Report how many entries of data[] this function fills in. */
#ifndef HAVE_LIBPAPI
*data_len = 2;
#else
*data_len =
MIN(papi_array_len + 1, *data_len);
#endif
int papi_res;
/* Start of the measured region. */
#ifndef HAVE_LIBPAPI
data[0] = walltime(0);
data[1] = cpucycles();
#else
/* data[0] is reserved for the time value, the counters use the rest. */
int array_len = *data_len - 1;
unsigned long long t0 = PAPI_get_virt_usec();
papi_res = PAPI_start_counters((int*)papi_events, array_len);
/*
 * NOTE(review): the statement guarded by this `if` is not visible; as
 * written it guards the first statement after the preprocessor block.
 */
if(papi_res)
#endif
/* End of the measured region: turn the start values into differences. */
#ifndef HAVE_LIBPAPI
data[1] = cpucycles() - data[1];
/* presumably walltime(t) yields the time elapsed since t -- confirm in benchmarking.h */
data[0] = walltime(data[0]);
#else
PAPI_stop_counters((long long*)&data[1], array_len);
t0 = PAPI_get_virt_usec() - t0;
data[0] = t0;
/* Keep the smallest value seen so far for every slot, including slot 0. */
for (int nv = 0; nv <= array_len; ++nv) {
if (data[nv] < loop_calibration[nv])
loop_calibration[nv] = data[nv];
}
#endif
return (0);
}
/*
 * Benchmark callback passed to run_bench() by main().
 *
 * Brackets a measurement region around a dispatch on p->algorithm
 * ("m4ri", "pluq", "mmpf" or "naive") and stores the result in data[].
 *
 * _p       -- pointer to a struct elim_params
 * data     -- output array; without PAPI data[0] receives the walltime()
 *             result and data[1] the cpucycles() difference, with PAPI
 *             data[0] receives the PAPI_get_virt_usec() difference and
 *             data[1..] the counter values, each reduced by the matching
 *             loop_calibration[] entry
 * data_len -- in/out: set to 2 without PAPI, otherwise clamped to
 *             MIN(papi_array_len + 1, *data_len)
 *
 * Returns 0.
 */
int run(void *_p, unsigned long long *data, int *data_len) {
struct elim_params *p = (struct elim_params *)_p;
/* Report how many entries of data[] this function fills in. */
#ifndef HAVE_LIBPAPI
*data_len = 2;
#else
*data_len =
MIN(papi_array_len + 1, *data_len);
#endif
int papi_res;
/*
 * NOTE(review): the braces in the following section do not balance and
 * every branch body is empty; `i` is also used outside the for loop that
 * declares it. Statements appear to be missing here -- restore them from
 * the upstream file before building.
 */
if(p->r != 0) {
for (
rci_t i = 0; i < p->m; ++i) {
}
}
if(i < p->r) {
} else {
}
}
}
} else {
}
/* Start of the measured region. */
#ifndef HAVE_LIBPAPI
data[0] = walltime(0);
data[1] = cpucycles();
#else
/* data[0] is reserved for the time value, the counters use the rest. */
int array_len = *data_len - 1;
unsigned long long t0 = PAPI_get_virt_usec();
papi_res = PAPI_start_counters((int*)papi_events, array_len);
/*
 * NOTE(review): the statement guarded by this `if` is not visible; as
 * written it guards the first statement after the preprocessor block.
 */
if (papi_res)
#endif
/*
 * Dispatch on the requested algorithm name.
 * NOTE(review): none of the branches has a visible body, so this chain
 * is not valid C as it stands; the per-algorithm calls need restoring.
 */
if(strcmp(p->algorithm, "m4ri") == 0)
else if(strcmp(p->algorithm, "pluq") == 0)
else if(strcmp(p->algorithm, "mmpf") == 0)
else if(strcmp(p->algorithm, "naive") == 0)
/* End of the measured region: turn the start values into differences. */
#ifndef HAVE_LIBPAPI
data[1] = cpucycles() - data[1];
/* presumably walltime(t) yields the time elapsed since t -- confirm in benchmarking.h */
data[0] = walltime(data[0]);
#else
PAPI_stop_counters((long long*)&data[1], array_len);
t0 = PAPI_get_virt_usec() - t0;
data[0] = t0;
/* Remove the baseline recorded by run_nothing() from every slot. */
for (int nv = 0; nv <= array_len; ++nv) {
data[nv] -= loop_calibration[nv];
}
#endif
return 0;
}
/*
 * Print the usage text of this benchmark and terminate the process.
 *
 * The positional parameters are described on stdout; the options shared
 * by all benchmarks are appended by bench_print_global_options() on
 * stderr. The function does not return: it exits with status 1, the same
 * status main() uses for its other fatal start-up error.
 */
void print_help_and_exit() {
  printf("Parameters m(, n, alg, r) expected.\n");
  printf(" m -- integer > 0\n");
  printf(" n -- integer > 0\n");
  printf(" alg -- 'm4ri', 'pluq', 'mmpf' or 'naive' (default: 'pluq')\n");
  printf(" r -- target rank >= 0, if 0 then mzd_randomize() is called (default: MIN(m,n))\n");
  printf("\n");
  bench_print_global_options(stderr);
  /* Previously control fell back into main(), which then read argv[1]
   * even when it was absent. exit() also flushes the buffered stdout text. */
  exit(1);
}
/*
 * Entry point of the elimination benchmark.
 *
 * Command line (after the global options consumed by global_options()):
 *   m [n [alg [r]]]
 * n defaults to m, alg defaults to "pluq" and r defaults to m.
 *
 * The program runs run_nothing() four times, hands run() to run_bench()
 * and prints the collected values.
 */
int main(int argc, char **argv) {
  int opts = global_options(&argc, &argv);
  int data_len;

#ifdef HAVE_LIBPAPI
  /* Never ask for more events than the hardware provides counters. */
  int papi_counters = PAPI_num_counters();
  if (papi_counters < papi_array_len) {
    fprintf(stderr, "%s: Warning: there are only %d hardware counters available!\n", progname, papi_counters);
    papi_array_len = papi_counters;
  }
  if (papi_test(papi_events, papi_array_len)) {
    exit(1);
  }
  /* Seed the baselines with a large value; run_nothing() only lowers them. */
  for (int nv = 0; nv <= papi_array_len; ++nv) {
    loop_calibration[nv] = 100000000;
  }
  data_len = papi_array_len + 1;
#else
  data_len = 2;
#endif

  if (opts < 0 || argc < 2 || argc > 5) {
    print_help_and_exit();
    /* Never fall through to argv[1] with an invalid command line, even
     * if the helper should return. */
    return 1;
  }

  struct elim_params params;
  params.m = atoi(argv[1]);
  if (argc >= 3) {
    params.n = atoi(argv[2]);
  } else {
    params.n = params.m;
  }
  if (argc >= 4) {
    params.algorithm = argv[3];
  } else {
    params.algorithm = "pluq";
  }
  /* NOTE(review): the usage text advertises MIN(m,n) as the default rank,
   * while the code uses m -- confirm which one is intended. */
  if (argc >= 5) {
    params.r = atoi(argv[4]);
  } else {
    params.r = params.m;
  }

  /* Fixed seed for random(). */
  srandom(17);

  /* NOTE(review): data holds 16 entries while loop_calibration holds 32;
   * verify that papi_array_len + 1 can never exceed 16. */
  unsigned long long data[16];

  /* Calibration passes. The argument was previously written as a garbled
   * "¶ms" (an "&para" entity decoded by mistake); it is the address of
   * params. */
  for (int i = 0; i < 4; ++i) {
    run_nothing((void *)&params, data, &data_len);
  }

  run_bench(run, (void *)&params, data, data_len);

  /* Normalise data[1] by m * n * r^0.807. */
  double cc_per_op = ((double)data[1]) / ((double)params.m * (double)params.n * powl((double)params.r, 0.807));

  printf("m: %5d, n: %5d, last r: %5d, cpu cycles: %12llu, cc/(mnr^0.807): %.5lf, ", params.m, params.n, params.r, data[1], cc_per_op);
  print_wall_time(data[0] / 1000000.0);
  printf(", ");
  print_cpu_time(data[1] / (double)cpucycles_persecond());
  printf("\n");

#ifdef HAVE_LIBPAPI
  /* One line per PAPI event, normalised by n^2.807. */
  for (int n = 1; n < data_len; ++n) {
    double tmp = ((double)data[n]) / powl((double)params.n, 2.807);
    printf("%20s (%20llu) per bit (divided by n^2.807): %15.5f\n", papi_event_name(papi_events[n - 1]), data[n], tmp);
  }
#endif

  return 0;
}