numactl --interleave=all ./testing_zgeqrf -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.6.0  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_zgeqrf [options] [-h|--help]

ngpu 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   ||R||_F / ||A||_F
=======================================================================
  100   100     ---   (  ---  )      2.42 (   0.00)     ---
 1000  1000     ---   (  ---  )    136.02 (   0.04)     ---
   10    10     ---   (  ---  )      0.13 (   0.00)     ---
   20    20     ---   (  ---  )      0.76 (   0.00)     ---
   30    30     ---   (  ---  )      1.85 (   0.00)     ---
   40    40     ---   (  ---  )      0.93 (   0.00)     ---
   50    50     ---   (  ---  )      1.60 (   0.00)     ---
   60    60     ---   (  ---  )      2.30 (   0.00)     ---
   70    70     ---   (  ---  )      1.73 (   0.00)     ---
   80    80     ---   (  ---  )      2.62 (   0.00)     ---
   90    90     ---   (  ---  )      3.47 (   0.00)     ---
  100   100     ---   (  ---  )      4.42 (   0.00)     ---
  200   200     ---   (  ---  )     14.01 (   0.00)     ---
  300   300     ---   (  ---  )     29.03 (   0.00)     ---
  400   400     ---   (  ---  )     44.79 (   0.01)     ---
  500   500     ---   (  ---  )     62.82 (   0.01)     ---
  600   600     ---   (  ---  )     77.66 (   0.01)     ---
  700   700     ---   (  ---  )     97.16 (   0.02)     ---
  800   800     ---   (  ---  )    114.19 (   0.02)     ---
  900   900     ---   (  ---  )    132.77 (   0.03)     ---
 1000  1000     ---   (  ---  )    149.44 (   0.04)     ---
 2000  2000     ---   (  ---  )    358.56 (   0.12)     ---
 3000  3000     ---   (  ---  )    579.59 (   0.25)     ---
 4000  4000     ---   (  ---  )    725.78 (   0.47)     ---
 5000  5000     ---   (  ---  )    755.44 (   0.88)     ---
 6000  6000     ---   (  ---  )    859.64 (   1.34)     ---
 7000  7000     ---   (  ---  )    936.70 (   1.95)     ---
 8000  8000     ---   (  ---  )    974.19 (   2.80)     ---
 9000  9000     ---   (  ---  )   1006.34 (   3.86)     ---
10000 10000     ---   (  ---  )   1023.16 (   5.21)     ---
12000 12000     ---   (  ---  )   1058.95 (   8.70)     ---
14000 14000     ---   (  ---  )   1065.63 (  13.74)     ---
16000 16000     ---   (  ---  )   1082.08 (  20.19)     ---
18000 18000     ---   (  ---  )   1054.27 (  29.51)     ---
20000 20000     ---   (  ---  )   1072.48 (  39.79)     ---

numactl --interleave=all ./testing_zgeqrf_gpu -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.6.0  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_zgeqrf_gpu [options] [-h|--help]

version 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   ||Ax-b||_F/(N*||A||_F*||x||_F)
====================================================================================
  100   100     ---   (  ---  )      1.61 (   0.00)     ---
 1000  1000     ---   (  ---  )    136.16 (   0.04)     ---
   10    10     ---   (  ---  )      0.01 (   0.00)     ---
   20    20     ---   (  ---  )      0.05 (   0.00)     ---
   30    30     ---   (  ---  )      0.14 (   0.00)     ---
   40    40     ---   (  ---  )      0.33 (   0.00)     ---
   50    50     ---   (  ---  )      0.60 (   0.00)     ---
   60    60     ---   (  ---  )      0.97 (   0.00)     ---
   70    70     ---   (  ---  )      2.14 (   0.00)     ---
   80    80     ---   (  ---  )      3.21 (   0.00)     ---
   90    90     ---   (  ---  )      3.56 (   0.00)     ---
  100   100     ---   (  ---  )      2.70 (   0.00)     ---
  200   200     ---   (  ---  )     11.01 (   0.00)     ---
  300   300     ---   (  ---  )     24.71 (   0.01)     ---
  400   400     ---   (  ---  )     40.07 (   0.01)     ---
  500   500     ---   (  ---  )     54.69 (   0.01)     ---
  600   600     ---   (  ---  )     73.37 (   0.02)     ---
  700   700     ---   (  ---  )     87.19 (   0.02)     ---
  800   800     ---   (  ---  )    105.31 (   0.03)     ---
  900   900     ---   (  ---  )    122.82 (   0.03)     ---
 1000  1000     ---   (  ---  )    139.42 (   0.04)     ---
 2000  2000     ---   (  ---  )    349.09 (   0.12)     ---
 3000  3000     ---   (  ---  )    579.17 (   0.25)     ---
 4000  4000     ---   (  ---  )    727.51 (   0.47)     ---
 5000  5000     ---   (  ---  )    755.23 (   0.88)     ---
 6000  6000     ---   (  ---  )    867.98 (   1.33)     ---
 7000  7000     ---   (  ---  )    937.94 (   1.95)     ---
 8000  8000     ---   (  ---  )    975.82 (   2.80)     ---
 9000  9000     ---   (  ---  )    997.39 (   3.90)     ---
10000 10000     ---   (  ---  )   1013.86 (   5.26)     ---
12000 12000     ---   (  ---  )   1052.61 (   8.76)     ---
14000 14000     ---   (  ---  )   1063.15 (  13.77)     ---
16000 16000     ---   (  ---  )   1087.37 (  20.09)     ---
18000 18000     ---   (  ---  )   1050.26 (  29.62)     ---
20000 20000     ---   (  ---  )   1071.03 (  39.84)     ---
