waste.c 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. // gcc -O2 waste.c -lpthread -owaste
  2. // gcc -O2 waste.c -lpthread -owaste -DMEM
  3. #define _GNU_SOURCE
  4. #include <stdio.h>
  5. #include <math.h>
  6. #include <sched.h>
  7. #include <string.h>
  8. #include <stdlib.h>
  9. #include <unistd.h>
  10. #include <pthread.h>
  11. #include <arm_neon.h>
  12. #include <sys/sysinfo.h>
  13. #include "../common/timing.h"
  14. int get_nprocs(void);  // processor count used by main to size the arrays below; NOTE(review): presumably already declared by <sys/sysinfo.h> - confirm before removing
  15. double *ttime, *oout;  // per-worker slots indexed by the CPU number passed to waste(): ttime[i] = duration of worker i's latest timing sample, oout[i] = sum of its accumulator lanes; both allocated in main
  /*
   * Worker body: pin the calling thread to CPU `pid`, then loop forever
   * sweeping a 128 MB buffer with NEON multiply-accumulates.
   *
   * After every PASSES sweeps the elapsed time (difference of two
   * seconds_since_boot() readings) is stored in ttime[pid] and the sum of the
   * four accumulator lanes in oout[pid]; storing the sum keeps the arithmetic
   * observable so it cannot be discarded as dead code.
   *
   * pid: CPU number to pin to; also the index into ttime/oout.
   *
   * Only returns if the working buffer cannot be allocated; otherwise it
   * never returns.
   *
   * NOTE(review): ttime/oout are plain doubles that main reads while this
   * function writes them, with no synchronization - confirm that is acceptable
   * for the statistics display.
   */
  void waste(int pid) {
    enum {
      VEC_COUNT = 0x800000, /* float32x4_t elements in the buffer (128 MB) */
      PASSES = 0x10,        /* buffer sweeps per timing sample */
      STRIDE = 0x20         /* vectors consumed per unrolled inner iteration */
    };

    cpu_set_t my_set;
    CPU_ZERO(&my_set);
    CPU_SET(pid, &my_set);
    /* pid 0 here means "the calling thread", not CPU 0. */
    int ret = sched_setaffinity(0, sizeof(cpu_set_t), &my_set);
    printf("set affinity to %d: %d\n", pid, ret);

    float32x4_t *tmp = malloc(VEC_COUNT * sizeof *tmp);
    if (tmp == NULL) {
      perror("malloc");
      return;
    }

    /*
     * Without -DMEM the buffer is left untouched, so the run stresses the CPU
     * only and not RAM. With -DMEM the memset is needed to avoid the zero page.
     */
  #ifdef MEM
    memset(tmp, 0xaa, VEC_COUNT * sizeof *tmp);
  #endif

    /* Start from a defined value: the accumulator is read on the first vmlaq. */
    float32x4_t out = vdupq_n_f32(0.0f);
    double sec = seconds_since_boot();

    while (1) {
      for (int i = 0; i < PASSES; i++) {
        /* Hand-unrolled on purpose: 16 multiply-accumulates over 32 vectors. */
        for (int j = 0; j < VEC_COUNT; j += STRIDE) {
          out = vmlaq_f32(out, tmp[j + 0], tmp[j + 1]);
          out = vmlaq_f32(out, tmp[j + 2], tmp[j + 3]);
          out = vmlaq_f32(out, tmp[j + 4], tmp[j + 5]);
          out = vmlaq_f32(out, tmp[j + 6], tmp[j + 7]);
          out = vmlaq_f32(out, tmp[j + 8], tmp[j + 9]);
          out = vmlaq_f32(out, tmp[j + 10], tmp[j + 11]);
          out = vmlaq_f32(out, tmp[j + 12], tmp[j + 13]);
          out = vmlaq_f32(out, tmp[j + 14], tmp[j + 15]);
          out = vmlaq_f32(out, tmp[j + 16], tmp[j + 17]);
          out = vmlaq_f32(out, tmp[j + 18], tmp[j + 19]);
          out = vmlaq_f32(out, tmp[j + 20], tmp[j + 21]);
          out = vmlaq_f32(out, tmp[j + 22], tmp[j + 23]);
          out = vmlaq_f32(out, tmp[j + 24], tmp[j + 25]);
          out = vmlaq_f32(out, tmp[j + 26], tmp[j + 27]);
          out = vmlaq_f32(out, tmp[j + 28], tmp[j + 29]);
          out = vmlaq_f32(out, tmp[j + 30], tmp[j + 31]);
        }
      }

      double nsec = seconds_since_boot();
      ttime[pid] = nsec - sec;
      /* Lane reads via the intrinsic rather than the vector-subscript extension. */
      oout[pid] = vgetq_lane_f32(out, 0) + vgetq_lane_f32(out, 1) +
                  vgetq_lane_f32(out, 2) + vgetq_lane_f32(out, 3);
      sec = nsec;
    }
  }
  /*
   * pthread start routine for one worker. waste() takes an int and returns
   * void, so it must not be called through a pointer cast to
   * void *(*)(void *); this adapter has the signature pthread_create expects
   * and forwards the CPU number that was packed into the argument pointer.
   */
  static void *waste_thread(void *arg) {
    waste((int)(long)arg);
    return NULL;
  }

  /*
   * Start one waste() worker per processor reported by get_nprocs(), then
   * print once a second: each worker's latest sample time, the mean sample
   * time, and the aggregate throughput in MB/s.
   *
   * Returns 1 if the processor count is unusable, an allocation fails, or a
   * thread cannot be created; otherwise it never returns.
   */
  int main(void) {
    int CORES = get_nprocs();
    if (CORES < 1) {
      fprintf(stderr, "get_nprocs returned %d\n", CORES);
      return 1;
    }

    ttime = malloc((size_t)CORES * sizeof *ttime);
    oout = malloc((size_t)CORES * sizeof *oout);
    if (ttime == NULL || oout == NULL) {
      perror("malloc");
      return 1;
    }

    pthread_t waster[CORES];
    for (long i = 0; i < CORES; i++) {
      /* NAN marks "no sample yet"; it shows up as nan in the output. */
      ttime[i] = NAN;
      oout[i] = 0.0;
      int rc = pthread_create(&waster[i], NULL, waste_thread, (void *)i);
      if (rc != 0) {
        fprintf(stderr, "pthread_create(%ld): %s\n", i, strerror(rc));
        return 1;
      }
    }

    while (1) {
      double avg = 0.0;
      double iavg = 0.0; /* sum of 1/time, i.e. total samples per second */
      for (int i = 0; i < CORES; i++) {
        avg += ttime[i];
        iavg += 1 / ttime[i];
        printf("%4.2f ", ttime[i]);
      }
      /* One sample = 16 sweeps of 0x800000 vectors; scaled to MB and multiplied by the sample rate. */
      double mb_per_sec = (16. * 0x800000 / (1024 * 1024)) * sizeof(float32x4_t) * iavg;
      printf("-- %4.2f -- %.2f MB/s \n", avg / CORES, mb_per_sec);
      sleep(1);
    }
  }