loadyuv.cc 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. #include "selfdrive/modeld/transforms/loadyuv.h"
  2. #include <cassert>
  3. #include <cstdio>
  4. #include <cstring>
  5. void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height) {
  6. memset(s, 0, sizeof(*s));
  7. s->width = width;
  8. s->height = height;
  9. char args[1024];
  10. snprintf(args, sizeof(args),
  11. "-cl-fast-relaxed-math -cl-denorms-are-zero "
  12. "-DTRANSFORMED_WIDTH=%d -DTRANSFORMED_HEIGHT=%d",
  13. width, height);
  14. cl_program prg = cl_program_from_file(ctx, device_id, LOADYUV_PATH, args);
  15. s->loadys_krnl = CL_CHECK_ERR(clCreateKernel(prg, "loadys", &err));
  16. s->loaduv_krnl = CL_CHECK_ERR(clCreateKernel(prg, "loaduv", &err));
  17. s->copy_krnl = CL_CHECK_ERR(clCreateKernel(prg, "copy", &err));
  18. // done with this
  19. CL_CHECK(clReleaseProgram(prg));
  20. }
  21. void loadyuv_destroy(LoadYUVState* s) {
  22. CL_CHECK(clReleaseKernel(s->loadys_krnl));
  23. CL_CHECK(clReleaseKernel(s->loaduv_krnl));
  24. CL_CHECK(clReleaseKernel(s->copy_krnl));
  25. }
  26. void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
  27. cl_mem y_cl, cl_mem u_cl, cl_mem v_cl,
  28. cl_mem out_cl, bool do_shift) {
  29. cl_int global_out_off = 0;
  30. if (do_shift) {
  31. // shift the image in slot 1 to slot 0, then place the new image in slot 1
  32. global_out_off += (s->width*s->height) + (s->width/2)*(s->height/2)*2;
  33. CL_CHECK(clSetKernelArg(s->copy_krnl, 0, sizeof(cl_mem), &out_cl));
  34. CL_CHECK(clSetKernelArg(s->copy_krnl, 1, sizeof(cl_int), &global_out_off));
  35. const size_t copy_work_size = global_out_off/8;
  36. CL_CHECK(clEnqueueNDRangeKernel(q, s->copy_krnl, 1, NULL,
  37. &copy_work_size, NULL, 0, 0, NULL));
  38. }
  39. CL_CHECK(clSetKernelArg(s->loadys_krnl, 0, sizeof(cl_mem), &y_cl));
  40. CL_CHECK(clSetKernelArg(s->loadys_krnl, 1, sizeof(cl_mem), &out_cl));
  41. CL_CHECK(clSetKernelArg(s->loadys_krnl, 2, sizeof(cl_int), &global_out_off));
  42. const size_t loadys_work_size = (s->width*s->height)/8;
  43. CL_CHECK(clEnqueueNDRangeKernel(q, s->loadys_krnl, 1, NULL,
  44. &loadys_work_size, NULL, 0, 0, NULL));
  45. const size_t loaduv_work_size = ((s->width/2)*(s->height/2))/8;
  46. global_out_off += (s->width*s->height);
  47. CL_CHECK(clSetKernelArg(s->loaduv_krnl, 0, sizeof(cl_mem), &u_cl));
  48. CL_CHECK(clSetKernelArg(s->loaduv_krnl, 1, sizeof(cl_mem), &out_cl));
  49. CL_CHECK(clSetKernelArg(s->loaduv_krnl, 2, sizeof(cl_int), &global_out_off));
  50. CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL,
  51. &loaduv_work_size, NULL, 0, 0, NULL));
  52. global_out_off += (s->width/2)*(s->height/2);
  53. CL_CHECK(clSetKernelArg(s->loaduv_krnl, 0, sizeof(cl_mem), &v_cl));
  54. CL_CHECK(clSetKernelArg(s->loaduv_krnl, 1, sizeof(cl_mem), &out_cl));
  55. CL_CHECK(clSetKernelArg(s->loaduv_krnl, 2, sizeof(cl_int), &global_out_off));
  56. CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL,
  57. &loaduv_work_size, NULL, 0, 0, NULL));
  58. }