transform.cc 4.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. #include "selfdrive/modeld/transforms/transform.h"
  2. #include <cassert>
  3. #include <cstring>
  4. #include "common/clutil.h"
  5. void transform_init(Transform* s, cl_context ctx, cl_device_id device_id) {
  6. memset(s, 0, sizeof(*s));
  7. cl_program prg = cl_program_from_file(ctx, device_id, TRANSFORM_PATH, "");
  8. s->krnl = CL_CHECK_ERR(clCreateKernel(prg, "warpPerspective", &err));
  9. // done with this
  10. CL_CHECK(clReleaseProgram(prg));
  11. s->m_y_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3*3*sizeof(float), NULL, &err));
  12. s->m_uv_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3*3*sizeof(float), NULL, &err));
  13. }
  14. void transform_destroy(Transform* s) {
  15. CL_CHECK(clReleaseMemObject(s->m_y_cl));
  16. CL_CHECK(clReleaseMemObject(s->m_uv_cl));
  17. CL_CHECK(clReleaseKernel(s->krnl));
  18. }
  19. void transform_queue(Transform* s,
  20. cl_command_queue q,
  21. cl_mem in_yuv, int in_width, int in_height, int in_stride, int in_uv_offset,
  22. cl_mem out_y, cl_mem out_u, cl_mem out_v,
  23. int out_width, int out_height,
  24. const mat3& projection) {
  25. const int zero = 0;
  26. // sampled using pixel center origin
  27. // (because that's how fastcv and opencv does it)
  28. mat3 projection_y = projection;
  29. // in and out uv is half the size of y.
  30. mat3 projection_uv = transform_scale_buffer(projection, 0.5);
  31. CL_CHECK(clEnqueueWriteBuffer(q, s->m_y_cl, CL_TRUE, 0, 3*3*sizeof(float), (void*)projection_y.v, 0, NULL, NULL));
  32. CL_CHECK(clEnqueueWriteBuffer(q, s->m_uv_cl, CL_TRUE, 0, 3*3*sizeof(float), (void*)projection_uv.v, 0, NULL, NULL));
  33. const int in_y_width = in_width;
  34. const int in_y_height = in_height;
  35. const int in_y_px_stride = 1;
  36. const int in_uv_width = in_width/2;
  37. const int in_uv_height = in_height/2;
  38. const int in_uv_px_stride = 2;
  39. const int in_u_offset = in_uv_offset;
  40. const int in_v_offset = in_uv_offset + 1;
  41. const int out_y_width = out_width;
  42. const int out_y_height = out_height;
  43. const int out_uv_width = out_width/2;
  44. const int out_uv_height = out_height/2;
  45. CL_CHECK(clSetKernelArg(s->krnl, 0, sizeof(cl_mem), &in_yuv)); // src
  46. CL_CHECK(clSetKernelArg(s->krnl, 1, sizeof(cl_int), &in_stride)); // src_row_stride
  47. CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_y_px_stride)); // src_px_stride
  48. CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &zero)); // src_offset
  49. CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_y_height)); // src_rows
  50. CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_int), &in_y_width)); // src_cols
  51. CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_y)); // dst
  52. CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &out_y_width)); // dst_row_stride
  53. CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &zero)); // dst_offset
  54. CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_y_height)); // dst_rows
  55. CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_int), &out_y_width)); // dst_cols
  56. CL_CHECK(clSetKernelArg(s->krnl, 11, sizeof(cl_mem), &s->m_y_cl)); // M
  57. const size_t work_size_y[2] = {(size_t)out_y_width, (size_t)out_y_height};
  58. CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
  59. (const size_t*)&work_size_y, NULL, 0, 0, NULL));
  60. const size_t work_size_uv[2] = {(size_t)out_uv_width, (size_t)out_uv_height};
  61. CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_uv_px_stride)); // src_px_stride
  62. CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_u_offset)); // src_offset
  63. CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_uv_height)); // src_rows
  64. CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_int), &in_uv_width)); // src_cols
  65. CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_u)); // dst
  66. CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &out_uv_width)); // dst_row_stride
  67. CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &zero)); // dst_offset
  68. CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_uv_height)); // dst_rows
  69. CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_int), &out_uv_width)); // dst_cols
  70. CL_CHECK(clSetKernelArg(s->krnl, 11, sizeof(cl_mem), &s->m_uv_cl)); // M
  71. CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
  72. (const size_t*)&work_size_uv, NULL, 0, 0, NULL));
  73. CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_v_offset)); // src_ofset
  74. CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_v)); // dst
  75. CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
  76. (const size_t*)&work_size_uv, NULL, 0, 0, NULL));
  77. }