#include "selfdrive/modeld/transforms/loadyuv.h" #include #include #include void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height) { memset(s, 0, sizeof(*s)); s->width = width; s->height = height; char args[1024]; snprintf(args, sizeof(args), "-cl-fast-relaxed-math -cl-denorms-are-zero " "-DTRANSFORMED_WIDTH=%d -DTRANSFORMED_HEIGHT=%d", width, height); cl_program prg = cl_program_from_file(ctx, device_id, LOADYUV_PATH, args); s->loadys_krnl = CL_CHECK_ERR(clCreateKernel(prg, "loadys", &err)); s->loaduv_krnl = CL_CHECK_ERR(clCreateKernel(prg, "loaduv", &err)); s->copy_krnl = CL_CHECK_ERR(clCreateKernel(prg, "copy", &err)); // done with this CL_CHECK(clReleaseProgram(prg)); } void loadyuv_destroy(LoadYUVState* s) { CL_CHECK(clReleaseKernel(s->loadys_krnl)); CL_CHECK(clReleaseKernel(s->loaduv_krnl)); CL_CHECK(clReleaseKernel(s->copy_krnl)); } void loadyuv_queue(LoadYUVState* s, cl_command_queue q, cl_mem y_cl, cl_mem u_cl, cl_mem v_cl, cl_mem out_cl, bool do_shift) { cl_int global_out_off = 0; if (do_shift) { // shift the image in slot 1 to slot 0, then place the new image in slot 1 global_out_off += (s->width*s->height) + (s->width/2)*(s->height/2)*2; CL_CHECK(clSetKernelArg(s->copy_krnl, 0, sizeof(cl_mem), &out_cl)); CL_CHECK(clSetKernelArg(s->copy_krnl, 1, sizeof(cl_int), &global_out_off)); const size_t copy_work_size = global_out_off/8; CL_CHECK(clEnqueueNDRangeKernel(q, s->copy_krnl, 1, NULL, ©_work_size, NULL, 0, 0, NULL)); } CL_CHECK(clSetKernelArg(s->loadys_krnl, 0, sizeof(cl_mem), &y_cl)); CL_CHECK(clSetKernelArg(s->loadys_krnl, 1, sizeof(cl_mem), &out_cl)); CL_CHECK(clSetKernelArg(s->loadys_krnl, 2, sizeof(cl_int), &global_out_off)); const size_t loadys_work_size = (s->width*s->height)/8; CL_CHECK(clEnqueueNDRangeKernel(q, s->loadys_krnl, 1, NULL, &loadys_work_size, NULL, 0, 0, NULL)); const size_t loaduv_work_size = ((s->width/2)*(s->height/2))/8; global_out_off += (s->width*s->height); CL_CHECK(clSetKernelArg(s->loaduv_krnl, 0, sizeof(cl_mem), &u_cl)); CL_CHECK(clSetKernelArg(s->loaduv_krnl, 1, sizeof(cl_mem), &out_cl)); CL_CHECK(clSetKernelArg(s->loaduv_krnl, 2, sizeof(cl_int), &global_out_off)); CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL, &loaduv_work_size, NULL, 0, 0, NULL)); global_out_off += (s->width/2)*(s->height/2); CL_CHECK(clSetKernelArg(s->loaduv_krnl, 0, sizeof(cl_mem), &v_cl)); CL_CHECK(clSetKernelArg(s->loaduv_krnl, 1, sizeof(cl_mem), &out_cl)); CL_CHECK(clSetKernelArg(s->loaduv_krnl, 2, sizeof(cl_int), &global_out_off)); CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL, &loaduv_work_size, NULL, 0, 0, NULL)); }