查看: 5494|回复: 1

[求助] iMX8 GPU正在运行,CPU能不能执行其它任务?

[复制链接]

该用户从未签到

0

主题

0

帖子

0

新手上路

Rank: 1

积分
10
最后登录
2020-3-27
发表于 2020-3-27 09:59:33 | 显示全部楼层 |阅读模式
本帖最后由 yukang1744 于 2020-3-27 10:01 编辑

打算使用i.mx8X开发产品,用到GPU。我把算法通过OpenCL写到GPU中,算法占用50ms,这段时间CPU能不能执行其它任务?看了下面的代码:2个数组相加,猜测CPU阻塞在这里,直到GPU算出结果。如果CPU可以执行其它任务,怎么实现同步?(通知CPU可以取结果了)。CPU怎么把计算结果拷贝过来?
//************************************************************
// Demo OpenCL application to compute a simple vector addition
// computation between 2 arrays on the GPU
// ************************************************************
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <CL/cl.h>
//
// OpenCL source code
const char* OpenCLSource[] = {
"__kernel void VectorAdd(__global int* c, __global int* a,__global int* b)",
"{",
" // Index of the elements to add \n",
" unsigned int n = get_global_id(0);",
" // Sum the nth element of vectors a and b and store in c \n",
" c[n] = a[n] + b[n];",
"}"
};
// Some interesting data for the vectors
int InitialData1[80] = {37,50,54,50,56,0,43,43,74,71,32,36,16,43,56,100,50,25,15,17,37,50,54,50,56,0,43,43,74,71,32,36,16,43,56,100,50,25,15,17,37,50,54,50,56,0,43,43,74,71,32,36,16,43,56,100,50,25,15,17,37,50,54,50,56,0,43,43,74,71,32,36,16,43,56,100,50,25,15,17};
int InitialData2[80] = {35,51,54,58,55,32,36,69,27,39,35,40,16,44,55,14,58,75,18,15,35,51,54,58,55,32,36,69,27,39,35,40,16,44,55,14,58,75,18,15,35,51,54,58,55,32,36,69,27,39,35,40,16,44,55,14,58,75,18,15,35,51,54,58,55,32,36,69,27,39,35,40,16,44,55,14,58,75,18,15};
// Number of elements in the vectors to be added
#define SIZE 600000
// Main function
// ************************************************************
int main(int argc, char **argv)
{        
     // Two integer source vectors in Host memory
     int HostVector1[SIZE], HostVector2[SIZE];
     //Output Vector
     int HostOutputVector[SIZE];
     // Initialize with some interesting repeating data
     for(int c = 0; c < SIZE; c++)
     {
          HostVector1[c] = InitialData1[c%20];
          HostVector2[c] = InitialData2[c%20];
          HostOutputVector[c] = 0;
     }
     //Get an OpenCL platform
     cl_platform_id cpPlatform;
     clGetPlatformIDs(1, &cpPlatform, NULL);
     // Get a GPU device
     cl_device_id cdDevice;
     clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 1, &cdDevice, NULL);
     char cBuffer[1024];
     clGetDeviceInfo(cdDevice, CL_DEVICE_NAME, sizeof(cBuffer), &cBuffer, NULL);
     printf("CL_DEVICE_NAME: %s\n", cBuffer);
     clGetDeviceInfo(cdDevice, CL_DRIVER_VERSION, sizeof(cBuffer), &cBuffer, NULL);
     printf("CL_DRIVER_VERSION: %s\n\n", cBuffer);
     // Create a context to run OpenCL enabled GPU
     cl_context GPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, NULL);     
     // Create a command-queue on the GPU device
     cl_command_queue cqCommandQueue = clCreateCommandQueue(GPUContext, cdDevice, CL_QUEUE_PROFILING_ENABLE, NULL);
     // Allocate GPU memory for source vectors AND initialize from CPU memory
     cl_mem GPUVector1 = clCreateBuffer(GPUContext, CL_MEM_READ_ONLY |
     CL_MEM_COPY_HOST_PTR, sizeof(int) * SIZE, HostVector1, NULL);
     cl_mem GPUVector2 = clCreateBuffer(GPUContext, CL_MEM_READ_ONLY |
     CL_MEM_COPY_HOST_PTR, sizeof(int) * SIZE, HostVector2, NULL);
     // Allocate output memory on GPU
     cl_mem GPUOutputVector = clCreateBuffer(GPUContext, CL_MEM_WRITE_ONLY,
     sizeof(int) * SIZE, NULL, NULL);
     // Create OpenCL program with source code
     cl_program OpenCLProgram = clCreateProgramWithSource(GPUContext, 7, OpenCLSource, NULL, NULL);
     // Build the program (OpenCL JIT compilation)
     clBuildProgram(OpenCLProgram, 0, NULL, NULL, NULL, NULL);
     // Create a handle to the compiled OpenCL function (Kernel)
     cl_kernel OpenCLVectorAdd = clCreateKernel(OpenCLProgram, "VectorAdd", NULL);
     // In the next step we associate the GPU memory with the Kernel arguments
     clSetKernelArg(OpenCLVectorAdd, 0, sizeof(cl_mem), (void*)&GPUOutputVector);
     clSetKernelArg(OpenCLVectorAdd, 1, sizeof(cl_mem), (void*)&GPUVector1);
     clSetKernelArg(OpenCLVectorAdd, 2, sizeof(cl_mem), (void*)&GPUVector2);

     //create event
     cl_event event = clCreateUserEvent(GPUContext, NULL);

     // Launch the Kernel on the GPU
     // This kernel only uses global data
     size_t WorkSize[1] = {SIZE}; // one dimensional Range
     clEnqueueNDRangeKernel(cqCommandQueue, OpenCLVectorAdd, 1, NULL, WorkSize, NULL, 0, NULL, &event);
     // Copy the output in GPU memory back to CPU memory
     clEnqueueReadBuffer(cqCommandQueue, GPUOutputVector, CL_TRUE, 0,
     SIZE * sizeof(int), HostOutputVector, 0, NULL, NULL);
     // Cleanup
     clReleaseKernel(OpenCLVectorAdd);
     clReleaseProgram(OpenCLProgram);
     clReleaseCommandQueue(cqCommandQueue);
     clReleaseContext(GPUContext);
     clReleaseMemObject(GPUVector1);
     clReleaseMemObject(GPUVector2);
     clReleaseMemObject(GPUOutputVector);   

     clWaitForEvents(1, &event);
     cl_ulong start = 0, end = 0;
     double total_time;     

     clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
     clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL);

     total_time = end - start;      
     printf("\nExecution time in milliseconds = %0.3f ms", (total_time / 1000000.0) );
     printf("\nExecution time in seconds = %0.3f s\n\n", ((total_time / 1000000.0))/1000 );         

     return 0;
}



我知道答案 目前已有1人回答
回复

使用道具 举报

该用户从未签到

20

主题

809

帖子

1

超级版主

Rank: 8Rank: 8

积分
1566
最后登录
2023-1-17
发表于 2020-4-3 10:36:36 | 显示全部楼层
你这里是要用Linux还是Android 版本的BSP呢?
回复 支持 反对

使用道具 举报

您需要登录后才可以回帖 注册/登录

本版积分规则

关闭

站长推荐上一条 /3 下一条

Archiver|手机版|小黑屋|恩智浦技术社区

GMT+8, 2025-7-21 03:59 , Processed in 0.087233 second(s), 22 queries , MemCache On.

Powered by Discuz! X3.4

Copyright © 2001-2024, Tencent Cloud.

快速回复 返回顶部 返回列表