Thread: Chapter 1: Creating and distributing a matrix-vector multiplication kernel

    Petr Schreiber
    Aug 2005
    Brno - Czech Republic
    Chapter 1: Creating and distributing a matrix-vector multiplication kernel

    This is the adapted code from listing 1.1 in chapter 1 of the book OpenCL in Action.
    The example performs this matrix-vector multiplication:
    |  0.0   2.0   4.0   6.0 |   | 0.0 |   | 84.0  |
    |                        |   |     |   |       |
    |  8.0  10.0  12.0  14.0 |   | 3.0 |   | 228.0 | 
    |                        | x |     | = |       |
    | 16.0  18.0  20.0  22.0 |   | 6.0 |   | 372.0 |
    |                        |   |     |   |       |
    | 24.0  26.0  28.0  30.0 |   | 9.0 |   | 516.0 |
    Compared to the original example, I added more informative console output.

    ' -- NOTE: Enhanced to list the result/expected result
    Uses "Console", "File"
    #INCLUDE "%APP_INCLUDEPATH%/cl/cl.tBasicU"
    /* Script-wide constants: where the OpenCL kernel source lives and which kernel to run. */
    Begin Const
      /* The file name was missing (empty string), which made the path point at the  */
      /* script directory itself. "matvec.cl" is the kernel source file name used by */
      /* listing 1.1 of OpenCL in Action; it is expected next to this script.        */
      $PROGRAM_FILE = APP_SourcePath+"matvec.cl"
      /* Name of the kernel function looked up inside the built program */
      $KERNEL_FUNC  = "matvec_mult"
    End Const
    /*
       TBMain - script entry point.

       Multiplies a 4x4 matrix by a 4-element vector with the OpenCL kernel named by
       $KERNEL_FUNC (source loaded from $PROGRAM_FILE), then compares the values read
       back from the device with the same product computed on the host.

       Every OpenCL step is checked; on failure a message is printed, the return code
       is set to 1, a key press is awaited and the function exits early. The early
       exits do not release already created OpenCL objects.
    */
    Function TBMain()
       /* Host/device data structures */
       tcl_platform_id   platform
       tcl_device_id     device
       tcl_context       context
       tcl_command_queue queue
       tcl_int           i, ErrCl
       /* Program/kernel data structures */
       tcl_program program
       String program_buffer, program_log
       tSize program_size, log_size
       tcl_kernel kernel
       /* Data and buffers */
       Single mtx(16), vec(4), result(4)
       Single correct(4) = 0.0, 0.0, 0.0, 0.0
       tcl_mem mat_buff, vec_buff, res_buff
       tSize work_units_per_kernel
       /* Initialize data to be processed by the kernel */
       /* Matrix is stored row by row: elements 1..4 are row 1, 5..8 row 2, ... */
       For i = 1 To 16
          mtx(i) = (i-1) * 2.0
       Next
       /* Fill the vector and accumulate the expected (host side) result per row */
       For i = 1 To 4
          vec(i) = (i-1) * 3.0
          correct(1) += mtx(i)    * vec(i)
          correct(2) += mtx(i+4)  * vec(i)
          correct(3) += mtx(i+8)  * vec(i)
          correct(4) += mtx(i+12) * vec(i)
       Next
       /* Identify a platform */
       ErrCl = clGetPlatformIDs(1, platform, ByVal NULL)
       If (ErrCl < 0) Then
          perror("Couldn't find any platforms")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       /* Access a device: prefer a GPU, fall back to a CPU device when no GPU is usable */
       ErrCl = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, device, ByVal NULL)
       If (ErrCl < 0) Then
          ErrCl = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, device, ByVal NULL)
       End If
       If (ErrCl < 0) Then
          perror("Couldn't find any devices")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       /* Create the context */
       context = clCreateContext(ByVal NULL, 1, device, ByVal NULL, ByVal NULL, ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create a context")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       /* Read program file and place content into buffer */
       program_buffer = FILE_Load($PROGRAM_FILE)
       program_size   = Len(program_buffer)
       /* An empty buffer is treated as "file missing" */
       If (program_size = 0) Then
          perror("Couldn't find the program file")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       /* Create program from file */
       program = clCreateProgramWithSource(context, 1, VarPtr(program_buffer), program_size, ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create the program")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       /* Source text is no longer needed on the host once the program object exists */
       program_buffer = ""
       /* Build program */
       ErrCl = clBuildProgram(program, 0, ByVal 0, "", 0, ByVal 0)
       If (ErrCl < 0) Then
          /* Find size of log and print to std output */
          clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 
                0, ByVal NULL, log_size)
          /* Pre-size the string so the second query can write the log into it */
          program_log = Repeat$(log_size+1, $NUL)
          clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 
                log_size + 1, ByVal StrPtr(program_log), ByVal NULL)
          PrintL program_log
          program_log = ""
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       /* Create kernel for the matvec_mult function */
       kernel = clCreateKernel(program, StrPtr($KERNEL_FUNC), ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create the kernel")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       /* Create CL buffers to hold input and output data */
       mat_buff = clCreateBuffer(context, CL_MEM_READ_ONLY | 
          CL_MEM_COPY_HOST_PTR, SizeOf(Single)*16, mtx, ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create a buffer object")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       vec_buff = clCreateBuffer(context, CL_MEM_READ_ONLY | 
          CL_MEM_COPY_HOST_PTR, SizeOf(Single)*4, vec, ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create a buffer object")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       /* Output buffer: no host data is copied in, the kernel only writes to it */
       res_buff = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
          SizeOf(Single)*4, ByVal NULL, ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create a buffer object")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       /* Create kernel arguments from the CL buffers */
       ErrCl = clSetKernelArg(kernel, 0, SizeOf(tcl_mem), mat_buff)
       If (ErrCl < 0) Then
          perror("Couldn't set the kernel argument")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       ErrCl = clSetKernelArg(kernel, 1, SizeOf(tcl_mem), vec_buff)
       If (ErrCl < 0) Then
          perror("Couldn't set the kernel argument")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       ErrCl = clSetKernelArg(kernel, 2, SizeOf(tcl_mem), res_buff)
       If (ErrCl < 0) Then
          perror("Couldn't set the kernel argument")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       /* Create a CL command queue for the device*/
       queue = clCreateCommandQueue(context, device, 0, ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create the command queue")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       /* Enqueue the command queue to the device */
       work_units_per_kernel = 4 /* 4 work-units per kernel */ 
       ErrCl = clEnqueueNDRangeKernel(queue, kernel, 1, ByVal NULL, work_units_per_kernel, 
          ByVal NULL, 0, ByVal NULL, ByVal NULL)
       If (ErrCl < 0) Then
          perror("Couldn't enqueue the kernel execution command")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       /* Read the result (CL_TRUE requests a blocking read) */
       ErrCl = clEnqueueReadBuffer(queue, res_buff, CL_TRUE, 0, SizeOf(Single)*4, 
          result, 0, ByVal NULL, ByVal NULL)
       If (ErrCl < 0) Then
          perror("Couldn't enqueue the read buffer command")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       /* Test the result: all four components must match the host computation */
       If((result(1) = correct(1)) And (result(2) = correct(2)) And (result(3) = correct(3)) And (result(4) = correct(4))) Then
          PrintL("Matrix-vector multiplication successful.")
       Else
          PrintL("Matrix-vector multiplication unsuccessful.")
       End If
       PrintL("Calculated result: " + Join$(result, ", "))
       PrintL("Expected result  : " + Join$(correct, ", "))
       /* Deallocate resources */
       clReleaseMemObject(mat_buff)
       clReleaseMemObject(vec_buff)
       clReleaseMemObject(res_buff)
       clReleaseKernel(kernel)
       clReleaseCommandQueue(queue)
       clReleaseProgram(program)
       clReleaseContext(context)
       /* Keep the console open until the user has seen the output */
       PrintL "Press any key to continue..."
       WaitKey
    End Function
    /* pError - writes the given error message, followed by a line break, to the console. */
    /*   sError : text of the message to print                                            */
    Function pError( sError As String )
      PrintL(sError)
    End Function
    You will need the latest ThinBASIC and the OpenCL headers to run it and, of course, a modern GPU or CPU.

    Learn 3D graphics with ThinBASIC, learn TBGL!
    Windows 10 64bit - Intel Core i5-3350P @ 3.1GHz - 16 GB RAM - NVIDIA GeForce GTX 1050 Ti 4GB

