Results 1 to 8 of 8

Thread: Chapter 1: Creating and distributing a matrix-vector multiplication kernel

Threaded View

Previous Post Previous Post   Next Post Next Post
  1. #1
    Super Moderator Petr Schreiber's Avatar
    Join Date
    Aug 2005
    Location
    Brno - Czech Republic
    Posts
    7,156
    Rep Power
    736

    Chapter 1: Creating and distributing a matrix-vector multiplication kernel

    This is the adapted code from listing 1.1 in chapter 1 of the book OpenCL in Action.
    The example basically performs this matrix multiplication:
    |  0.0   2.0   4.0   6.0 |   | 0.0 |   | 84.0  |
    |                        |   |     |   |       |
    |  8.0  10.0  12.0  14.0 |   | 3.0 |   | 228.0 | 
    |                        | x |     | = |       |
    | 16.0  18.0  20.0  22.0 |   | 6.0 |   | 372.0 |
    |                        |   |     |   |       |
    | 24.0  26.0  28.0  30.0 |   | 9.0 |   | 516.0 |
    
    Compared to the original example, I added more informative text to the console output.

    ' -- NOTE: Enhanced to list the result/expected result
    
    Uses "Console", "File"
    
    #INCLUDE "%APP_INCLUDEPATH%/cl/cl.tBasicU"
    
    ' String constants used by TBMain.
    Begin Const
      ' Path of the OpenCL source file: APP_SourcePath with "matvec.cl" appended.
      $PROGRAM_FILE = APP_SourcePath+"matvec.cl"
      ' Name of the kernel function that TBMain passes to clCreateKernel.
      $KERNEL_FUNC  = "matvec_mult"
    End Const
    
    ' TBMain - program entry point.
    '
    ' Multiplies a 4x4 matrix by a 4-element vector on an OpenCL device and
    ' compares the device result with the same product computed on the host.
    '
    ' Steps: fill the input data, pick the first platform and the first GPU
    ' device, create a context, load and build the program in $PROGRAM_FILE,
    ' create the $KERNEL_FUNC kernel, create the three buffers, bind them as
    ' kernel arguments 0..2, enqueue the kernel with 4 work-items, read the
    ' result back with a blocking read and print both result vectors.
    '
    ' Every OpenCL call that reports an error code is checked. On failure a
    ' message is printed via pError (or the build log is printed for a build
    ' failure), the return code is set to 1 with APP_SetReturnCode, the
    ' program waits for a key press and the function exits.
    ' On success the return code is set to 0.
    Function TBMain()
    
       /* Host/device data structures */
       tcl_platform_id   platform
       tcl_device_id     device
       tcl_context       context
       tcl_command_queue queue
       tcl_int           i, ErrCl
    
       /* Program/kernel data structures */
       tcl_program program
       
       String program_buffer, program_log
       tSize program_size, log_size
       tcl_kernel kernel
       
       /* Data and buffers */
       Single mtx(16), vec(4), result(4)
       Single correct(4) = 0.0, 0.0, 0.0, 0.0
       tcl_mem mat_buff, vec_buff, res_buff
       tSize work_units_per_kernel
    
       /* Initialize data to be processed by the kernel */
       /* mtx holds the 4x4 matrix row by row: 0, 2, 4, ... 30 */
       For i = 1 To 16
          mtx(i) = (i-1) * 2.0
       Next
       
       /* vec is 0, 3, 6, 9; correct(r) accumulates the dot product of row r with vec */
       For i = 1 To 4
          vec(i) = (i-1) * 3.0
          correct(1) += mtx(i)    * vec(i)
          correct(2) += mtx(i+4)  * vec(i)
          correct(3) += mtx(i+8)  * vec(i)
          correct(4) += mtx(i+12) * vec(i)      
       Next
    
       /* Identify a platform */
       ErrCl = clGetPlatformIDs(1, platform, ByVal NULL)
       If (ErrCl < 0) Then
          perror("Couldn't find any platforms")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
    
       /* Access a device */
       ErrCl = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, device, ByVal NULL)
       If (ErrCl < 0) Then
          perror("Couldn't find any devices")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
    
       /* Create the context */
       context = clCreateContext(ByVal NULL, 1, device, ByVal NULL, ByVal NULL, ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create a context")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
    
       /* Read program file and place content into buffer */     
       program_buffer = FILE_Load($PROGRAM_FILE)   
       program_size   = Len(program_buffer)
       If (program_size = 0) Then
          perror("Couldn't find the program file")
          APP_SetReturnCode(1) : WaitKey : Exit Function 
       End If
    
       /* Create program from file */   
       program = clCreateProgramWithSource(context, 1, VarPtr(program_buffer), program_size, ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create the program")
          APP_SetReturnCode(1) : WaitKey : Exit Function 
       End If
       /* The source text is no longer needed on the host side */
       program_buffer = ""  
       
       /* Build program */
       ErrCl = clBuildProgram(program, 0, ByVal 0, "", 0, ByVal 0)
       If (ErrCl < 0) Then
    
          /* Find size of log and print to std output */
          clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 
                0, ByVal NULL, log_size)
          program_log = Repeat$(log_size+1, $NUL)
          
          clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 
                log_size + 1, ByVal StrPtr(program_log), ByVal NULL)
          PrintL program_log
          program_log = ""  
          
          APP_SetReturnCode(1) : WaitKey : Exit Function 
       End If
    
       /* Create kernel for the matvec_mult function ($KERNEL_FUNC) */
       kernel = clCreateKernel(program, StrPtr($KERNEL_FUNC), ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create the kernel")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
    
       /* Create CL buffers to hold input and output data */
       mat_buff = clCreateBuffer(context, CL_MEM_READ_ONLY | 
          CL_MEM_COPY_HOST_PTR, SizeOf(Single)*16, mtx, ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create a buffer object")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       
       /* The vector and result buffers are checked the same way as the matrix buffer */
       vec_buff = clCreateBuffer(context, CL_MEM_READ_ONLY | 
          CL_MEM_COPY_HOST_PTR, SizeOf(Single)*4, vec, ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create a buffer object")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       
       res_buff = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
          SizeOf(Single)*4, ByVal NULL, ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create a buffer object")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
    
       /* Create kernel arguments from the CL buffers */
       ErrCl = clSetKernelArg(kernel, 0, SizeOf(tcl_mem), mat_buff)
       If (ErrCl < 0) Then
          perror("Couldn't set the kernel argument")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       
       ErrCl = clSetKernelArg(kernel, 1, SizeOf(tcl_mem), vec_buff)
       If (ErrCl < 0) Then
          perror("Couldn't set the kernel argument")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       
       ErrCl = clSetKernelArg(kernel, 2, SizeOf(tcl_mem), res_buff)
       If (ErrCl < 0) Then
          perror("Couldn't set the kernel argument")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
    
       /* Create a CL command queue for the device*/
       queue = clCreateCommandQueue(context, device, 0, ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create the command queue")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
    
       /* Enqueue the command queue to the device */
       work_units_per_kernel = 4 /* 4 work-units per kernel */ 
       ErrCl = clEnqueueNDRangeKernel(queue, kernel, 1, ByVal NULL, work_units_per_kernel, 
          ByVal NULL, 0, ByVal NULL, ByVal NULL)
       If (ErrCl < 0) Then
          perror("Couldn't enqueue the kernel execution command")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
    
       /* Read the result (CL_TRUE requests a blocking read) */
       ErrCl = clEnqueueReadBuffer(queue, res_buff, CL_TRUE, 0, SizeOf(Single)*4, 
          result, 0, ByVal NULL, ByVal NULL)
          
       If (ErrCl < 0) Then
          perror("Couldn't enqueue the read buffer command")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
    
       /* Test the result */
       If((result(1) = correct(1)) And (result(2) = correct(2)) And (result(3) = correct(3)) And (result(4) = correct(4))) Then
           
          PrintL("Matrix-vector multiplication successful.")
       
       Else 
       
          PrintL("Matrix-vector multiplication unsuccessful.")
          
       End If
       PrintL("Calculated result: " + Join$(result, ", "))
       PrintL("Expected result  : " + Join$(correct, ", "))
       
       /* Deallocate resources */
       clReleaseMemObject(mat_buff)
       clReleaseMemObject(vec_buff)
       clReleaseMemObject(res_buff)
       clReleaseKernel(kernel)
       clReleaseCommandQueue(queue)
       clReleaseProgram(program)
       clReleaseContext(context)
    
       PrintL "Press any key to continue..."   
       WaitKey
       APP_SetReturnCode(0)                            
    
    End Function
    
    ' pError - prints sError on its own line using the bright red console
    ' attribute, then switches the console attribute to red+green+blue.
    Function pError( sError As String )
    
      ' Attribute used while the message is printed
      Long alertAttr  = %CONSOLE_FOREGROUND_RED | %CONSOLE_FOREGROUND_INTENSITY
      ' Attribute applied afterwards (all three colour bits, no intensity bit)
      Long normalAttr = %CONSOLE_FOREGROUND_RED | %CONSOLE_FOREGROUND_GREEN | %CONSOLE_FOREGROUND_BLUE
    
      Console_SetTextAttribute(alertAttr)
      PrintL(sError)
      Console_SetTextAttribute(normalAttr)
      
    End Function
    
    You will need the latest thinBasic and the OpenCL headers to run it, plus, of course, a modern GPU or CPU with OpenCL support.


    Enjoy!
    Petr
    Attached Files Attached Files
    Last edited by Petr Schreiber; 18-09-2012 at 15:23.
    Learn 3D graphics with ThinBASIC, learn TBGL!
    Windows 10 64bit - Intel Core i5-3350P @ 3.1GHz - 16 GB RAM - NVIDIA GeForce GTX 1050 Ti 4GB

Similar Threads

  1. Faster Standard Multiplication
    By danbaron in forum Other languages
    Replies: 1
    Last Post: 01-01-2012, 13:12
  2. Toom-Cook Multiplication
    By danbaron in forum Math: all about
    Replies: 9
    Last Post: 09-12-2011, 00:25
  3. OpenCL Kernel Code Decorator [Updated Sep 04 2011]
    By Petr Schreiber in forum OpenCL
    Replies: 2
    Last Post: 02-05-2010, 06:31
  4. Collaborative development of the Linux Kernel
    By Charles Pegge in forum General
    Replies: 3
    Last Post: 27-11-2009, 21:58
  5. Distributing ThinBasic with IndeED, what is needed?
    By Michael Hartlef in forum thinBasic General
    Replies: 4
    Last Post: 18-12-2006, 14:56

Members who have read this thread: 0

There are no members to list at the moment.

Tags for this Thread

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •