Results 1 to 8 of 8

Thread: Chapter 1: Creating and distributing a matrix-vector multiplication kernel

  1. #1
    Super Moderator Petr Schreiber's Avatar
    Join Date
    Aug 2005
    Location
    Brno - Czech Republic
    Posts
    7,146
    Rep Power
    735

    Chapter 1: Creating and distributing a matrix-vector multiplication kernel

    This is the adapted code from listing 1.1 in chapter 1 of the book OpenCL in Action.
    The example basically performs this matrix multiplication:
    |  0.0   2.0   4.0   6.0 |   | 0.0 |   | 84.0  |
    |                        |   |     |   |       |
    |  8.0  10.0  12.0  14.0 |   | 3.0 |   | 228.0 | 
    |                        | x |     | = |       |
    | 16.0  18.0  20.0  22.0 |   | 6.0 |   | 372.0 |
    |                        |   |     |   |       |
    | 24.0  26.0  28.0  30.0 |   | 9.0 |   | 516.0 |
    
    Compared to the original example, I added more informative text output to the console.

    ' -- NOTE: Enhanced to list the result/expected result
    
    Uses "Console", "File"
    
    #INCLUDE "%APP_INCLUDEPATH%/cl/cl.tBasicU"
    
    ' Script-wide string constants used by TBMain.
    Begin Const
      ' Path of the OpenCL C source file, built from the script's source path; loaded with FILE_Load
      $PROGRAM_FILE = APP_SourcePath+"matvec.cl"
      ' Name of the kernel function passed to clCreateKernel
      $KERNEL_FUNC  = "matvec_mult"
    End Const
    
    Function TBMain()
       /*
        * Entry point of the example.
        *
        * Fills a 16-element matrix and a 4-element vector, computes the expected
        * matrix-vector product on the host, then runs the kernel $KERNEL_FUNC
        * from $PROGRAM_FILE on the first GPU device of the first OpenCL platform
        * and compares the values read back with the host result.
        *
        * On any OpenCL or file error a message is printed, the return code is
        * set to 1 through APP_SetReturnCode, a key press is awaited and the
        * function exits. On completion the return code is set to 0.
        */
    
       /* Host/device data structures */
       tcl_platform_id   platform
       tcl_device_id     device
       tcl_context       context
       tcl_command_queue queue
       tcl_int           i, ErrCl
    
       /* Program/kernel data structures */
       tcl_program program
       
       String program_buffer, program_log
       tSize program_size, log_size
       tcl_kernel kernel
       
       /* Data and buffers */
       Single mtx(16), vec(4), result(4)
       Single correct(4) = 0.0, 0.0, 0.0, 0.0
       tcl_mem mat_buff, vec_buff, res_buff
       tSize work_units_per_kernel
    
       /* Initialize data to be processed by the kernel */
       For i = 1 To 16
          mtx(i) = (i-1) * 2.0
       Next
       
       /* Fill the vector and accumulate the host-side reference result;
          mtx is treated as 4 rows of 4 consecutive elements */
       For i = 1 To 4
          vec(i) = (i-1) * 3.0
          correct(1) += mtx(i)    * vec(i)
          correct(2) += mtx(i+4)  * vec(i)
          correct(3) += mtx(i+8)  * vec(i)
          correct(4) += mtx(i+12) * vec(i)      
       Next
    
       /* Identify a platform */
       ErrCl = clGetPlatformIDs(1, platform, ByVal NULL)
       If (ErrCl < 0) Then
          perror("Couldn't find any platforms")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
    
       /* Access a device (GPU devices only are requested) */
       ErrCl = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, device, ByVal NULL)
       If (ErrCl < 0) Then
          perror("Couldn't find any devices")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
    
       /* Create the context */
       context = clCreateContext(ByVal NULL, 1, device, ByVal NULL, ByVal NULL, ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create a context")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
    
       /* Read program file and place content into buffer;
          an empty result is reported as a missing file */     
       program_buffer = FILE_Load($PROGRAM_FILE)   
       program_size   = Len(program_buffer)
       If (program_size = 0) Then
          perror("Couldn't find the program file")
          APP_SetReturnCode(1) : WaitKey : Exit Function 
       End If
    
       /* Create program from file */   
       program = clCreateProgramWithSource(context, 1, VarPtr(program_buffer), program_size, ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create the program")
          APP_SetReturnCode(1) : WaitKey : Exit Function 
       End If
       program_buffer = ""  
       
       /* Build program */
       ErrCl = clBuildProgram(program, 0, ByVal 0, "", 0, ByVal 0)
       If (ErrCl < 0) Then
    
          /* Find size of log and print to std output */
          clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 
                0, ByVal NULL, log_size)
          program_log = Repeat$(log_size+1, $NUL)
          
          clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 
                log_size + 1, ByVal StrPtr(program_log), ByVal NULL)
          PrintL program_log
          program_log = ""  
          
          APP_SetReturnCode(1) : waitkey : Exit Function 
       End If
    
       /* Create kernel for the function named by $KERNEL_FUNC */
       kernel = clCreateKernel(program, StrPtr($KERNEL_FUNC), ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create the kernel")
          APP_SetReturnCode(1) : waitkey : Exit Function
       End If
    
       /* Create CL buffers to hold input and output data.
          Every creation reports its status through ErrCl and is checked. */
       mat_buff = clCreateBuffer(context, CL_MEM_READ_ONLY | 
          CL_MEM_COPY_HOST_PTR, SizeOf(Single)*16, mtx, ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create a buffer object")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       
       vec_buff = clCreateBuffer(context, CL_MEM_READ_ONLY | 
          CL_MEM_COPY_HOST_PTR, SizeOf(Single)*4, vec, ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create a buffer object")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       
       res_buff = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
          SizeOf(Single)*4, ByVal NULL, ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create a buffer object")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
    
       /* Create kernel arguments from the CL buffers:
          0 = matrix, 1 = vector, 2 = result. Each call is checked. */
       ErrCl = clSetKernelArg(kernel, 0, SizeOf(tcl_mem), mat_buff)
       If (ErrCl < 0) Then
          perror("Couldn't set the kernel argument")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       
       ErrCl = clSetKernelArg(kernel, 1, SizeOf(tcl_mem), vec_buff)
       If (ErrCl < 0) Then
          perror("Couldn't set the kernel argument")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
       
       ErrCl = clSetKernelArg(kernel, 2, SizeOf(tcl_mem), res_buff)
       If (ErrCl < 0) Then
          perror("Couldn't set the kernel argument")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
    
       /* Create a CL command queue for the device*/
       queue = clCreateCommandQueue(context, device, 0, ErrCl)
       If (ErrCl < 0) Then
          perror("Couldn't create the command queue")
          APP_SetReturnCode(1) : waitkey : Exit Function
       End If
    
       /* Enqueue the kernel on a 1-dimensional range of 4 work-units
          (presumably one per result element - the kernel source lives in
          matvec.cl and is not shown here) */
       work_units_per_kernel = 4 /* 4 work-units per kernel */ 
       ErrCl = clEnqueueNDRangeKernel(queue, kernel, 1, ByVal NULL, work_units_per_kernel, 
          ByVal NULL, 0, ByVal NULL, ByVal NULL)
       If (ErrCl < 0) Then
          perror("Couldn't enqueue the kernel execution command")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
    
       /* Read the result; CL_TRUE requests a blocking read, so result() is
          filled when the call returns */
       ErrCl = clEnqueueReadBuffer(queue, res_buff, CL_TRUE, 0, SizeOf(Single)*4, 
          result, 0, ByVal NULL, ByVal NULL)
          
       If (ErrCl < 0) Then
          perror("Couldn't enqueue the read buffer command")
          APP_SetReturnCode(1) : WaitKey : Exit Function
       End If
    
       /* Test the result against the host-side reference */
       If((result(1) = correct(1)) And (result(2) = correct(2)) And (result(3) = correct(3)) And (result(4) = correct(4))) Then
           
          PrintL("Matrix-vector multiplication successful.")
       
       Else 
       
          PrintL("Matrix-vector multiplication unsuccessful.")
          
       End If
       PrintL("Calculated result: " + Join$(result, ", "))
       PrintL("Expected result  : " + Join$(correct, ", "))
       
       /* Deallocate resources */
       clReleaseMemObject(mat_buff)
       clReleaseMemObject(vec_buff)
       clReleaseMemObject(res_buff)
       clReleaseKernel(kernel)
       clReleaseCommandQueue(queue)
       clReleaseProgram(program)
       clReleaseContext(context)
    
       PrintL "Press any key to continue..."   
       WaitKey
       APP_SetReturnCode(0)                            
    
    End Function
    
    Function pError( sError As String )
      /* Prints sError on its own line in bright red, then switches the console
         text attribute to red+green+blue (no intensity flag) for later output. */
      Long alertAttr = %CONSOLE_FOREGROUND_RED | %CONSOLE_FOREGROUND_INTENSITY
      Long plainAttr = %CONSOLE_FOREGROUND_RED | %CONSOLE_FOREGROUND_GREEN | %CONSOLE_FOREGROUND_BLUE
    
      Console_SetTextAttribute(alertAttr)
      PrintL(sError)
      Console_SetTextAttribute(plainAttr)
    
    End Function
    
    You will need the latest ThinBASIC and OpenCL headers to run it, plus of course a modern GPU or CPU.


    Enjoy!,
    Petr
    Attached Files Attached Files
    Last edited by Petr Schreiber; 18-09-2012 at 15:23.
    Learn 3D graphics with ThinBASIC, learn TBGL!
    Windows 10 64bit - Intel Core i5-3350P @ 3.1GHz - 16 GB RAM - NVIDIA GeForce GTX 1050 Ti 4GB

  2. #2
    Super Moderator Petr Schreiber's Avatar
    Join Date
    Aug 2005
    Location
    Brno - Czech Republic
    Posts
    7,146
    Rep Power
    735
    Updated code on 18-9-2012, 15:22 CET - there was a typo in error handling of failed build, fixed
    Learn 3D graphics with ThinBASIC, learn TBGL!
    Windows 10 64bit - Intel Core i5-3350P @ 3.1GHz - 16 GB RAM - NVIDIA GeForce GTX 1050 Ti 4GB

  3. #3
    thinBasic MVPs kryton9's Avatar
    Join Date
    Nov 2006
    Location
    Naples, Florida & Duluth, Georgia
    Age
    68
    Posts
    3,865
    Rep Power
    404
    Thanks for the first of hopefully many examples, Petr. I looked over the code and it is daunting to use OpenCL. Good luck as you make your way through the book and code; it will be a valuable resource for the rest of us, so thanks!

  4. #4
    Super Moderator Petr Schreiber's Avatar
    Join Date
    Aug 2005
    Location
    Brno - Czech Republic
    Posts
    7,146
    Rep Power
    735
    The example in the first chapter is meant to give an overview of how a finished minimal example works. There are a lot of odd things to see, and it might even look confusing.
    Examples from chapter 2 onward start right from the basics; I will start posting them soon. You will see it is nothing to be scared of.


    Petr
    Learn 3D graphics with ThinBASIC, learn TBGL!
    Windows 10 64bit - Intel Core i5-3350P @ 3.1GHz - 16 GB RAM - NVIDIA GeForce GTX 1050 Ti 4GB

  5. #5
    Finally had the chance to try this. I haven't got around to trying the other examples yet but I'm sure they'll work. :-D

    Operating System: Windows 10 Home 64-bit
    CPU: Intel Celeron N4000 CPU @ 1.10GHz
    Memory: 4.00GB RAM
    Graphics: Intel UHD Graphics 600

  6. #6
    Super Moderator Petr Schreiber's Avatar
    Join Date
    Aug 2005
    Location
    Brno - Czech Republic
    Posts
    7,146
    Rep Power
    735
    Hi Matthew,

    thanks for testing. Did you have to modify the headers in any way for your Radeon?


    Petr
    Learn 3D graphics with ThinBASIC, learn TBGL!
    Windows 10 64bit - Intel Core i5-3350P @ 3.1GHz - 16 GB RAM - NVIDIA GeForce GTX 1050 Ti 4GB

  7. #7
    No not yet I haven't but this is the only example that I've tried so-far. Maybe when I try the other examples later I'll need to make some changes?
    Operating System: Windows 10 Home 64-bit
    CPU: Intel Celeron N4000 CPU @ 1.10GHz
    Memory: 4.00GB RAM
    Graphics: Intel UHD Graphics 600

  8. #8
    Super Moderator Petr Schreiber's Avatar
    Join Date
    Aug 2005
    Location
    Brno - Czech Republic
    Posts
    7,146
    Rep Power
    735
    Hopefully not,

    OpenCL C is not device specific. My only worry was about the name of the DLL on ATi/AMD cards - in the first releases it was atiocl.dll, while NVidia and Intel had OpenCL.DLL.
    It seems Radeon guys have changed the naming convention, good for us


    Petr
    Learn 3D graphics with ThinBASIC, learn TBGL!
    Windows 10 64bit - Intel Core i5-3350P @ 3.1GHz - 16 GB RAM - NVIDIA GeForce GTX 1050 Ti 4GB

Similar Threads

  1. Faster Standard Multiplication
    By danbaron in forum Other languages
    Replies: 1
    Last Post: 01-01-2012, 13:12
  2. Toom-Cook Multiplication
    By danbaron in forum Math: all about
    Replies: 9
    Last Post: 09-12-2011, 00:25
  3. OpenCL Kernel Code Decorator [Updated Sep 04 2011]
    By Petr Schreiber in forum OpenCL
    Replies: 2
    Last Post: 02-05-2010, 06:31
  4. Collaborative development of the Linux Kernel
    By Charles Pegge in forum General
    Replies: 3
    Last Post: 27-11-2009, 21:58
  5. Distributing ThinBasic with IndeED, what is needed?
    By Michael Hartlef in forum thinBasic General
    Replies: 4
    Last Post: 18-12-2006, 14:56

Members who have read this thread: 0

There are no members to list at the moment.

Tags for this Thread

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •