Petr Schreiber
17-09-2012, 22:24
This is code adapted from Listing 1.1 in Chapter 1 of the book OpenCL in Action.
The example performs this matrix-vector multiplication:

$$
\begin{pmatrix}
0.0 & 2.0 & 4.0 & 6.0 \\
8.0 & 10.0 & 12.0 & 14.0 \\
16.0 & 18.0 & 20.0 & 22.0 \\
24.0 & 26.0 & 28.0 & 30.0
\end{pmatrix}
\times
\begin{pmatrix}
0.0 \\ 3.0 \\ 6.0 \\ 9.0
\end{pmatrix}
=
\begin{pmatrix}
84.0 \\ 228.0 \\ 372.0 \\ 516.0
\end{pmatrix}
$$
Compared to the original example, I added more informative text to the console output.
' -- NOTE: Enhanced to list the result/expected result
Uses "Console", "File"
#INCLUDE "%APP_INCLUDEPATH%/cl/cl.tBasicU"
' String constants used by TBMain:
'   $PROGRAM_FILE - path of the OpenCL source file, built from APP_SourcePath plus "matvec.cl";
'                   TBMain loads it with FILE_Load.
'   $KERNEL_FUNC  - name of the kernel function that TBMain passes to clCreateKernel.
Begin Const
$PROGRAM_FILE = APP_SourcePath+"matvec.cl"
$KERNEL_FUNC = "matvec_mult"
End Const
Function TBMain()
  ' Program entry point.
  '
  ' Multiplies a 4x4 matrix by a 4-element vector on an OpenCL device using the
  ' kernel named by $KERNEL_FUNC from the file $PROGRAM_FILE, then compares the
  ' device result with a reference computed on the host and prints both.
  '
  ' On any OpenCL failure an error message is printed, the return code is set
  ' to 1 via APP_SetReturnCode, the program waits for a key and the function
  ' exits. On success the return code is set to 0.

  /* Host/device data structures */
  tcl_platform_id platform
  tcl_device_id device
  tcl_context context
  tcl_command_queue queue
  tcl_int i, ErrCl

  /* Program/kernel data structures */
  tcl_program program
  String program_buffer, program_log
  tSize program_size, log_size
  tcl_kernel kernel

  /* Data and buffers */
  Single mtx(16), vec(4), result(4)
  Single correct(4) = 0.0, 0.0, 0.0, 0.0
  tcl_mem mat_buff, vec_buff, res_buff
  tSize work_units_per_kernel

  /* Initialize data to be processed by the kernel */
  For i = 1 To 16
    mtx(i) = (i-1) * 2.0
  Next

  ' Fill the vector and accumulate the host-side reference result.
  ' mtx is treated as row-major: row r occupies elements (r-1)*4+1 .. (r-1)*4+4.
  For i = 1 To 4
    vec(i) = (i-1) * 3.0
    correct(1) += mtx(i) * vec(i)
    correct(2) += mtx(i+4) * vec(i)
    correct(3) += mtx(i+8) * vec(i)
    correct(4) += mtx(i+12) * vec(i)
  Next

  /* Identify a platform */
  ErrCl = clGetPlatformIDs(1, platform, ByVal NULL)
  If (ErrCl < 0) Then
    perror("Couldn't find any platforms")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Access a device: prefer a GPU, fall back to a CPU device */
  ErrCl = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, device, ByVal NULL)
  If (ErrCl < 0) Then
    ' NOTE(review): CL_DEVICE_TYPE_CPU is assumed to be declared by the same
    ' OpenCL header that provides CL_DEVICE_TYPE_GPU - confirm.
    ErrCl = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, device, ByVal NULL)
  End If
  If (ErrCl < 0) Then
    perror("Couldn't find any devices")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Create the context */
  context = clCreateContext(ByVal NULL, 1, device, ByVal NULL, ByVal NULL, ErrCl)
  If (ErrCl < 0) Then
    perror("Couldn't create a context")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Read program file and place content into buffer */
  program_buffer = FILE_Load($PROGRAM_FILE)
  program_size = Len(program_buffer)
  If (program_size = 0) Then
    perror("Couldn't find the program file")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Create program from file */
  program = clCreateProgramWithSource(context, 1, VarPtr(program_buffer), program_size, ErrCl)
  If (ErrCl < 0) Then
    perror("Couldn't create the program")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If
  program_buffer = ""

  /* Build program */
  ErrCl = clBuildProgram(program, 0, ByVal 0, "", 0, ByVal 0)
  If (ErrCl < 0) Then
    /* Find size of log and print to std output */
    clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, ByVal NULL, log_size)
    ' Pre-size the string so the second call can write the log into it.
    program_log = Repeat$(log_size+1, $NUL)
    clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size + 1, ByVal StrPtr(program_log), ByVal NULL)
    PrintL program_log
    program_log = ""
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Create kernel for the function named by $KERNEL_FUNC */
  kernel = clCreateKernel(program, StrPtr($KERNEL_FUNC), ErrCl)
  If (ErrCl < 0) Then
    perror("Couldn't create the kernel")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Create CL buffers to hold input and output data */
  mat_buff = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, SizeOf(Single)*16, mtx, ErrCl)
  If (ErrCl < 0) Then
    perror("Couldn't create a buffer object")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  ' The vector and result buffers are checked the same way as the matrix
  ' buffer, so a failed allocation is reported here instead of surfacing
  ' later as a kernel-argument or enqueue error.
  vec_buff = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, SizeOf(Single)*4, vec, ErrCl)
  If (ErrCl < 0) Then
    perror("Couldn't create a buffer object")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  res_buff = clCreateBuffer(context, CL_MEM_WRITE_ONLY, SizeOf(Single)*4, ByVal NULL, ErrCl)
  If (ErrCl < 0) Then
    perror("Couldn't create a buffer object")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Create kernel arguments from the CL buffers */
  ErrCl = clSetKernelArg(kernel, 0, SizeOf(tcl_mem), mat_buff)
  If (ErrCl < 0) Then
    perror("Couldn't set the kernel argument")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  ErrCl = clSetKernelArg(kernel, 1, SizeOf(tcl_mem), vec_buff)
  If (ErrCl < 0) Then
    perror("Couldn't set the kernel argument")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  ErrCl = clSetKernelArg(kernel, 2, SizeOf(tcl_mem), res_buff)
  If (ErrCl < 0) Then
    perror("Couldn't set the kernel argument")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Create a CL command queue for the device*/
  queue = clCreateCommandQueue(context, device, 0, ErrCl)
  If (ErrCl < 0) Then
    perror("Couldn't create the command queue")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Enqueue the command queue to the device */
  work_units_per_kernel = 4 /* 4 work-units per kernel */
  ErrCl = clEnqueueNDRangeKernel(queue, kernel, 1, ByVal NULL, work_units_per_kernel, ByVal NULL, 0, ByVal NULL, ByVal NULL)
  If (ErrCl < 0) Then
    perror("Couldn't enqueue the kernel execution command")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Read the result (CL_TRUE requests a blocking read) */
  ErrCl = clEnqueueReadBuffer(queue, res_buff, CL_TRUE, 0, SizeOf(Single)*4, result, 0, ByVal NULL, ByVal NULL)
  If (ErrCl < 0) Then
    perror("Couldn't enqueue the read buffer command")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Test the result */
  If((result(1) = correct(1)) And (result(2) = correct(2)) And (result(3) = correct(3)) And (result(4) = correct(4))) Then
    PrintL("Matrix-vector multiplication successful.")
  Else
    PrintL("Matrix-vector multiplication unsuccessful.")
  End If
  PrintL("Calculated result: " + Join$(result, ", "))
  PrintL("Expected result : " + Join$(correct, ", "))

  /* Deallocate resources */
  clReleaseMemObject(mat_buff)
  clReleaseMemObject(vec_buff)
  clReleaseMemObject(res_buff)
  clReleaseKernel(kernel)
  clReleaseCommandQueue(queue)
  clReleaseProgram(program)
  clReleaseContext(context)

  PrintL "Press any key to continue..."
  WaitKey
  APP_SetReturnCode(0)
End Function
Function pError( sError As String )
  ' Prints sError on its own line with the RED + INTENSITY foreground flags set,
  ' then switches the console text attribute to RED + GREEN + BLUE.
  Long alertAttr  = %CONSOLE_FOREGROUND_RED | %CONSOLE_FOREGROUND_INTENSITY
  Long normalAttr = %CONSOLE_FOREGROUND_RED | %CONSOLE_FOREGROUND_GREEN | %CONSOLE_FOREGROUND_BLUE

  Console_SetTextAttribute(alertAttr)
  PrintL sError
  Console_SetTextAttribute(normalAttr)
End Function
To run it, you will need the latest thinBASIC and the OpenCL headers (http://www.thinbasic.com/community/showthread.php?10159-OpenCL-Headers-Updated-Sep-15-2011), plus, of course, a modern GPU or CPU (http://www.thinbasic.com/community/showthread.php?10161-OpenCL-Supported-hardware).
Enjoy!
Petr
The example performs this matrix-vector multiplication:

$$
\begin{pmatrix}
0.0 & 2.0 & 4.0 & 6.0 \\
8.0 & 10.0 & 12.0 & 14.0 \\
16.0 & 18.0 & 20.0 & 22.0 \\
24.0 & 26.0 & 28.0 & 30.0
\end{pmatrix}
\times
\begin{pmatrix}
0.0 \\ 3.0 \\ 6.0 \\ 9.0
\end{pmatrix}
=
\begin{pmatrix}
84.0 \\ 228.0 \\ 372.0 \\ 516.0
\end{pmatrix}
$$
Compared to the original example, I added more informative text to the console output.
' -- NOTE: Enhanced to list the result/expected result
Uses "Console", "File"
#INCLUDE "%APP_INCLUDEPATH%/cl/cl.tBasicU"
' String constants used by TBMain:
'   $PROGRAM_FILE - path of the OpenCL source file, built from APP_SourcePath plus "matvec.cl";
'                   TBMain loads it with FILE_Load.
'   $KERNEL_FUNC  - name of the kernel function that TBMain passes to clCreateKernel.
Begin Const
$PROGRAM_FILE = APP_SourcePath+"matvec.cl"
$KERNEL_FUNC = "matvec_mult"
End Const
Function TBMain()
  ' Program entry point.
  '
  ' Multiplies a 4x4 matrix by a 4-element vector on an OpenCL device using the
  ' kernel named by $KERNEL_FUNC from the file $PROGRAM_FILE, then compares the
  ' device result with a reference computed on the host and prints both.
  '
  ' On any OpenCL failure an error message is printed, the return code is set
  ' to 1 via APP_SetReturnCode, the program waits for a key and the function
  ' exits. On success the return code is set to 0.

  /* Host/device data structures */
  tcl_platform_id platform
  tcl_device_id device
  tcl_context context
  tcl_command_queue queue
  tcl_int i, ErrCl

  /* Program/kernel data structures */
  tcl_program program
  String program_buffer, program_log
  tSize program_size, log_size
  tcl_kernel kernel

  /* Data and buffers */
  Single mtx(16), vec(4), result(4)
  Single correct(4) = 0.0, 0.0, 0.0, 0.0
  tcl_mem mat_buff, vec_buff, res_buff
  tSize work_units_per_kernel

  /* Initialize data to be processed by the kernel */
  For i = 1 To 16
    mtx(i) = (i-1) * 2.0
  Next

  ' Fill the vector and accumulate the host-side reference result.
  ' mtx is treated as row-major: row r occupies elements (r-1)*4+1 .. (r-1)*4+4.
  For i = 1 To 4
    vec(i) = (i-1) * 3.0
    correct(1) += mtx(i) * vec(i)
    correct(2) += mtx(i+4) * vec(i)
    correct(3) += mtx(i+8) * vec(i)
    correct(4) += mtx(i+12) * vec(i)
  Next

  /* Identify a platform */
  ErrCl = clGetPlatformIDs(1, platform, ByVal NULL)
  If (ErrCl < 0) Then
    perror("Couldn't find any platforms")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Access a device: prefer a GPU, fall back to a CPU device */
  ErrCl = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, device, ByVal NULL)
  If (ErrCl < 0) Then
    ' NOTE(review): CL_DEVICE_TYPE_CPU is assumed to be declared by the same
    ' OpenCL header that provides CL_DEVICE_TYPE_GPU - confirm.
    ErrCl = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, device, ByVal NULL)
  End If
  If (ErrCl < 0) Then
    perror("Couldn't find any devices")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Create the context */
  context = clCreateContext(ByVal NULL, 1, device, ByVal NULL, ByVal NULL, ErrCl)
  If (ErrCl < 0) Then
    perror("Couldn't create a context")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Read program file and place content into buffer */
  program_buffer = FILE_Load($PROGRAM_FILE)
  program_size = Len(program_buffer)
  If (program_size = 0) Then
    perror("Couldn't find the program file")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Create program from file */
  program = clCreateProgramWithSource(context, 1, VarPtr(program_buffer), program_size, ErrCl)
  If (ErrCl < 0) Then
    perror("Couldn't create the program")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If
  program_buffer = ""

  /* Build program */
  ErrCl = clBuildProgram(program, 0, ByVal 0, "", 0, ByVal 0)
  If (ErrCl < 0) Then
    /* Find size of log and print to std output */
    clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, ByVal NULL, log_size)
    ' Pre-size the string so the second call can write the log into it.
    program_log = Repeat$(log_size+1, $NUL)
    clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size + 1, ByVal StrPtr(program_log), ByVal NULL)
    PrintL program_log
    program_log = ""
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Create kernel for the function named by $KERNEL_FUNC */
  kernel = clCreateKernel(program, StrPtr($KERNEL_FUNC), ErrCl)
  If (ErrCl < 0) Then
    perror("Couldn't create the kernel")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Create CL buffers to hold input and output data */
  mat_buff = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, SizeOf(Single)*16, mtx, ErrCl)
  If (ErrCl < 0) Then
    perror("Couldn't create a buffer object")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  ' The vector and result buffers are checked the same way as the matrix
  ' buffer, so a failed allocation is reported here instead of surfacing
  ' later as a kernel-argument or enqueue error.
  vec_buff = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, SizeOf(Single)*4, vec, ErrCl)
  If (ErrCl < 0) Then
    perror("Couldn't create a buffer object")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  res_buff = clCreateBuffer(context, CL_MEM_WRITE_ONLY, SizeOf(Single)*4, ByVal NULL, ErrCl)
  If (ErrCl < 0) Then
    perror("Couldn't create a buffer object")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Create kernel arguments from the CL buffers */
  ErrCl = clSetKernelArg(kernel, 0, SizeOf(tcl_mem), mat_buff)
  If (ErrCl < 0) Then
    perror("Couldn't set the kernel argument")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  ErrCl = clSetKernelArg(kernel, 1, SizeOf(tcl_mem), vec_buff)
  If (ErrCl < 0) Then
    perror("Couldn't set the kernel argument")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  ErrCl = clSetKernelArg(kernel, 2, SizeOf(tcl_mem), res_buff)
  If (ErrCl < 0) Then
    perror("Couldn't set the kernel argument")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Create a CL command queue for the device*/
  queue = clCreateCommandQueue(context, device, 0, ErrCl)
  If (ErrCl < 0) Then
    perror("Couldn't create the command queue")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Enqueue the command queue to the device */
  work_units_per_kernel = 4 /* 4 work-units per kernel */
  ErrCl = clEnqueueNDRangeKernel(queue, kernel, 1, ByVal NULL, work_units_per_kernel, ByVal NULL, 0, ByVal NULL, ByVal NULL)
  If (ErrCl < 0) Then
    perror("Couldn't enqueue the kernel execution command")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Read the result (CL_TRUE requests a blocking read) */
  ErrCl = clEnqueueReadBuffer(queue, res_buff, CL_TRUE, 0, SizeOf(Single)*4, result, 0, ByVal NULL, ByVal NULL)
  If (ErrCl < 0) Then
    perror("Couldn't enqueue the read buffer command")
    APP_SetReturnCode(1) : WaitKey : Exit Function
  End If

  /* Test the result */
  If((result(1) = correct(1)) And (result(2) = correct(2)) And (result(3) = correct(3)) And (result(4) = correct(4))) Then
    PrintL("Matrix-vector multiplication successful.")
  Else
    PrintL("Matrix-vector multiplication unsuccessful.")
  End If
  PrintL("Calculated result: " + Join$(result, ", "))
  PrintL("Expected result : " + Join$(correct, ", "))

  /* Deallocate resources */
  clReleaseMemObject(mat_buff)
  clReleaseMemObject(vec_buff)
  clReleaseMemObject(res_buff)
  clReleaseKernel(kernel)
  clReleaseCommandQueue(queue)
  clReleaseProgram(program)
  clReleaseContext(context)

  PrintL "Press any key to continue..."
  WaitKey
  APP_SetReturnCode(0)
End Function
Function pError( sError As String )
  ' Prints sError on its own line with the RED + INTENSITY foreground flags set,
  ' then switches the console text attribute to RED + GREEN + BLUE.
  Long alertAttr  = %CONSOLE_FOREGROUND_RED | %CONSOLE_FOREGROUND_INTENSITY
  Long normalAttr = %CONSOLE_FOREGROUND_RED | %CONSOLE_FOREGROUND_GREEN | %CONSOLE_FOREGROUND_BLUE

  Console_SetTextAttribute(alertAttr)
  PrintL sError
  Console_SetTextAttribute(normalAttr)
End Function
To run it, you will need the latest thinBASIC and the OpenCL headers (http://www.thinbasic.com/community/showthread.php?10159-OpenCL-Headers-Updated-Sep-15-2011), plus, of course, a modern GPU or CPU (http://www.thinbasic.com/community/showthread.php?10161-OpenCL-Supported-hardware).
Enjoy!
Petr