36 '''This module contains the GOcean-specific OpenCL transformation.
41 from fparser.two
import Fortran2003
46 from psyclone.psyGen import Transformation, args_filter, InvokeSchedule, \
51 Assignment, IfBlock, ArrayReference, Schedule, BinaryOperation, \
52 StructureReference, FileContainer, CodeBlock, IntrinsicCall
54 ArrayType, DataSymbol, RoutineSymbol, ContainerSymbol,
55 UnsupportedFortranType, ArgumentInterface, ImportInterface,
56 INTEGER_TYPE, CHARACTER_TYPE, BOOLEAN_TYPE, ScalarType)
62 Switches on/off the generation of an OpenCL PSy layer for a given
63 InvokeSchedule. Additionally, it will generate OpenCL kernels for
64 each of the kernels referenced by the Invoke. For example:
66 >>> from psyclone.parse.algorithm import parse
67 >>> from psyclone.psyGen import PSyFactory
69 >>> FILENAME = "shallow_alg.f90" # examples/gocean/eg1
70 >>> ast, invoke_info = parse(FILENAME, api=API)
71 >>> psy = PSyFactory(API, distributed_memory=False).create(invoke_info)
72 >>> schedule = psy.invokes.get('invoke_0').schedule
73 >>> ocl_trans = GOOpenCLTrans()
74 >>> ocl_trans.apply(schedule)
75 >>> print(schedule.view())
80 _OCL_MANAGEMENT_QUEUE = 1
91 _enable_profiling =
False
95 _transformed_invokes = 0
102 :returns: the name of this transformation.
105 return "GOOpenCLTrans"
109 Checks that the supplied InvokeSchedule is valid and that an OpenCL
110 version of it can be generated.
112 :param node: the Schedule to check.
113 :type node: :py:class:`psyclone.psyGen.InvokeSchedule`
114 :param options: a dictionary with options for transformations.
115 :type options: dict of str:value or None
116 :param bool options["enable_profiling"]: whether or not to set up the \
117 OpenCL environment with the profiling option enabled.
118 :param bool options["out_of_order"]: whether or not to set up the \
119 OpenCL environment with the out_of_order option enabled.
120 :param bool options["end_barrier"]: whether or not to add an OpenCL \
121 barrier at the end of the transformed invoke.
123 :raises TransformationError: if the InvokeSchedule is not for the \
125 :raises TransformationError: if any of the kernels have arguments \
126 which are passed as a literal.
127 :raises TransformationError: if any of the provided options is invalid.
128 :raises TransformationError: if any of the provided options is not \
129 compatible with a previous OpenCL
131 :raises TransformationError: if any kernel in this invoke has a \
132 global variable used by an import.
133 :raises TransformationError: if any kernel does not iterate over \
137 if isinstance(node, InvokeSchedule):
138 if not isinstance(node, GOInvokeSchedule):
139 raise TransformationError(
140 f
"OpenCL generation is currently only supported for the "
141 f
"GOcean API but got an InvokeSchedule of type: "
142 f
"'{type(node).__name__}'")
144 raise TransformationError(
145 f
"Error in GOOpenCLTrans: the supplied node must be a (sub-"
146 f
"class of) InvokeSchedule but got {type(node)}")
149 valid_options = [
'end_barrier',
'enable_profiling',
'out_of_order']
150 for key, value
in options.items():
151 if key
in valid_options:
153 if not isinstance(value, bool):
154 raise TransformationError(
155 f
"InvokeSchedule OpenCL option '{key}' should be a "
158 raise TransformationError(
159 f
"InvokeSchedule does not support the OpenCL option "
160 f
"'{key}'. The supported options are: {valid_options}.")
164 if (
'enable_profiling' in options
and
166 raise TransformationError(
167 f
"Can't generate an OpenCL Invoke with enable_profiling='"
168 f
"{options['enable_profiling']}' since a previous "
169 f
"transformation used a different value, and their OpenCL"
170 f
" environments must match.")
172 if (
'out_of_order' in options
and
174 raise TransformationError(
175 f
"Can't generate an OpenCL Invoke with out_of_order='"
176 f
"{options['out_of_order']}' since a previous "
177 f
"transformation used a different value, and their OpenCL "
178 f
"environments must match.")
181 args = args_filter(node.args, arg_types=[
"scalar"])
184 raise TransformationError(
185 f
"Cannot generate OpenCL for Invokes that contain kernel "
186 f
"arguments which are a literal, but found the literal "
187 f
"'{arg.name}' used as an argument in invoke "
193 for kern
in node.kernels():
195 ksched = kern.get_kernel_schedule()
196 global_variables = ksched.symbol_table.imported_symbols
198 raise TransformationError(
199 f
"The Symbol Table for kernel '{kern.name}' contains the "
200 f
"following symbols with 'global' scope: "
201 f
"{[sym.name for sym in global_variables]}. An OpenCL "
202 f
"kernel cannot call other kernels and all of the data it "
203 f
"accesses must be passed by argument. Use the "
204 f
"KernelImportsToArguments transformation to convert such "
205 f
"symbols to kernel arguments first.")
208 for kernel
in node.kernels():
209 inner_loop = kernel.ancestor(GOLoop)
210 outer_loop = inner_loop.ancestor(GOLoop)
211 if not (inner_loop.field_space ==
"go_every" and
212 outer_loop.field_space ==
"go_every" and
213 inner_loop.iteration_space ==
"go_all_pts" and
214 outer_loop.iteration_space ==
"go_all_pts"):
215 raise TransformationError(
216 f
"The kernel '{kernel.name}' does not iterate over all "
217 f
"grid points. This is a necessary requirement for "
218 f
"generating the OpenCL code and can be done by applying "
219 f
"the GOMoveIterationBoundariesInsideKernelTrans to each "
220 f
"kernel before the GOOpenCLTrans.")
222 def apply(self, node, options=None):
224 Apply the OpenCL transformation to the supplied GOInvokeSchedule. This
225 causes PSyclone to generate an OpenCL version of the corresponding
226 PSy-layer routine. The generated code makes use of the FortCL
227 library (https://github.com/stfc/FortCL) in order to manage the
228 OpenCL device directly from Fortran.
230 :param node: the InvokeSchedule to transform.
231 :type node: :py:class:`psyclone.psyGen.GOInvokeSchedule`
232 :param options: set of option to tune the OpenCL generation.
233 :type options: dict of str:value or None
234 :param bool options["enable_profiling"]: whether or not to set up the \
235 OpenCL environment with the profiling option enabled.
236 :param bool options["out_of_order"]: whether or not to set up the \
237 OpenCL environment with the out_of_order option enabled.
238 :param bool options["end_barrier"]: whether or not to add an OpenCL \
239 barrier at the end of the transformed invoke.
246 api_config = Config.get().api_conf(
"gocean1.0")
249 if 'enable_profiling' in options:
252 if 'out_of_order' in options:
258 end_barrier = options.get(
'end_barrier',
True)
261 for kernel
in node.coded_kernels():
263 kernel.opencl_options[
"queue_number"])
274 for kern
in node.coded_kernels():
278 fortcl = ContainerSymbol(
"fortcl")
279 node.symbol_table.add(fortcl)
280 get_num_cmd_queues = RoutineSymbol(
281 "get_num_cmd_queues", interface=ImportInterface(fortcl))
282 get_cmd_queues = RoutineSymbol(
283 "get_cmd_queues", interface=ImportInterface(fortcl))
284 get_kernel_by_name = RoutineSymbol(
285 "get_kernel_by_name", interface=ImportInterface(fortcl))
286 node.symbol_table.add(get_num_cmd_queues)
287 node.symbol_table.add(get_cmd_queues)
288 node.symbol_table.add(get_kernel_by_name)
289 clfortran = ContainerSymbol(
"clfortran")
290 node.symbol_table.add(clfortran)
291 cl_finish = RoutineSymbol(
292 "clFinish", interface=ImportInterface(clfortran))
293 cl_launch = RoutineSymbol(
294 "clEnqueueNDRangeKernel",
295 interface=ImportInterface(clfortran))
296 node.symbol_table.add(cl_finish)
297 node.symbol_table.add(cl_launch)
298 iso_c_binding = ContainerSymbol(
"iso_c_binding")
299 node.symbol_table.add(iso_c_binding)
300 c_loc = RoutineSymbol(
301 "C_LOC", interface=ImportInterface(iso_c_binding))
303 "C_NULL_PTR", datatype=INTEGER_TYPE,
304 interface=ImportInterface(iso_c_binding))
305 node.symbol_table.add(c_loc)
306 node.symbol_table.add(c_null)
309 if api_config.debug_mode:
310 ocl_utils = ContainerSymbol(
"ocl_utils_mod")
311 check_status = RoutineSymbol(
312 "check_status", interface=ImportInterface(ocl_utils))
313 node.symbol_table.add(ocl_utils)
314 node.symbol_table.add(check_status)
317 qlist = node.symbol_table.new_symbol(
318 "cmd_queues", symbol_type=DataSymbol,
319 datatype=UnsupportedFortranType(
320 "integer(kind=c_intptr_t), pointer, save :: cmd_queues(:)"),
321 tag=
"opencl_cmd_queues")
324 first = DataSymbol(
"first_time",
325 datatype=UnsupportedFortranType(
326 "logical, save :: first_time = .true."))
327 node.symbol_table.add(first, tag=
"first_time")
328 flag = node.symbol_table.new_symbol(
329 "ierr", symbol_type=DataSymbol, datatype=INTEGER_TYPE,
331 global_size = node.symbol_table.new_symbol(
332 "globalsize", symbol_type=DataSymbol,
333 datatype=UnsupportedFortranType(
334 "integer(kind=c_size_t), target :: globalsize(2)"))
335 local_size = node.symbol_table.new_symbol(
336 "localsize", symbol_type=DataSymbol,
337 datatype=UnsupportedFortranType(
338 "integer(kind=c_size_t), target :: localsize(2)"))
343 for tag, symbol
in node.symbol_table.tags_dict.items():
344 if tag.startswith((
"xstart_",
"xstop_",
"ystart_",
"ystop_")):
345 boundary_vars.append(symbol)
347 for assignment
in node.walk(Assignment):
348 if assignment.lhs.symbol
in boundary_vars:
349 node.children.insert(cursor, assignment.detach())
353 setup_block = IfBlock.create(Reference(first), [])
354 setup_block.preceding_comment = \
355 "Initialise OpenCL runtime, kernels and buffers"
356 node.children.insert(cursor, setup_block)
357 setup_block.if_body.addchild(Call.create(psy_init, []))
360 ptree = Fortran2003.Pointer_Assignment_Stmt(
361 f
"{qlist.name} => {get_cmd_queues.name}()")
362 cblock = CodeBlock([ptree], CodeBlock.Structure.STATEMENT)
363 setup_block.if_body.addchild(cblock)
366 for kern
in node.coded_kernels():
367 name =
"kernel_" + kern.name
369 kpointer = node.symbol_table.lookup_with_tag(name)
371 pointer_type = UnsupportedFortranType(
372 "INTEGER(KIND=c_intptr_t), TARGET, SAVE :: " + name)
373 kpointer = DataSymbol(name, datatype=pointer_type)
374 node.symbol_table.add(kpointer, tag=name)
375 setup_block.if_body.addchild(
378 Call.create(get_kernel_by_name,
379 [Literal(kern.name, CHARACTER_TYPE)])))
382 initialised_fields = set()
383 there_is_a_grid_buffer =
False
384 for kern
in node.coded_kernels():
385 for arg
in kern.arguments.args:
386 if arg.argument_type ==
"field":
387 field = node.symbol_table.lookup(arg.name)
388 if field
not in initialised_fields:
390 call = Call.create(init_buf, [Reference(field)])
391 setup_block.if_body.addchild(call)
392 initialised_fields.add(field)
393 elif (arg.argument_type ==
"grid_property" and
395 if not there_is_a_grid_buffer:
397 field = node.symbol_table.lookup(
398 kern.arguments.find_grid_access().name)
399 call = Call.create(init_grid, [Reference(field)])
400 setup_block.if_body.addchild(call)
401 there_is_a_grid_buffer =
True
402 if not arg.is_scalar:
406 name = arg.name +
"_cl_mem"
408 node.symbol_table.lookup_with_tag(name)
410 node.symbol_table.new_symbol(
411 name, tag=name, symbol_type=DataSymbol,
412 datatype=UnsupportedFortranType(
413 "INTEGER(KIND=c_intptr_t) :: " + name))
419 first_statement_comment =
False
421 for kern
in node.coded_kernels():
422 if kern.name
not in kernel_names:
423 kernel_names.add(kern.name)
425 for child
in callblock.pop_all_children():
426 setup_block.if_body.addchild(child)
427 if not first_statement_comment:
428 child.preceding_comment = (
429 "Do a set_args now so subsequent writes place the "
430 "data appropriately")
431 first_statement_comment =
True
435 first_statement_comment =
False
436 for field
in initialised_fields:
438 RoutineSymbol(field.name+
"%write_to_device"), [])
439 setup_block.if_body.addchild(call)
440 if not first_statement_comment:
441 call.preceding_comment =
"Write data to the device"
442 first_statement_comment =
True
444 if there_is_a_grid_buffer:
445 fieldarg = node.coded_kernels()[0].arguments.find_grid_access()
446 field = node.symbol_table.lookup(fieldarg.name)
447 call = Call.create(write_grid_buf, [Reference(field)])
448 setup_block.if_body.addchild(call)
457 for kern
in node.coded_kernels():
458 outerloop = kern.ancestor(GOLoop).ancestor(GOLoop)
461 garg = node.coded_kernels()[0].arguments.find_grid_access()
462 num_x = api_config.grid_properties[
"go_grid_nx"].fortran\
464 num_y = api_config.grid_properties[
"go_grid_ny"].fortran\
466 assig = Assignment.create(
467 Reference(global_size),
468 Literal(f
"(/{num_x}, {num_y}/)",
469 ArrayType(INTEGER_TYPE, [2])))
470 node.children.insert(outerloop.position, assig)
471 local_size_value = kern.opencl_options[
'local_size']
472 assig = Assignment.create(
473 Reference(local_size),
474 Literal(f
"(/{local_size_value}, 1/)",
475 ArrayType(INTEGER_TYPE, [2])))
476 node.children.insert(outerloop.position, assig)
479 if api_config.debug_mode:
481 global_size_expr = fortran_reader.psyir_from_expression(
482 num_x, node.symbol_table)
484 check_status, global_size_expr,
488 kernelsym = node.symbol_table.lookup_with_tag(
489 "kernel_" + kern.name)
495 queue_number = kern.opencl_options[
'queue_number']
496 cmd_queue = ArrayReference.create(
497 qlist, [Literal(str(queue_number), INTEGER_TYPE)])
498 dependency = outerloop.backward_dependence()
503 for kernel_dep
in dependency.coded_kernels():
504 previous_queue = kernel_dep.opencl_options[
'queue_number']
505 if previous_queue != queue_number:
510 barrier = Assignment.create(
512 Call.create(cl_finish, [
513 ArrayReference.create(qlist, [
514 Literal(str(previous_queue),
516 node.children.insert(outerloop.position, barrier)
521 if dependency
and not dependency.coded_kernels()
and \
523 barrier = Assignment.create(
525 Call.create(cl_finish, [
526 ArrayReference.create(qlist, [
529 node.children.insert(outerloop.position, barrier)
532 if api_config.debug_mode:
533 self.
_add_ready_check_add_ready_check(node, outerloop.position, check_status,
534 kern.name, flag, cl_finish,
537 for child
in callblock.pop_all_children():
538 node.children.insert(outerloop.position, child)
541 assig = Assignment.create(
543 Call.create(cl_launch, [
547 Reference(kernelsym),
549 Literal(
"2", INTEGER_TYPE),
554 Call.create(c_loc, [Reference(global_size)]),
556 Call.create(c_loc, [Reference(local_size)]),
558 Literal(
"0", INTEGER_TYPE),
564 assig.preceding_comment =
"Launch the kernel"
565 node.children.insert(outerloop.position, assig)
569 if api_config.debug_mode:
571 kern.name, flag, cl_finish,
574 nodes_to_detach.append(outerloop)
579 for possible_dependent_node
in node.walk(HaloExchange):
582 dependency = possible_dependent_node.backward_dependence()
584 for kernel_dep
in dependency.coded_kernels():
585 previous_queue = kernel_dep.opencl_options[
'queue_number']
590 barrier = Assignment.create(
592 Call.create(cl_finish, [
593 ArrayReference.create(qlist, [
594 Literal(str(previous_queue),
596 pos = possible_dependent_node.position
597 node.children.insert(pos, barrier)
599 for node_to_detach
in nodes_to_detach:
600 node_to_detach.detach()
606 assign = Assignment.create(Reference(first),
607 Literal(
"false", BOOLEAN_TYPE))
608 assign.preceding_comment =
"Unset the first time flag"
609 node.addchild(assign)
613 def _add_end_barrier(self, node, flag, cl_finish, qlist):
614 ''' Append into the given node a OpenCL Wait operation for each of
615 the OpenCL queues in use.
617 :param node: PSyIR node where to append the barrier.
618 :type node: :py:class:`psyclone.psyir.nodes.Schedule`
619 :param flag: PSyIR symbol to use as flag.
620 :type flag: :py:class:`psyclone.psyir.symbols.DataSymbol`
621 :param cl_finish: PSyIR symbol of the barrier routine.
622 :type cl_finish: :py:class:`psyclone.psyir.symbols.RoutineSymbol`
623 :param qlist: PSyIR symbol of the OpenCL queues array.
624 :type qlist: :py:class:`psyclone.psyir.symbols.DataSymbol`
628 added_comment =
False
630 queue = ArrayReference.create(qlist, [Literal(str(num),
634 Reference(flag), Call.create(cl_finish, [queue])))
635 if not added_comment:
636 node.children[-1].preceding_comment = \
637 "Wait until all kernels have finished"
641 def _add_divisibility_check(node, position, check_status, global_size_expr,
643 ''' Insert into node a check that the global_size is exactly
644 divisible by the local size.
646 :param node: where to insert the divisibility check.
647 :type node: :py:class:`psyclone.psyir.nodes.Schedule`
648 :param int position: location where to insert the divisibilitay check.
649 :param check_status: PSyIR symbol of the check routine.
650 :type check_status: :py:class:`psyclone.psyir.symbols.RoutineSymbol`
651 :param global_size_expr: PSyIR representing the global_size.
652 :type global_size_expr: :py:class:`psyclone.psyir.nodes.DataNode`
653 :param int local_size: size of the OpenCL local work_group.
656 check = BinaryOperation.create(
657 BinaryOperation.Operator.NE,
658 IntrinsicCall.create(
659 IntrinsicCall.Intrinsic.MOD,
661 Literal(str(local_size), INTEGER_TYPE)]
663 Literal(
"0", INTEGER_TYPE))
664 message = (
"Global size is not a multiple of local size ("
665 "mandatory in OpenCL < 2.0).")
666 error = Call.create(check_status,
667 [Literal(message, CHARACTER_TYPE),
668 Literal(
"-1", INTEGER_TYPE)])
669 ifblock = IfBlock.create(check, [error])
670 node.children.insert(position, ifblock)
673 def _add_kernel_check(node, position, check_status, kernel_name,
674 flag, cl_finish, cmd_queue):
675 ''' Insert into node a check that the kernel has been launched and
676 has been executed successfully.
678 :param node: where to insert the kernel check.
679 :type node: :py:class:`psyclone.psyir.nodes.Schedule`
680 :param int position: location where to insert the kernel check.
681 :param check_status: PSyIR symbol of the check routine.
682 :type check_status: :py:class:`psyclone.psyir.symbols.RoutineSymbol`
683 :param str kernel_name: name of the kernel being checked.
684 :param flag: PSyIR symbol to use as flag.
685 :type flag: :py:class:`psyclone.psyir.symbols.DataSymbol`
686 :param cl_finish: PSyIR symbol of the barrier routine.
687 :type cl_finish: :py:class:`psyclone.psyir.symbols.RoutineSymbol`
688 :param cmd_queue: PSyIR symbol of the OpenCL command queues array.
689 :type cmd_queue: :py:class:`psyclone.psyir.symbols.DataSymbol`
693 message = Literal(f
"{kernel_name} clEnqueueNDRangeKernel",
695 check = Call.create(check_status, [message, Reference(flag)])
696 node.children.insert(position, check)
699 barrier = Assignment.create(
701 Call.create(cl_finish, [cmd_queue]))
702 node.children.insert(position + 1, barrier)
705 message = Literal(f
"Errors during {kernel_name}", CHARACTER_TYPE)
706 check = Call.create(check_status, [message, Reference(flag)])
707 node.children.insert(position + 2, check)
710 def _add_ready_check(node, position, check_status, kernel_name,
711 flag, cl_finish, cmd_queue):
712 ''' Insert into node a check that verifies if everything in the
713 command queues previous to a kernel launch has completed successfully.
715 :param node: where to insert the kernel check.
716 :type node: :py:class:`psyclone.psyir.nodes.Schedule`
717 :param int position: location where to insert the kernel check.
718 :param check_status: PSyIR symbol of the check routine.
719 :type check_status: :py:class:`psyclone.psyir.symbols.RoutineSymbol`
720 :param str kernel_name: name of the kernel being checked.
721 :param flag: PSyIR symbol to use as flag.
722 :type flag: :py:class:`psyclone.psyir.symbols.DataSymbol`
723 :param cl_finish: PSyIR symbol of the barrier routine.
724 :type cl_finish: :py:class:`psyclone.psyir.symbols.RoutineSymbol`
725 :param cmd_queue: PSyIR symbol of the OpenCL command queues array.
726 :type cmd_queue: :py:class:`psyclone.psyir.symbols.DataSymbol`
729 barrier = Assignment.create(
731 Call.create(cl_finish, [cmd_queue]))
732 node.children.insert(position, barrier)
733 message = Literal(f
"Errors before {kernel_name} launch",
735 check = Call.create(check_status, [message, Reference(flag)])
736 node.children.insert(position + 1, check)
738 def _insert_kernel_code_in_opencl_file(self, kernel):
739 ''' Insert the given kernel into a OpenCL file. For this we need
740 to remove the 'go_wp' precision symbol which can't be generated
741 by OpenCL. We assume 'go_wp' is a OpenCL double.
743 :param kernel: the kernel to insert.
744 :type kernel: :py:class:`psyclone.psyir.nodes.KernelSchedule`
748 self.
_kernels_file_kernels_file = FileContainer(
"opencl_kernels")
752 kernel_copy = kernel.get_kernel_schedule().copy()
753 symtab = kernel_copy.symbol_table
761 for sym
in symtab.datasymbols:
764 if (hasattr(sym.datatype,
"precision")
and
765 isinstance(sym.datatype.precision, DataSymbol)):
766 sym.datatype._precision = ScalarType.Precision.DOUBLE
768 if 'go_wp' in symtab:
769 del symtab._symbols[
'go_wp']
772 for routine
in self.
_kernels_file_kernels_file.walk(Routine):
773 if routine.name == kernel.name:
781 def _output_opencl_kernels_file(self):
782 ''' Write the OpenCL kernels to a file using the OpenCL backend.
796 new_name = f
"opencl_kernels_{name_idx}.cl"
802 os.path.join(Config.get().kernel_output_dir, new_name),
803 os.O_CREAT | os.O_WRONLY | os.O_EXCL)
804 except (OSError, IOError):
810 os.write(fdesc, new_kern_code.encode())
815 def _generate_set_args_call(kernel, scope):
817 Generate the Call statement to the set_args subroutine for the
820 :param kernel: the kernel for which to generate a call to its \
821 arg_setter subroutine.
822 :type kernel: :py:class:`psyclone.psyGen.CodedKern`
823 :param scope: The node representing the scope where the call \
824 statements will be inserted.
825 :type scope: :py:class:`psyclone.psyir.nodes.ScopingNode`
827 :returns: a block of statements that represent the set_args call
828 :rtype: :py:class:`psyclone.psyir.nodes.Schedule`
831 call_block = Schedule()
834 symtab = scope.symbol_table
835 kernelsym = symtab.lookup_with_tag(
"kernel_" + kernel.name)
844 for boundary
in [
"xstart",
"xstop",
"ystart",
"ystop"]:
845 tag = boundary +
"_" + kernel.name
846 symbol = symtab.lookup_with_tag(tag)
847 boundaries.append(symbol.name)
848 except KeyError
as err:
850 f
"Boundary symbol tag '{tag}' not found while generating the "
851 f
"OpenCL code for kernel '{kernel.name}'. Make sure to apply "
852 f
"the GOMoveIterationBoundariesInsideKernelTrans before "
853 f
"attempting the OpenCL code generation.")
from err
855 api_config = Config.get().api_conf(
"gocean1.0")
857 arguments = [Reference(kernelsym)]
858 for arg
in kernel.arguments.args:
859 if arg.argument_type ==
"scalar":
860 if arg.name
in boundaries:
863 bop = BinaryOperation.create(BinaryOperation.Operator.SUB,
864 arg.psyir_expression(),
865 Literal(
"1", INTEGER_TYPE))
866 arguments.append(bop)
868 arguments.append(arg.psyir_expression())
869 elif arg.argument_type ==
"field":
871 field = symtab.lookup(arg.name)
872 symbol = symtab.lookup_with_tag(arg.name +
"_cl_mem")
873 source = StructureReference.create(field, [
'device_ptr'])
874 dest = Reference(symbol)
875 icall = IntrinsicCall.create(IntrinsicCall.Intrinsic.TRANSFER,
877 assig = Assignment.create(dest.copy(), icall)
878 call_block.addchild(assig)
879 arguments.append(Reference(symbol))
880 elif arg.argument_type ==
"grid_property":
881 garg = kernel.arguments.find_grid_access()
885 StructureReference.create(
886 symtab.lookup(garg.name),
887 api_config.grid_properties[arg._property_name]
888 .fortran.split(
'%')[1:]
892 device_grid_property = arg.name +
"_device"
893 field = symtab.lookup(garg.name)
894 source = StructureReference.create(
895 field, [
'grid', device_grid_property])
896 symbol = symtab.lookup_with_tag(arg.name +
"_cl_mem")
897 dest = Reference(symbol)
898 icall = IntrinsicCall.create(
899 IntrinsicCall.Intrinsic.TRANSFER,
901 assig = Assignment.create(dest.copy(), icall)
902 call_block.addchild(assig)
903 arguments.append(Reference(symbol))
905 call_symbol = symtab.lookup_with_tag(kernel.name +
"_set_args")
906 call_block.addchild(Call.create(call_symbol, arguments))
910 def _insert_ocl_arg_setter_routine(node, kernel):
912 Returns the symbol of the subroutine that sets the OpenCL kernel
913 arguments for the provided PSy-layer kernel using FortCL. If the
914 subroutine doesn't exist it also generates it.
916 :param node: the container where the new subroutine will be inserted.
917 :type node: :py:class:`psyclone.psyir.nodes.Container`
918 :param kernel: the kernel call for which to provide the arg_setter \
920 :type kernel: :py:class:`psyclone.psyGen.CodedKern`
922 :returns: the symbol representing the arg_setter subroutine.
923 :rtype: :py:class:`psyclone.psyir.symbols.RoutineSymbol`
927 sub_name = kernel.name +
"_set_args"
929 return node.symbol_table.lookup_with_tag(sub_name)
936 node.symbol_table.add(RoutineSymbol(sub_name), tag=sub_name)
937 argsetter = Routine(sub_name)
941 clfortran = ContainerSymbol(
"clfortran")
942 clsetkernelarg = RoutineSymbol(
"clSetKernelArg",
943 interface=ImportInterface(clfortran))
944 iso_c = ContainerSymbol(
"iso_c_binding")
945 c_sizeof = RoutineSymbol(
"C_SIZEOF", interface=ImportInterface(iso_c))
946 c_loc = RoutineSymbol(
"C_LOC", interface=ImportInterface(iso_c))
947 c_intptr_t = RoutineSymbol(
"c_intptr_t",
948 interface=ImportInterface(iso_c))
949 ocl_utils = ContainerSymbol(
"ocl_utils_mod")
950 check_status = RoutineSymbol(
"check_status",
951 interface=ImportInterface(ocl_utils))
952 argsetter.symbol_table.add(clfortran)
953 argsetter.symbol_table.add(clsetkernelarg)
954 argsetter.symbol_table.add(iso_c)
955 argsetter.symbol_table.add(c_sizeof)
956 argsetter.symbol_table.add(c_loc)
957 argsetter.symbol_table.add(c_intptr_t)
958 argsetter.symbol_table.add(ocl_utils)
959 argsetter.symbol_table.add(check_status)
962 kobj = argsetter.symbol_table.new_symbol(
963 "kernel_obj", symbol_type=DataSymbol,
964 interface=ArgumentInterface(ArgumentInterface.Access.READ),
965 datatype=UnsupportedFortranType(
966 "INTEGER(KIND=c_intptr_t), TARGET :: kernel_obj"))
967 arg_list.append(kobj)
970 for arg
in kernel.arguments.args:
972 name = argsetter.symbol_table.next_available_name(arg.name)
977 if arg.is_scalar
and arg.intrinsic_type ==
"real":
978 pointer_type = UnsupportedFortranType(
979 "REAL(KIND=go_wp), INTENT(IN), TARGET :: " + name)
981 pointer_type = UnsupportedFortranType(
982 "INTEGER, INTENT(IN), TARGET :: " + name)
985 pointer_type = UnsupportedFortranType(
986 "INTEGER(KIND=c_intptr_t), INTENT(IN), TARGET :: " + name)
988 new_arg = DataSymbol(
989 name, datatype=pointer_type,
990 interface=ArgumentInterface(ArgumentInterface.Access.READ))
991 argsetter.symbol_table.add(new_arg)
992 arg_list.append(new_arg)
994 argsetter.symbol_table.specify_argument_list(arg_list)
997 ierr = argsetter.symbol_table.new_symbol(
998 "ierr", symbol_type=DataSymbol, datatype=INTEGER_TYPE)
1002 for index, variable
in enumerate(arg_list[1:]):
1003 call = Call.create(clsetkernelarg,
1005 Literal(str(index), INTEGER_TYPE),
1006 Call.create(c_sizeof, [Reference(variable)]),
1007 Call.create(c_loc, [Reference(variable)])])
1008 assignment = Assignment.create(Reference(ierr), call)
1009 argsetter.addchild(assignment)
1010 emsg = f
"clSetKernelArg: arg {index} of {kernel.name}"
1011 call = Call.create(check_status, [Literal(emsg, CHARACTER_TYPE),
1013 argsetter.addchild(call)
1015 argsetter.children[0].preceding_comment = \
1016 f
"Set the arguments for the {kernel.name} OpenCL Kernel"
1019 node.addchild(argsetter)
1021 return node.symbol_table.lookup_with_tag(sub_name)
1023 def _insert_opencl_init_routine(self, node):
1025 Returns the symbol of the subroutine that initialises the OpenCL
1026 environment using FortCL. If the subroutine doesn't exist it also
1029 :param node: the container where the new subroutine will be inserted.
1030 :type node: :py:class:`psyclone.psyir.nodes.Container`
1032 :returns: the symbol representing the OpenCL initialisation subroutine.
1033 :rtype: :py:class:`psyclone.psyir.symbols.RoutineSymbol`
1036 symtab = node.symbol_table
1041 return symtab.lookup_with_tag(
"ocl_init_routine")
1048 subroutine_name = symtab.new_symbol(
"psy_init",
1049 symbol_type=RoutineSymbol,
1050 tag=
"ocl_init_routine").name
1054 distributed_memory = Config.get().distributed_memory
1055 devices_per_node = Config.get().ocl_devices_per_node
1056 additional_uses =
""
1057 additional_stmts =
""
1058 if devices_per_node > 1
and distributed_memory:
1059 additional_uses +=
"USE parallel_mod, ONLY: get_rank"
1060 additional_stmts += \
1061 f
"ocl_device_num = mod(get_rank()-1, {devices_per_node}) + 1"
1065 unique_kernels = {kernel.name
for kernel
in node.coded_kernels()}
1069 subroutine psy_init()
1071 use fortcl, only: ocl_env_init, add_kernels
1072 character(len=30) kernel_names({len(unique_kernels)})
1073 integer :: ocl_device_num=1
1074 logical, save :: initialised=.false.
1075 ! Check to make sure we only execute this routine once
1076 if (.not. initialised) then
1077 initialised = .true.
1078 ! Initialise the opencl environment/device
1080 call ocl_env_init({self._max_queue_number}, ocl_device_num, &
1081 {".true." if self._enable_profiling else ".false."}, &
1082 {".true." if self._out_of_order else ".false."})
1083 ! The kernels this psy layer module requires
1086 for index, kernel_name
in enumerate(unique_kernels):
1087 code += f
"kernel_names({index + 1}) = \"{kernel_name}\"\n"
1090 ! Create the opencl kernel objects. This expects to find all of
1091 ! the compiled kernels in FORTCL_KERNELS_FILE environment variable
1092 call add_kernels({len(unique_kernels)}, kernel_names)
1094 end subroutine psy_init'''
1098 container = fortran_reader.psyir_from_source(code)
1099 subroutine = container.children[0]
1101 subroutine.name = subroutine_name
1104 node.addchild(subroutine.detach())
1106 return symtab.lookup_with_tag(
"ocl_init_routine")
1109 def _insert_initialise_grid_buffers(node):
1111 Returns the symbol of a subroutine that initialises all OpenCL grid
1112 buffers in the OpenCL device using FortCL. If the subroutine doesn't
1113 already exist it also generates it.
1115 :param node: the container where the new subroutine will be inserted.
1116 :type node: :py:class:`psyclone.psyir.nodes.Container`
1118 :returns: the symbol of the grid buffer initialisation subroutine.
1119 :rtype: :py:class:`psyclone.psyir.symbols.RoutineSymbol`
1123 symtab = node.symbol_table
1125 return symtab.lookup_with_tag(
"ocl_init_grid_buffers")
1132 subroutine_name = symtab.new_symbol(
"initialise_grid_device_buffers",
1133 symbol_type=RoutineSymbol,
1134 tag=
"ocl_init_grid_buffers").name
1137 api_config = Config.get().api_conf(
"gocean1.0")
1138 props = api_config.grid_properties
1139 num_x = props[
"go_grid_nx"].fortran.format(
"field")
1140 num_y = props[
"go_grid_ny"].fortran.format(
"field")
1144 for key, prop
in props.items():
1145 if key ==
"go_grid_data":
1149 if prop.type ==
"array" and prop.intrinsic_type ==
"integer":
1150 int_arrays.append(prop.fortran.format(
"field"))
1151 elif prop.type ==
"array" and prop.intrinsic_type ==
"real":
1152 real_arrays.append(prop.fortran.format(
"field"))
1156 subroutine initialise_device_grid(field)
1157 USE fortcl, ONLY: create_ronly_buffer
1158 USE iso_c_binding, only: c_size_t
1160 type(r2d_field), intent(inout), target :: field
1161 integer(kind=c_size_t) size_in_bytes
1162 IF (.not. c_associated({int_arrays[0]}_device)) THEN
1163 ! Create integer grid fields
1164 size_in_bytes = int({num_x}*{num_y}, 8) * &
1165 c_sizeof({int_arrays[0]}(1,1))
1168 for int_array
in int_arrays:
1170 {int_array}_device = transfer( &
1171 create_ronly_buffer(size_in_bytes), {int_array}_device)
1175 ! Create real grid buffers
1176 size_in_bytes = int({num_x} * {num_y}, 8) * &
1177 c_sizeof({real_arrays[0]}(1,1))
1180 for real_array
in real_arrays:
1182 {real_array}_device = transfer( &
1183 create_ronly_buffer(size_in_bytes), {real_array}_device)
1188 end subroutine initialise_device_grid
1193 container = fortran_reader.psyir_from_source(code)
1194 subroutine = container.children[0]
1196 subroutine.name = subroutine_name
1199 node.addchild(subroutine.detach())
1201 return symtab.lookup_with_tag(
"ocl_init_grid_buffers")
1203 def _insert_write_grid_buffers(self, node):
1205 Returns the symbol of a subroutine that writes the values of the grid
1206 properties into the OpenCL device buffers using FortCL. If the
1207 subroutine doesn't already exist it also generates it.
1209 :param node: the container where the new subroutine will be inserted.
1210 :type node: :py:class:`psyclone.psyir.nodes.Container`
1212 :returns: the symbol representing the grid buffers writing subroutine.
1213 :rtype: :py:class:`psyclone.psyir.symbols.RoutineSymbol`
1217 symtab = node.symbol_table
1219 return symtab.lookup_with_tag(
"ocl_write_grid_buffers")
1226 subroutine_name = symtab.new_symbol(
"write_grid_buffers",
1227 symbol_type=RoutineSymbol,
1228 tag=
"ocl_write_grid_buffers").name
1231 api_config = Config.get().api_conf(
"gocean1.0")
1232 props = api_config.grid_properties
1233 num_x = props[
"go_grid_nx"].fortran.format(
"field")
1234 num_y = props[
"go_grid_ny"].fortran.format(
"field")
1238 subroutine write_device_grid(field)
1239 USE fortcl, ONLY: get_cmd_queues
1240 use iso_c_binding, only: c_intptr_t, c_size_t, c_sizeof
1242 USE ocl_utils_mod, ONLY: check_status
1243 type(r2d_field), intent(inout), target :: field
1244 integer(kind=c_size_t) size_in_bytes
1245 INTEGER(c_intptr_t), pointer :: cmd_queues(:)
1246 integer(c_intptr_t) :: cl_mem
1248 cmd_queues => get_cmd_queues()
1249 ! Integer grid buffers
1250 size_in_bytes = int({num_x} * {num_y}, 8) * &
1251 c_sizeof(field%grid%tmask(1,1))
1252 cl_mem = transfer(field%grid%tmask_device, cl_mem)
1253 ierr = clEnqueueWriteBuffer( &
1254 cmd_queues({self._OCL_MANAGEMENT_QUEUE}), &
1255 cl_mem, CL_TRUE, 0_8, size_in_bytes, &
1256 C_LOC(field%grid%tmask), 0, C_NULL_PTR, C_NULL_PTR)
1257 CALL check_status("clEnqueueWriteBuffer tmask", ierr)
1259 size_in_bytes = int({num_x} * {num_y}, 8) * &
1260 c_sizeof(field%grid%area_t(1,1))
1263 cl_mem = transfer(field%grid%{0}_device, cl_mem)
1264 ierr = clEnqueueWriteBuffer(cmd_queues({1}), &
1265 cl_mem, CL_TRUE, 0_8, size_in_bytes, &
1266 C_LOC(field%grid%{0}), 0, C_NULL_PTR, C_NULL_PTR)
1267 CALL check_status("clEnqueueWriteBuffer {0}_device", ierr)
1269 for grid_prop
in [
'area_t',
'area_u',
'area_v',
'dx_u',
'dx_v',
1270 'dx_t',
'dy_u',
'dy_v',
'dy_t',
'gphiu',
'gphiv']:
1272 code +=
"end subroutine write_device_grid"
1276 container = fortran_reader.psyir_from_source(code)
1277 subroutine = container.children[0]
1279 subroutine.name = subroutine_name
1282 node.addchild(subroutine.detach())
1284 return symtab.lookup_with_tag(
"ocl_write_grid_buffers")
1286 def _insert_ocl_read_from_device_function(self, node):
1288 Returns the symbol of a subroutine that retrieves the data back from
1289 an OpenCL device using FortCL. If the subroutine doesn't already exist
1290 it also generates it.
1292 :param node: the container where the new subroutine will be inserted.
1293 :type node: :py:class:`psyclone.psyir.nodes.Container`
1295 :returns: the symbol of the buffer data retrieving subroutine.
1296 :rtype: :py:class:`psyclone.psyir.symbols.RoutineSymbol`
1299 symtab = node.symbol_table
1301 return symtab.lookup_with_tag(
"ocl_read_func")
1308 subroutine_name = symtab.new_symbol(
"read_from_device",
1309 symbol_type=RoutineSymbol,
1310 tag=
"ocl_read_func").name
1314 subroutine read_sub(from, to, startx, starty, nx, ny, blocking)
1315 USE iso_c_binding, only: c_ptr, c_intptr_t, c_size_t, c_sizeof
1316 USE ocl_utils_mod, ONLY: check_status
1317 use kind_params_mod, only: go_wp
1319 USE fortcl, ONLY: get_cmd_queues
1320 type(c_ptr), intent(in) :: from
1321 real(go_wp), intent(inout), dimension(:,:), target :: to
1322 integer, intent(in) :: startx, starty, nx, ny
1323 logical, intent(in) :: blocking
1324 INTEGER(c_size_t) :: size_in_bytes, offset_in_bytes
1325 integer(c_intptr_t) :: cl_mem
1326 INTEGER(c_intptr_t), pointer :: cmd_queues(:)
1329 ! Give the from pointer the appropriate OpenCL memory object type
1330 cl_mem = transfer(from, cl_mem)
1331 cmd_queues => get_cmd_queues()
1333 ! Two copy strategies depending on how much of the total length
1335 if (nx < size(to, 1) / 2) then
1336 ! Dispatch asynchronous copies of just the contiguous data.
1337 do i = starty, starty+ny
1338 size_in_bytes = int(nx, 8) * c_sizeof(to(1,1))
1339 offset_in_bytes = int(size(to, 1) * (i-1) + (startx-1)) &
1341 ierr = clEnqueueReadBuffer( &
1342 cmd_queues({self._OCL_MANAGEMENT_QUEUE}), cl_mem, &
1343 CL_FALSE, offset_in_bytes, size_in_bytes, &
1344 C_LOC(to(startx, i)), 0, C_NULL_PTR, C_NULL_PTR)
1345 CALL check_status("clEnqueueReadBuffer", ierr)
1348 CALL check_status("clFinish on read", &
1349 clFinish(cmd_queues({self._OCL_MANAGEMENT_QUEUE})))
1352 ! Copy across the whole starty:starty+ny rows in a single
1354 size_in_bytes = int(size(to, 1) * ny, 8) * c_sizeof(to(1,1))
1355 offset_in_bytes = int(size(to,1)*(starty-1), 8) &
1357 ierr = clEnqueueReadBuffer( &
1358 cmd_queues({self._OCL_MANAGEMENT_QUEUE}), cl_mem, &
1359 CL_TRUE, offset_in_bytes, size_in_bytes, &
1360 C_LOC(to(1,starty)), 0, C_NULL_PTR, C_NULL_PTR)
1361 CALL check_status("clEnqueueReadBuffer", ierr)
1363 end subroutine read_sub
1368 container = fortran_reader.psyir_from_source(code)
1369 subroutine = container.children[0]
1372 subroutine.name = subroutine_name
1375 node.addchild(subroutine.detach())
1377 return symtab.lookup_with_tag(
"ocl_read_func")
1379 def _insert_ocl_write_to_device_function(self, node):
1381 Returns the symbol of a subroutine that writes the buffer data into
1382 an OpenCL device using FortCL. If the subroutine doesn't already exist
1383 it also generates it.
1385 :param node: the container where the new subroutine will be inserted.
1386 :type node: :py:class:`psyclone.psyir.nodes.Container`
1388 :returns: the symbol of the buffer writing subroutine.
1389 :rtype: :py:class:`psyclone.psyir.symbols.RoutineSymbol`
1392 symtab = node.symbol_table
1394 return symtab.lookup_with_tag(
"ocl_write_func")
1401 subroutine_name = symtab.new_symbol(
"write_to_device",
1402 symbol_type=RoutineSymbol,
1403 tag=
"ocl_write_func").name
1407 subroutine write_sub(from, to, startx, starty, nx, ny, blocking)
1408 USE iso_c_binding, only: c_ptr, c_intptr_t, c_size_t, c_sizeof
1409 USE ocl_utils_mod, ONLY: check_status
1410 use kind_params_mod, only: go_wp
1412 USE fortcl, ONLY: get_cmd_queues
1413 real(go_wp), intent(in), dimension(:,:), target :: from
1414 type(c_ptr), intent(in) :: to
1415 integer, intent(in) :: startx, starty, nx, ny
1416 logical, intent(in) :: blocking
1417 integer(c_intptr_t) :: cl_mem
1418 INTEGER(c_size_t) :: size_in_bytes, offset_in_bytes
1419 INTEGER(c_intptr_t), pointer :: cmd_queues(:)
1422 ! Give the to pointer the appropriate OpenCL memory object type
1423 cl_mem = transfer(to, cl_mem)
1424 cmd_queues => get_cmd_queues()
1426 ! Two copy strategies depending on how much of the total length
1428 if (nx < size(from,1) / 2) then
1429 ! Dispatch asynchronous copies of just the contiguous data.
1430 do i=starty, starty+ny
1431 size_in_bytes = int(nx, 8) * c_sizeof(from(1,1))
1432 offset_in_bytes = int(size(from, 1) * (i-1) + (startx-1)) &
1433 * c_sizeof(from(1,1))
1434 ierr = clEnqueueWriteBuffer( &
1435 cmd_queues({self._OCL_MANAGEMENT_QUEUE}), cl_mem, &
1436 CL_FALSE, offset_in_bytes, size_in_bytes, &
1437 C_LOC(from(startx, i)), 0, C_NULL_PTR, C_NULL_PTR)
1438 CALL check_status("clEnqueueWriteBuffer", ierr)
1441 CALL check_status("clFinish on write", &
1442 clFinish(cmd_queues({self._OCL_MANAGEMENT_QUEUE})))
1445 ! Copy across the whole starty:starty+ny rows in a single
1447 size_in_bytes = int(size(from,1) * ny, 8) * c_sizeof(from(1,1))
1448 offset_in_bytes = int(size(from,1) * (starty-1)) &
1449 * c_sizeof(from(1,1))
1450 ierr = clEnqueueWriteBuffer(&
1451 cmd_queues({self._OCL_MANAGEMENT_QUEUE}), cl_mem, &
1452 CL_TRUE, offset_in_bytes, size_in_bytes, &
1453 C_LOC(from(1, starty)), 0, C_NULL_PTR, C_NULL_PTR)
1454 CALL check_status("clEnqueueWriteBuffer", ierr)
1456 end subroutine write_sub
1461 container = fortran_reader.psyir_from_source(code)
1462 subroutine = container.children[0]
1464 subroutine.name = subroutine_name
1467 node.addchild(subroutine.detach())
1469 return symtab.lookup_with_tag(
"ocl_write_func")
1472 def _insert_ocl_initialise_buffer(node):
1474 Returns the symbol of a subroutine that initialises an OpenCL buffer in
1475 the OpenCL device using FortCL. If the subroutine doesn't already exist
1476 it also generates it.
1478 :param node: the container where the new subroutine will be inserted.
1479 :type node: :py:class:`psyclone.psyir.nodes.Container`
1480 :returns: the symbol of the buffer initialisation subroutine.
1481 :rtype: :py:class:`psyclone.psyir.symbols.RoutineSymbol`
1485 symtab = node.symbol_table
1487 return symtab.lookup_with_tag(
"ocl_init_buffer_func")
1494 subroutine_name = symtab.new_symbol(
"initialise_device_buffer",
1495 symbol_type=RoutineSymbol,
1496 tag=
"ocl_init_buffer_func").name
1499 api_config = Config.get().api_conf(
"gocean1.0")
1501 api_config.grid_properties[
"go_grid_data"].fortran.format(
"field")
1502 props = api_config.grid_properties
1503 num_x = props[
"go_grid_nx"].fortran.format(
"field")
1504 num_y = props[
"go_grid_ny"].fortran.format(
"field")
1509 read_fp = symtab.lookup_with_tag(
"ocl_read_func").name
1510 write_fp = symtab.lookup_with_tag(
"ocl_write_func").name
1514 subroutine initialise_device_buffer(field)
1515 USE fortcl, ONLY: create_rw_buffer
1516 USE iso_c_binding, only: c_size_t
1518 type(r2d_field), intent(inout), target :: field
1519 integer(kind=c_size_t) size_in_bytes
1520 IF (.NOT. field%data_on_device) THEN
1521 size_in_bytes = int({num_x} * {num_y}, 8) * &
1522 c_sizeof({host_buff}(1,1))
1523 ! Create buffer on device, we store it without type information
1524 ! on the dl_esm_inf pointer (transfer/static_cast to void*)
1525 field%device_ptr = transfer( &
1526 create_rw_buffer(size_in_bytes), &
1528 field%data_on_device = .true.
1529 field%read_from_device_f => {read_fp}
1530 field%write_to_device_f => {write_fp}
1532 end subroutine initialise_device_buffer
1537 container = fortran_reader.psyir_from_source(code)
1538 subroutine = container.children[0]
1540 subroutine.name = subroutine_name
1543 node.addchild(subroutine.detach())
1545 return symtab.lookup_with_tag(
"ocl_init_buffer_func")
1549 __all__ = [
"GOOpenCLTrans"]
def _add_end_barrier(self, node, flag, cl_finish, qlist)
def _add_divisibility_check(node, position, check_status, global_size_expr, local_size)
def _insert_kernel_code_in_opencl_file(self, kernel)
def _insert_write_grid_buffers(self, node)
def _insert_opencl_init_routine(self, node)
def _generate_set_args_call(kernel, scope)
def _output_opencl_kernels_file(self)
def _add_ready_check(node, position, check_status, kernel_name, flag, cl_finish, cmd_queue)
def _insert_initialise_grid_buffers(node)
def _insert_ocl_arg_setter_routine(node, kernel)
def _insert_ocl_write_to_device_function(self, node)
def _insert_ocl_initialise_buffer(node)
def _insert_ocl_read_from_device_function(self, node)
def validate(self, node, options=None)
def apply(self, node, options=None)
int _OCL_MANAGEMENT_QUEUE
def _add_kernel_check(node, position, check_status, kernel_name, flag, cl_finish, cmd_queue)