''' This module provides the various transformations that can be applied to
PSyIR nodes. There are both general and API-specific transformation
classes in this module where the latter typically apply API-specific
checks before calling the base class for the actual transformation. '''
48 from psyclone
import psyGen
58 from psyclone.psyGen import (Transformation, CodedKern, Kern, InvokeSchedule,
61 ACCDataDirective, ACCDirective, ACCEnterDataDirective, ACCKernelsDirective,
62 ACCLoopDirective, ACCParallelDirective, ACCRoutineDirective, Assignment,
63 Call, CodeBlock, Directive, Loop, Node, OMPDeclareTargetDirective,
64 OMPDirective, OMPMasterDirective,
65 OMPParallelDirective, OMPParallelDoDirective, OMPSerialDirective,
66 OMPSingleDirective, OMPTaskloopDirective, PSyDataNode, Reference,
67 Return, Routine, Schedule)
72 ArgumentInterface, DataSymbol, UnresolvedType, INTEGER_TYPE, ScalarType,
def check_intergrid(node):
    '''
    Utility function to check that the supplied node does not have
    an intergrid kernel amongst its descendants.

    This is used to ensure any attempt to apply loop-fusion and redundant-
    computation transformations to loops containing inter-grid kernels is
    rejected (since support for those is not yet implemented).

    :param node: the PSyIR node to check.
    :type node: :py:class:`psyir.nodes.Node`

    :raises TransformationError: if the supplied node has an inter-grid
        kernel as a descendant.

    '''
    # Find every LFRic kernel in the sub-tree rooted at 'node'.
    child_kernels = node.walk(LFRicKern)
    for kern in child_kernels:
        if kern.is_intergrid:
            raise TransformationError(
                f"This Transformation cannot currently be applied to nodes "
                f"which have inter-grid kernels as descendents and "
                f"{kern.name} is such a kernel.")
112 Adds an OpenMP taskloop directive to a loop. Only one of grainsize or
113 num_tasks must be specified.
115 TODO: #1364 Taskloops do not yet support reduction clauses.
117 :param grainsize: the grainsize to use in for this transformation.
118 :type grainsize: int or None
119 :param num_tasks: the num_tasks to use for this transformation.
120 :type num_tasks: int or None
121 :param bool nogroup: whether or not to use a nogroup clause for this
122 transformation. Default is False.
126 >>> from pysclone.parse.algorithm import parse
127 >>> from psyclone.psyGen import PSyFactory
128 >>> api = "gocean1.0"
129 >>> ast, invokeInfo = parse(GOCEAN_SOURCE_FILE, api=api)
130 >>> psy = PSyFactory(api).create(invokeInfo)
132 >>> from psyclone.transformations import OMPParallelTrans, OMPSingleTrans
133 >>> from psyclone.transformations import OMPTaskloopTrans
134 >>> from psyclone.psyir.transformations import OMPTaskwaitTrans
135 >>> singletrans = OMPSingleTrans()
136 >>> paralleltrans = OMPParallelTrans()
137 >>> tasklooptrans = OMPTaskloopTrans()
138 >>> taskwaittrans = OMPTaskwaitTrans()
140 >>> schedule = psy.invokes.get('invoke_0').schedule
141 >>> # Uncomment the following line to see a text view of the schedule
142 >>> # print(schedule.view())
144 >>> # Apply the OpenMP Taskloop transformation to *every* loop
145 >>> # in the schedule.
146 >>> # This ignores loop dependencies. These can be handled
147 >>> # by the OMPTaskwaitTrans
148 >>> for child in schedule.children:
149 >>> tasklooptrans.apply(child)
150 >>> # Enclose all of these loops within a single OpenMP
152 >>> singletrans.apply(schedule.children)
153 >>> # Enclose all of these loops within a single OpenMP
154 >>> # PARALLEL region
155 >>> paralleltrans.apply(schedule.children)
156 >>> # Ensure loop dependencies are satisfied
157 >>> taskwaittrans.apply(schedule.children)
158 >>> # Uncomment the following line to see a text view of the schedule
159 >>> # print(schedule.view())
162 def __init__(self, grainsize=None, num_tasks=None, nogroup=False):
171 return "Adds an 'OpenMP TASKLOOP' directive to a loop"
174 def omp_nogroup(self):
176 Returns whether the nogroup clause should be specified for
177 this transformation. By default the nogroup clause is applied.
179 :returns: whether the nogroup clause should be specified by
186 def omp_nogroup(self, nogroup):
188 Sets whether the nogroup clause should be specified for this
191 :param bool nogroup: value to set whether the nogroup clause should be
192 used for this transformation.
194 raises TypeError: if the nogroup parameter is not a bool.
196 if not isinstance(nogroup, bool):
197 raise TypeError(f
"Expected nogroup to be a bool "
198 f
"but got a {type(nogroup).__name__}")
202 def omp_grainsize(self):
204 Returns the grainsize that will be specified by
205 this transformation. By default the grainsize
206 clause is not applied, so grainsize is None.
208 :returns: The grainsize specified by this transformation.
213 @omp_grainsize.setter
214 def omp_grainsize(self, value):
216 Sets the grainsize that will be specified by
217 this transformation. Checks the grainsize is
218 a positive integer value or None.
220 :param value: integer value to use in the grainsize clause.
221 :type value: int or None
223 :raises TransformationError: if value is not an int and is not None.
224 :raises TransformationError: if value is negative.
225 :raises TransformationError: if grainsize and num_tasks are \
228 if (
not isinstance(value, int))
and (value
is not None):
230 f
"got {type(value).__name__}")
232 if (value
is not None)
and (value <= 0):
234 f
"integer, got {value}")
238 "The grainsize and num_tasks clauses would both "
239 "be specified for this Taskloop transformation")
243 def omp_num_tasks(self):
245 Returns the num_tasks that will be specified
246 by this transformation. By default the num_tasks
247 clause is not applied so num_tasks is None.
249 :returns: The grainsize specified by this transformation.
254 @omp_num_tasks.setter
255 def omp_num_tasks(self, value):
257 Sets the num_tasks that will be specified by
258 this transformation. Checks that num_tasks is
259 a positive integer value or None.
261 :param value: integer value to use in the num_tasks clause.
262 :type value: int or None
264 :raises TransformationError: if value is not an int and is not None.
265 :raises TransformationError: if value is negative.
266 :raises TransformationError: if grainsize and num_tasks are \
270 if (
not isinstance(value, int))
and (value
is not None):
272 f
" got {type(value).__name__}")
274 if (value
is not None)
and (value <= 0):
276 f
"integer, got {value}")
280 "The grainsize and num_tasks clauses would both "
281 "be specified for this Taskloop transformation")
284 def _directive(self, children, collapse=None):
286 Creates the type of directive needed for this sub-class of
289 :param children: list of Nodes that will be the children of \
290 the created directive.
291 :type children: list of :py:class:`psyclone.psyir.nodes.Node`
292 :param int collapse: currently un-used but required to keep \
293 interface the same as in base class.
294 :returns: the new node representing the directive in the AST.
295 :rtype: :py:class:`psyclone.psyir.nodes.OMPTaskloopDirective`
297 :raises NotImplementedError: if a collapse argument is supplied
301 raise NotImplementedError(
302 "The COLLAPSE clause is not yet supported for "
303 "'!$omp taskloop' directives.")
310 def apply(self, node, options=None):
311 '''Apply the OMPTaskloopTrans transformation to the specified node in
312 a Schedule. This node must be a Loop since this transformation
313 corresponds to wrapping the generated code with directives like so:
315 .. code-block:: fortran
323 At code-generation time (when
324 :py:meth:`OMPTaskloopDirective.gen_code` is called), this node must be
325 within (i.e. a child of) an OpenMP SERIAL region.
327 If the keyword "nogroup" is specified in the options, it will cause a
328 nogroup clause be generated if it is set to True. This will override
329 the value supplied to the constructor, but will only apply to the
330 apply call to which the value is supplied.
332 :param node: the supplied node to which we will apply the \
333 OMPTaskloopTrans transformation
334 :type node: :py:class:`psyclone.psyir.nodes.Node`
335 :param options: a dictionary with options for transformations\
337 :type options: Optional[Dict[str, Any]]
338 :param bool options["nogroup"]:
339 indicating whether a nogroup clause should be applied to
352 super().
apply(node, options)
359 ''' This Mixin provides the "validate_it_can_run_on_gpu" method that
360 given a routine or kernel node, it checks that the callee code is valid
361 to run on a GPU. It is implemented as a Mixin because transformations
362 from multiple programming models, e.g. OpenMP and OpenACC, can reuse
368 Check that the supplied node can be marked as available to be
371 :param node: the kernel or routine to validate.
372 :type node: :py:class:`psyclone.psyGen.Kern` |
373 :py:class:`psyclone.psyir.nodes.Routine`
374 :param options: a dictionary with options for transformations.
375 :type options: Optional[Dict[str, Any]]
376 :param bool options["force"]: whether to allow routines with
377 CodeBlocks to run on the GPU.
379 :raises TransformationError: if the node is not a kernel or a routine.
380 :raises TransformationError: if the target is a built-in kernel.
381 :raises TransformationError: if it is a kernel but without an
383 :raises TransformationError: if any of the symbols in the kernel are
384 accessed via a module use statement.
385 :raises TransformationError: if the kernel contains any calls to other
388 force = options.get(
"force",
False)
if options
else False
390 if not isinstance(node, (Kern, Routine)):
392 f
"The {type(self).__name__} must be applied to a sub-class of "
393 f
"Kern or Routine but got '{type(node).__name__}'.")
396 if isinstance(node, BuiltIn):
398 f
"Applying {type(self).__name__} to a built-in kernel is not "
399 f
"yet supported and kernel '{node.name}' is of type "
400 f
"'{type(node).__name__}'")
402 if isinstance(node, Kern):
408 kernel_schedule = node.get_kernel_schedule()
409 except Exception
as error:
411 f
"Failed to create PSyIR for kernel '{node.name}'. "
412 f
"Cannot transform such a kernel.")
from error
416 kernel_schedule = node
423 refs = kernel_schedule.walk(Reference)
425 if ref.symbol.is_import:
428 ref.symbol.resolve_type()
432 if (isinstance(ref.symbol, DataSymbol)
and
433 ref.symbol.is_constant):
437 f
"{k_or_r} '{node.name}' accesses the symbol "
438 f
"'{ref.symbol}' which is imported. If this symbol "
439 f
"represents data then it must first be converted to a "
440 f
"{k_or_r} argument using the KernelImportsToArguments "
446 cblocks = kernel_schedule.walk(CodeBlock)
449 cblock_txt = (
"\n " +
"\n ".join(str(node)
for node
in
450 cblocks[0].get_ast_nodes)
452 option_txt =
"options={'force': True}"
454 f
"Cannot safely apply {type(self).__name__} to {k_or_r} "
455 f
"'{node.name}' because its PSyIR contains one or more "
456 f
"CodeBlocks:{cblock_txt}You may use '{option_txt}' to "
457 f
"override this check.")
462 for cblock
in cblocks:
463 names = cblock.get_symbol_names()
465 sym = kernel_schedule.symbol_table.lookup(name)
468 f
"{k_or_r} '{node.name}' accesses the symbol "
469 f
"'{sym.name}' within a CodeBlock and this symbol "
470 f
"is imported. {type(self).__name__} cannot be "
471 f
"applied to such a {k_or_r}.")
473 calls = kernel_schedule.walk(Call)
475 if not call.is_available_on_device():
476 call_str = call.debug_string().rstrip(
"\n")
478 f
"{k_or_r} '{node.name}' calls another routine "
479 f
"'{call_str}' which is not available on the "
480 f
"accelerator device and therefore cannot have "
481 f
"{type(self).__name__} applied to it (TODO #342).")
486 Adds an OpenMP declare target directive to the specified routine.
490 >>> from psyclone.psyir.frontend.fortran import FortranReader
491 >>> from psyclone.psyir.nodes import Loop
492 >>> from psyclone.transformations import OMPDeclareTargetTrans
494 >>> tree = FortranReader().psyir_from_source("""
495 ... subroutine my_subroutine(A)
496 ... integer, dimension(10, 10), intent(inout) :: A
506 >>> omptargettrans = OMPDeclareTargetTrans()
507 >>> omptargettrans.apply(tree.walk(Routine)[0])
511 .. code-block:: fortran
513 subroutine my_subroutine(A)
514 integer, dimension(10, 10), intent(inout) :: A
526 def apply(self, node, options=None):
527 ''' Insert an OMPDeclareTargetDirective inside the provided routine.
529 :param node: the PSyIR routine to insert the directive into.
530 :type node: :py:class:`psyclone.psyir.nodes.Routine`
531 :param options: a dictionary with options for transformations.
532 :type options: Optional[Dict[str, Any]]
536 for child
in node.children:
537 if isinstance(child, OMPDeclareTargetDirective):
542 ''' Check that an OMPDeclareTargetDirective can be inserted.
544 :param node: the kernel or routine which is the target of this
546 :type node: :py:class:`psyclone.psyGen.Kern` |
547 :py:class:`psyclone.psyir.nodes.Routine`
548 :param options: a dictionary with options for transformations.
549 :type options: Optional[Dict[str, Any]]
550 :param bool options["force"]: whether to allow routines with
551 CodeBlocks to run on the GPU.
553 :raises TransformationError: if the node is not a kernel or a routine.
554 :raises TransformationError: if the target is a built-in kernel.
555 :raises TransformationError: if it is a kernel but without an
557 :raises TransformationError: if any of the symbols in the kernel are
558 accessed via a module use statement.
559 :raises TransformationError: if the kernel contains any calls to other
563 super().
validate(node, options=options)
570 Adds an OpenACC loop directive to a loop. This directive must be within
571 the scope of some OpenACC Parallel region (at code-generation time).
575 >>> from psyclone.parse.algorithm import parse
576 >>> from psyclone.parse.utils import ParseError
577 >>> from psyclone.psyGen import PSyFactory
578 >>> from psyclone.errors import GenerationError
579 >>> api = "gocean1.0"
580 >>> ast, invokeInfo = parse(GOCEAN_SOURCE_FILE, api=api)
581 >>> psy = PSyFactory(api).create(invokeInfo)
583 >>> from psyclone.psyGen import TransInfo
585 >>> ltrans = t.get_trans_name('ACCLoopTrans')
586 >>> rtrans = t.get_trans_name('ACCParallelTrans')
588 >>> schedule = psy.invokes.get('invoke_0').schedule
589 >>> # Uncomment the following line to see a text view of the schedule
590 >>> # print(schedule.view())
592 >>> # Apply the OpenACC Loop transformation to *every* loop in the schedule
593 >>> for child in schedule.children[:]:
594 ... ltrans.apply(child)
596 >>> # Enclose all of these loops within a single OpenACC parallel region
597 >>> rtrans.apply(schedule)
603 excluded_node_types = (PSyDataNode,)
610 self.
_gang_gang =
False
615 return "Adds an 'OpenACC loop' directive to a loop"
617 def _directive(self, children, collapse=None):
619 Creates the ACCLoopDirective needed by this sub-class of
622 :param children: list of child nodes of the new directive Node.
623 :type children: list of :py:class:`psyclone.psyir.nodes.Node`
624 :param int collapse: number of nested loops to collapse or None if
625 no collapse attribute is required.
631 gang=self.
_gang_gang,
635 def apply(self, node, options=None):
637 Apply the ACCLoop transformation to the specified node. This node
638 must be a Loop since this transformation corresponds to
639 inserting a directive immediately before a loop, e.g.:
641 .. code-block:: fortran
648 At code-generation time (when
649 :py:meth:`psyclone.psyir.nodes.ACCLoopDirective.gen_code` is called),
650 this node must be within (i.e. a child of) a PARALLEL region.
652 :param node: the supplied node to which we will apply the
654 :type node: :py:class:`psyclone.psyir.nodes.Loop`
655 :param options: a dictionary with options for transformations.
656 :type options: Optional[Dict[str, Any]]
657 :param int options["collapse"]: number of nested loops to collapse.
658 :param bool options["independent"]: whether to add the "independent"
659 clause to the directive (not strictly necessary within
661 :param bool options["sequential"]: whether to add the "seq" clause to
663 :param bool options["gang"]: whether to add the "gang" clause to the
665 :param bool options["vector"]: whether to add the "vector" clause to
673 self.
_independent_independent = options.get(
"independent",
True)
674 self.
_sequential_sequential = options.get(
"sequential",
False)
675 self.
_gang_gang = options.get(
"gang",
False)
676 self.
_vector_vector = options.get(
"vector",
False)
679 super().
apply(node, options)
684 ''' Adds an OpenMP PARALLEL DO directive to a loop.
688 >>> from psyclone.parse.algorithm import parse
689 >>> from psyclone.psyGen import PSyFactory
690 >>> ast, invokeInfo = parse("dynamo.F90")
691 >>> psy = PSyFactory("dynamo0.3").create(invokeInfo)
692 >>> schedule = psy.invokes.get('invoke_v3_kernel_type').schedule
693 >>> # Uncomment the following line to see a text view of the schedule
694 >>> # print(schedule.view())
696 >>> from psyclone.transformations import OMPParallelLoopTrans
697 >>> trans = OMPParallelLoopTrans()
698 >>> trans.apply(schedule.children[0])
699 >>> # Uncomment the following line to see a text view of the schedule
700 >>> # print(schedule.view())
704 return "Add an 'OpenMP PARALLEL DO' directive"
706 def apply(self, node, options=None):
707 ''' Apply an OMPParallelLoop Transformation to the supplied node
708 (which must be a Loop). In the generated code this corresponds to
709 wrapping the Loop with directives:
711 .. code-block:: fortran
713 !$OMP PARALLEL DO ...
717 !$OMP END PARALLEL DO
719 :param node: the node (loop) to which to apply the transformation.
720 :type node: :py:class:`psyclone.f2pygen.DoGen`
721 :param options: a dictionary with options for transformations\
723 :type options: Optional[Dict[str, Any]]
729 node_parent = node.parent
730 node_position = node.position
738 node_parent.addchild(directive, index=node_position)
743 ''' Dynamo-specific OpenMP loop transformation. Adds Dynamo specific
744 validity checks. Actual transformation is done by the
745 :py:class:`base class <OMPParallelLoopTrans>`.
747 :param str omp_directive: choose which OpenMP loop directive to use.
749 :param str omp_schedule: the OpenMP schedule to use. Must be one of
750 'runtime', 'static', 'dynamic', 'guided' or 'auto'. Defaults to
754 def __init__(self, omp_directive="do", omp_schedule="static"):
755 super().__init__(omp_directive=omp_directive,
756 omp_schedule=omp_schedule)
759 return "Add an OpenMP Parallel Do directive to a Dynamo loop"
763 Perform LFRic-specific loop validity checks then call the `validate`
764 method of the base class.
766 :param node: the Node in the Schedule to check
767 :type node: :py:class:`psyclone.psyir.nodes.Node`
768 :param options: a dictionary with options for transformations.
769 :type options: Optional[Dict[str, Any]]
771 :raises TransformationError: if the supplied Node is not a LFRicLoop.
772 :raises TransformationError: if the associated loop requires
775 if not isinstance(node, LFRicLoop):
777 f
"Error in {self.name} transformation. The supplied node "
778 f
"must be a LFRicLoop but got '{type(node).__name__}'")
785 if node.field_space.orig_name
not in const.VALID_DISCONTINUOUS_NAMES:
786 if node.loop_type !=
'colour' and node.has_inc_arg():
788 f
"Error in {self.name} transformation. The kernel has an "
789 f
"argument with INC access. Colouring is required.")
795 local_options = options.copy()
if options
else {}
796 local_options[
"force"] =
True
797 super().
validate(node, options=local_options)
802 '''GOcean specific OpenMP Do loop transformation. Adds GOcean
803 specific validity checks (that supplied Loop is an inner or outer
804 loop). Actual transformation is done by
805 :py:class:`base class <OMPParallelLoopTrans>`.
807 :param str omp_directive: choose which OpenMP loop directive to use. \
809 :param str omp_schedule: the OpenMP schedule to use. Must be one of \
810 'runtime', 'static', 'dynamic', 'guided' or 'auto'. Defaults to \
814 def __init__(self, omp_directive="do", omp_schedule="static"):
815 super().__init__(omp_directive=omp_directive,
816 omp_schedule=omp_schedule)
819 return "Add an OpenMP Parallel Do directive to a GOcean loop"
821 def apply(self, node, options=None):
822 ''' Perform GOcean-specific loop validity checks then call
823 :py:meth:`OMPParallelLoopTrans.apply`.
825 :param node: a Loop node from an AST.
826 :type node: :py:class:`psyclone.psyir.nodes.Loop`
827 :param options: a dictionary with options for transformations\
829 :type options: Optional[Dict[str, Any]]
831 :raises TransformationError: if the supplied node is not an inner or\
838 if node.loop_type
not in [
"inner",
"outer"]:
840 "Error in "+self.
namenamename+
" transformation. The requested loop"
841 " is not of type inner or outer.")
843 OMPParallelLoopTrans.apply(self, node)
848 ''' LFRic (Dynamo 0.3) specific orphan OpenMP loop transformation. Adds
849 Dynamo-specific validity checks.
851 :param str omp_schedule: the OpenMP schedule to use. Must be one of \
852 'runtime', 'static', 'dynamic', 'guided' or 'auto'. Defaults to \
856 def __init__(self, omp_schedule="static"):
857 super().__init__(omp_directive=
"do", omp_schedule=omp_schedule)
860 return "Add an OpenMP DO directive to a Dynamo 0.3 loop"
863 ''' Perform LFRic (Dynamo 0.3) specific loop validity checks for the
866 :param node: the Node in the Schedule to check
867 :type node: :py:class:`psyclone.psyir.nodes.Node`
868 :param options: a dictionary with options for transformations \
870 :type options: Optional[Dict[str, Any]]
871 :param bool options["reprod"]: \
872 indicating whether reproducible reductions should be used. \
873 By default the value from the config file will be used.
875 :raises TransformationError: if an OMP loop transform would create \
884 options = options.copy()
886 options[
"reprod"] = options.get(
"reprod",
887 Config.get().reproducible_reductions)
892 options[
"force"] =
True
893 super().
validate(node, options=options)
897 if node.loop_type !=
'colour' and node.has_inc_arg():
899 f
"Error in {self.name} transformation. The kernel has an "
900 f
"argument with INC access. Colouring is required.")
902 def apply(self, node, options=None):
903 ''' Apply LFRic (Dynamo 0.3) specific OMPLoopTrans.
905 :param node: the Node in the Schedule to check.
906 :type node: :py:class:`psyclone.psyir.nodes.Node`
907 :param options: a dictionary with options for transformations \
909 :type options: Optional[Dict[str, Any]]
910 :param bool options["reprod"]: \
911 indicating whether reproducible reductions should be used. \
912 By default the value from the config file will be used.
920 options = options.copy()
922 options[
"reprod"] = options.get(
"reprod",
923 Config.get().reproducible_reductions)
928 options[
"force"] =
True
930 super().
apply(node, options)
935 ''' GOcean-specific orphan OpenMP loop transformation. Adds GOcean
936 specific validity checks (that the node is either an inner or outer
939 :param str omp_directive: choose which OpenMP loop directive to use. \
941 :param str omp_schedule: the OpenMP schedule to use. Must be one of \
942 'runtime', 'static', 'dynamic', 'guided' or 'auto'. Defaults to \
946 def __init__(self, omp_directive="do", omp_schedule="static"):
947 super().__init__(omp_directive=omp_directive,
948 omp_schedule=omp_schedule)
951 return "Add the selected OpenMP loop directive to a GOcean loop"
955 Checks that the supplied node is a valid target for parallelisation
956 using OMP directives.
958 :param node: the candidate loop for parallelising using OMP Do.
959 :type node: :py:class:`psyclone.psyir.nodes.Loop`
960 :param options: a dictionary with options for transformations.
961 :type options: Optional[Dict[str, Any]]
963 :raises TransformationError: if the loop_type of the supplied Loop is \
964 not "inner" or "outer".
967 super().
validate(node, options=options)
970 if node.loop_type
not in [
"inner",
"outer"]:
972 " The requested loop is not of type "
978 Apply a colouring transformation to a loop (in order to permit a
979 subsequent parallelisation over colours). For example:
982 >>> schedule = invoke.schedule
984 >>> ctrans = ColourTrans()
986 >>> # Colour all of the loops
987 >>> for child in schedule.children:
988 >>> ctrans.apply(child)
990 >>> # Uncomment the following line to see a text view of the schedule
991 >>> # print(schedule.view())
995 return "Split a loop into colours"
997 def apply(self, node, options=None):
999 Converts the Loop represented by :py:obj:`node` into a
1000 nested loop where the outer loop is over colours and the inner
1001 loop is over cells of that colour.
1003 :param node: the loop to transform.
1004 :type node: :py:class:`psyclone.psyir.nodes.Loop`
1005 :param options: options for the transformation.
1006 :type options: Optional[Dict[str, Any]]
1014 node.parent.addchild(colours_loop, index=node.position)
1017 colours_loop.loop_body[0].loop_body.children.extend(
1018 node.loop_body.pop_all_children())
1023 def _create_colours_loop(self, node):
1025 Creates a nested loop (colours, and cells of a given colour) to
1026 replace the supplied loop over cells.
1028 :param node: the loop for which to create a coloured version.
1029 :type node: :py:class:`psyclone.psyir.nodes.Loop`
1031 :returns: doubly-nested loop over colours and cells of a given colour.
1032 :rtype: :py:class:`psyclone.psyir.nodes.Loop`
1034 :raises NotImplementedError: this method must be overridden in an \
1035 API-specific sub-class.
1037 raise InternalError(
"_create_colours_loop() must be overridden in an "
1038 "API-specific sub-class.")
1043 '''Split a Dynamo 0.3 loop over cells into colours so that it can be
1044 parallelised. For example:
1046 >>> from psyclone.parse.algorithm import parse
1047 >>> from psyclone.psyGen import PSyFactory
1048 >>> import transformations
1052 >>> TEST_API = "dynamo0.3"
1053 >>> _,info=parse(os.path.join(os.path.dirname(os.path.abspath(__file__)),
1054 >>> "tests", "test_files", "dynamo0p3",
1055 >>> "4.6_multikernel_invokes.f90"),
1057 >>> psy = PSyFactory(TEST_API).create(info)
1058 >>> invoke = psy.invokes.get('invoke_0')
1059 >>> schedule = invoke.schedule
1061 >>> ctrans = Dynamo0p3ColourTrans()
1062 >>> otrans = DynamoOMPParallelLoopTrans()
1064 >>> # Colour all of the loops
1065 >>> for child in schedule.children:
1066 >>> ctrans.apply(child)
1068 >>> # Then apply OpenMP to each of the colour loops
1069 >>> for child in schedule.children:
1070 >>> otrans.apply(child.children[0])
1072 >>> # Uncomment the following line to see a text view of the schedule
1073 >>> # print(schedule.view())
1075 Colouring in the LFRic (Dynamo 0.3) API is subject to the following rules:
1077 * Only kernels which operate on 'CELL_COLUMN's and which increment a
1078 field on a continuous function space require colouring. Kernels that
1079 update a field on a discontinuous function space will cause this
1080 transformation to raise an exception. Kernels that only write to a field
1081 on a continuous function space also do not require colouring but are
1083 * A kernel may have at most one field with 'GH_INC' access.
1084 * A separate colour map will be required for each field that is coloured
1085 (if an invoke contains >1 kernel call).
1089 return "Split a Dynamo 0.3 loop over cells into colours"
1092 '''Performs Dynamo0.3-specific error checking and then uses the parent
1093 class to convert the Loop represented by :py:obj:`node` into a
1094 nested loop where the outer loop is over colours and the inner
1095 loop is over cells of that colour.
1097 :param node: the loop to transform.
1098 :type node: :py:class:`psyclone.domain.lfric.LFRicLoop`
1099 :param options: a dictionary with options for transformations.\
1100 :type options: Optional[Dict[str, Any]]
1104 super().
validate(node, options=options)
1108 if node.field_space.orig_name
in \
1109 const.VALID_DISCONTINUOUS_NAMES:
1111 "Error in DynamoColour transformation. Loops iterating over "
1112 "a discontinuous function space are not currently supported.")
1117 if node.loop_type !=
"":
1119 f
"Error in DynamoColour transformation. Only loops over cells "
1120 f
"may be coloured but this loop is over {node.loop_type}")
1123 if not node.has_inc_arg():
1131 if node.ancestor(OMPDirective):
1133 "within an OpenMP parallel region.")
1135 super().
apply(node, options=options)
1137 def _create_colours_loop(self, node):
1139 Creates a nested loop (colours, and cells of a given colour) which
1140 can be used to replace the supplied loop over cells.
1142 :param node: the loop for which to create a coloured version.
1143 :type node: :py:class:`psyclone.psyir.nodes.Loop`
1145 :returns: doubly-nested loop over colours and cells of a given colour.
1146 :rtype: :py:class:`psyclone.psyir.nodes.Loop`
1151 colours_loop = node.__class__(parent=node.parent, loop_type=
"colours")
1152 colours_loop.field_space = node.field_space
1153 colours_loop.iteration_space = node.iteration_space
1154 colours_loop.set_lower_bound(
"start")
1155 colours_loop.set_upper_bound(
"ncolours")
1159 colour_loop = node.__class__(parent=colours_loop.loop_body,
1161 colour_loop.field_space = node.field_space
1162 colour_loop.field_name = node.field_name
1163 colour_loop.iteration_space = node.iteration_space
1164 colour_loop.set_lower_bound(
"start")
1165 colour_loop.kernel = node.kernel
1167 if node.upper_bound_name
in LFRicConstants().HALO_ACCESS_LOOP_BOUNDS:
1170 index = node.upper_bound_halo_depth
1171 colour_loop.set_upper_bound(
"colour_halo", index)
1174 colour_loop.set_upper_bound(
"ncolour")
1177 colours_loop.loop_body.addchild(colour_loop)
1184 Base class for transformations that create a parallel region.
1204 Check that the supplied list of Nodes are eligible to be
1205 put inside a parallel region.
1207 :param list node_list: list of nodes to put into a parallel region
1208 :param options: a dictionary with options for transformations.\
1209 :type options: Optional[Dict[str, Any]]
1210 :param bool options["node-type-check"]: this flag controls whether \
1211 or not the type of the nodes enclosed in the region should be \
1212 tested to avoid using unsupported nodes inside a region.
1214 :raises TransformationError: if the supplied node is an \
1215 InvokeSchedule rather than being within an InvokeSchedule.
1216 :raises TransformationError: if the supplied nodes are not all \
1217 children of the same parent (siblings).
1220 if isinstance(node_list[0], InvokeSchedule):
1222 f
"A {self.name} transformation cannot be applied to an "
1223 f
"InvokeSchedule but only to one or more nodes from within an "
1226 node_parent = node_list[0].parent
1228 for child
in node_list:
1229 if child.parent
is not node_parent:
1231 f
"Error in {self.name} transformation: supplied nodes are "
1232 f
"not children of the same parent.")
1233 super().
validate(node_list, options)
1235 def apply(self, target_nodes, options=None):
1238 Apply this transformation to a subset of the nodes within a
1239 schedule - i.e. enclose the specified Loops in the
1240 schedule within a single parallel region.
1242 :param target_nodes: a single Node or a list of Nodes.
1243 :type target_nodes: (list of) :py:class:`psyclone.psyir.nodes.Node`
1244 :param options: a dictionary with options for transformations.
1245 :type options: Optional[Dict[str, Any]]
1246 :param bool options["node-type-check"]: this flag controls if the \
1247 type of the nodes enclosed in the region should be tested \
1248 to avoid using unsupported nodes inside a region.
1262 node_parent = node_list[0].parent
1263 node_position = node_list[0].position
1270 children=[node.detach()
for node
in node_list])
1275 node_parent.addchild(directive, index=node_position)
1280 Create an OpenMP SINGLE region by inserting directives. The most
1281 likely use case for this transformation is to wrap around task-based
1282 transformations. The parent region for this should usually also be
1285 :param bool nowait: whether to apply a nowait clause to this \
1286 transformation. The default value is False
1290 >>> from psyclone.parse.algorithm import parse
1291 >>> from psyclone.psyGen import PSyFactory
1292 >>> api = "gocean1.0"
1293 >>> ast, invokeInfo = parse(GOCEAN_SOURCE_FILE, api=api)
1294 >>> psy = PSyFactory(api).create(invokeInfo)
1296 >>> from psyclone.transformations import OMPParallelTrans, OMPSingleTrans
1297 >>> singletrans = OMPSingleTrans()
1298 >>> paralleltrans = OMPParallelTrans()
1300 >>> schedule = psy.invokes.get('invoke_0').schedule
1301 >>> # Uncomment the following line to see a text view of the schedule
1302 >>> # print(schedule.view())
1304 >>> # Enclose all of these loops within a single OpenMP
1306 >>> singletrans.apply(schedule.children)
1307 >>> # Enclose all of these loops within a single OpenMP
1308 >>> # PARALLEL region
1309 >>> paralleltrans.apply(schedule.children)
1310 >>> # Uncomment the following line to see a text view of the schedule
1311 >>> # print(schedule.view())
1315 excluded_node_types = (CodeBlock, Return, ACCDirective,
1317 OMPParallelDirective)
1319 def __init__(self, nowait=False):
1327 return "Insert an OpenMP Single region"
@property
def omp_nowait(self):
    ''' :returns: whether or not this Single region uses a nowait \
        clause to remove the end barrier.
    :rtype: bool
    '''
    # Backing attribute set by the omp_nowait setter - confirm the
    # attribute name against the (out-of-view) __init__.
    return self._omp_nowait
@property
def name(self):
    '''
    :returns: the name of this transformation.
    :rtype: str
    '''
    return "OMPSingleTrans"
@omp_nowait.setter
def omp_nowait(self, value):
    ''' Sets the nowait property that will be specified by
    this transformation. Checks that the value supplied in
    :py:obj:`value` is a bool.

    :param bool value: whether this Single clause should have a \
        nowait applied.

    :raises TypeError: if the value parameter is not a bool.

    '''
    if not isinstance(value, bool):
        raise TypeError(f"Expected nowait to be a bool "
                        f"but got a {type(value).__name__}")
    # Store for use when the directive is created.
    self._omp_nowait = value
def _directive(self, children):
    '''
    Creates the type of directive needed for this sub-class of
    transformation.

    :param children: list of Nodes that will be the children of \
        the created directive.
    :type children: list of :py:class:`psyclone.psyir.nodes.Node`

    :returns: The directive created for the OpenMP Single Directive.
    :rtype: :py:class:`psyclone.psyGen.OMPSingleDirective`

    '''
    # The nowait clause is taken from this transformation's setting.
    return OMPSingleDirective(children=children,
                              nowait=self.omp_nowait)
def apply(self, node_list, options=None):
    '''Apply the OMPSingleTrans transformation to the specified node in
    a Schedule.

    At code-generation time this node must be within (i.e. a child of)
    an OpenMP PARALLEL region. Code generation happens when
    :py:meth:`OMPLoopDirective.gen_code` is called, or when the PSyIR
    tree is given to a backend.

    If the keyword "nowait" is specified in the options, it will cause a
    nowait clause to be added if it is set to True, otherwise no clause
    will be added.

    :param node_list: the supplied node or node list to which we will \
        apply the OMPSingleTrans transformation.
    :type node_list: (a list of) :py:class:`psyclone.psyir.nodes.Node`
    :param options: a list with options for transformations \
        and validation.
    :type options: Optional[Dict[str, Any]]
    :param bool options["nowait"]:
        indicating whether or not to use a nowait clause on this \
        single region.

    '''
    if not options:
        options = {}
    # An explicit "nowait" option overrides the value given at
    # construction time.
    if options.get("nowait") is not None:
        self.omp_nowait = options["nowait"]

    super().apply(node_list, options)
1414 Create an OpenMP MASTER region by inserting directives. The most
1415 likely use case for this transformation is to wrap around task-based
1416 transformations. Note that adding this directive requires a parent
1417 OpenMP parallel region (which can be inserted by OMPParallelTrans),
1418 otherwise it will produce an error in generation-time.
1422 >>> from psyclone.parse.algorithm import parse
1423 >>> from psyclone.psyGen import PSyFactory
1424 >>> api = "gocean1.0"
1425 >>> ast, invokeInfo = parse(GOCEAN_SOURCE_FILE, api=api)
1426 >>> psy = PSyFactory(api).create(invokeInfo)
1428 >>> from psyclone.transformations import OMPParallelTrans, OMPMasterTrans
1429 >>> mastertrans = OMPMasterTrans()
1430 >>> paralleltrans = OMPParallelTrans()
1432 >>> schedule = psy.invokes.get('invoke_0').schedule
1433 >>> # Uncomment the following line to see a text view of the schedule
1434 >>> # print(schedule.view())
1436 >>> # Enclose all of these loops within a single OpenMP
1438 >>> mastertrans.apply(schedule.children)
1439 >>> # Enclose all of these loops within a single OpenMP
1440 >>> # PARALLEL region
1441 >>> paralleltrans.apply(schedule.children)
1442 >>> # Uncomment the following line to see a text view of the schedule
1443 >>> # print(schedule.view())
1447 excluded_node_types = (CodeBlock, Return, ACCDirective,
1449 OMPParallelDirective)
def __str__(self):
    '''Human-readable description of this transformation.'''
    return "Insert an OpenMP Master region"
@property
def name(self):
    '''
    :returns: the name of this transformation as a string.
    :rtype: str
    '''
    return "OMPMasterTrans"
1470 Create an OpenMP PARALLEL region by inserting directives. For
1473 >>> from psyclone.parse.algorithm import parse
1474 >>> from psyclone.parse.utils import ParseError
1475 >>> from psyclone.psyGen import PSyFactory
1476 >>> from psyclone.errors import GenerationError
1477 >>> api = "gocean1.0"
1478 >>> ast, invokeInfo = parse(GOCEAN_SOURCE_FILE, api=api)
1479 >>> psy = PSyFactory(api).create(invokeInfo)
1481 >>> from psyclone.psyGen import TransInfo
1483 >>> ltrans = t.get_trans_name('GOceanOMPLoopTrans')
1484 >>> rtrans = t.get_trans_name('OMPParallelTrans')
1486 >>> schedule = psy.invokes.get('invoke_0').schedule
1487 >>> # Uncomment the following line to see a text view of the schedule
1488 >>> # print(schedule.view())
1490 >>> # Apply the OpenMP Loop transformation to *every* loop
1491 >>> # in the schedule
1492 >>> for child in schedule.children:
1493 >>> ltrans.apply(child)
1495 >>> # Enclose all of these loops within a single OpenMP
1496 >>> # PARALLEL region
1497 >>> rtrans.apply(schedule.children)
1498 >>> # Uncomment the following line to see a text view of the schedule
1499 >>> # print(schedule.view())
1503 excluded_node_types = (CodeBlock, Return, ACCDirective,
def __str__(self):
    '''Human-readable description of this transformation.'''
    return "Insert an OpenMP Parallel region"
@property
def name(self):
    '''
    :returns: the name of this transformation as a string.
    :rtype: str
    '''
    return "OMPParallelTrans"
def validate(self, node_list, options=None):
    '''
    Perform OpenMP-specific validation checks.

    :param node_list: list of Nodes to put within parallel region.
    :type node_list: list of :py:class:`psyclone.psyir.nodes.Node`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]
    :param bool options["node-type-check"]: this flag controls if the \
        type of the nodes enclosed in the region should be tested \
        to avoid using unsupported nodes inside a region.

    :raises TransformationError: if the target Nodes are already within \
        some OMP parallel region.
    '''
    # OpenMP parallel regions may not be nested within one another.
    if node_list[0].ancestor(OMPDirective):
        raise TransformationError("Error in OMPParallel transformation:" +
                                  " cannot create an OpenMP PARALLEL " +
                                  "region within another OpenMP region.")

    # Now call the general validation checks.
    super().validate(node_list, options)
1548 Create an OpenACC parallel region by inserting an 'acc parallel'
1551 >>> from psyclone.psyGen import TransInfo
1552 >>> from psyclone.psyir.frontend.fortran import FortranReader
1553 >>> from psyclone.psyir.backend.fortran import FortranWriter
1554 >>> from psyclone.psyir.nodes import Loop
1555 >>> psyir = FortranReader().psyir_from_source("""
1557 ... real, dimension(10) :: A
1562 ... end program do_loop
1564 >>> ptrans = TransInfo().get_trans_name('ACCParallelTrans')
1566 >>> # Enclose the loop within a OpenACC PARALLEL region
1567 >>> ptrans.apply(psyir.walk(Loop))
1568 >>> print(FortranWriter()(psyir))
1570 real, dimension(10) :: a
1573 !$acc parallel default(present)
1583 excluded_node_types = (CodeBlock, Return, PSyDataNode,
1584 ACCDataDirective, ACCEnterDataDirective,
def __init__(self, default_present=True):
    '''
    :param bool default_present: whether the directive should include \
        the 'default(present)' clause by default.

    :raises TransformationError: if default_present is not a boolean.
    '''
    super().__init__()
    if not isinstance(default_present, bool):
        raise TransformationError(
            f"The provided 'default_present' argument must be a "
            f"boolean, but found '{default_present}'."
        )
    # Remembered as the default for apply(); can be overridden by the
    # "default_present" option passed to apply().
    self._default_present = default_present
def __str__(self):
    '''Human-readable description of this transformation.'''
    return "Insert an OpenACC Parallel region"
def validate(self, node_list, options=None):
    '''
    Validate this transformation.

    :param node_list: a single Node or a list of Nodes.
    :type node_list: :py:class:`psyclone.psyir.nodes.Node` |
        List[:py:class:`psyclone.psyir.nodes.Node`]
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]
    :param bool options["node-type-check"]: this flag controls if the
        type of the nodes enclosed in the region should be tested to
        avoid using unsupported nodes inside a region.
    :param bool options["default_present"]: this flag controls if the
        inserted directive should include the default_present clause.

    '''
    super().validate(node_list, options)

    # If the caller supplied a "default_present" option it must be a
    # boolean.
    if options is not None and "default_present" in options:
        if not isinstance(options["default_present"], bool):
            raise TransformationError(
                f"The provided 'default_present' option must be a "
                f"boolean, but found '{options['default_present']}'."
            )
def apply(self, target_nodes, options=None):
    '''
    Encapsulate given nodes with the ACCParallelDirective.

    :param target_nodes: a single Node or a list of Nodes.
    :type target_nodes: :py:class:`psyclone.psyir.nodes.Node` |
        List[:py:class:`psyclone.psyir.nodes.Node`]
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]
    :param bool options["node-type-check"]: this flag controls if the
        type of the nodes enclosed in the region should be tested to
        avoid using unsupported nodes inside a region.
    :param bool options["default_present"]: this flag controls if the
        inserted directive should include the default_present clause.

    '''
    if not options:
        options = {}
    self.validate(target_nodes, options)

    # Normalise to a list and record where the new directive must go.
    node_list = self.get_node_list(target_nodes)
    node_parent = node_list[0].parent
    node_position = node_list[0].position

    # Detach the nodes and enclose them within the new directive.
    directive = ACCParallelDirective(
        children=[node.detach() for node in node_list])
    # An explicit option overrides the value set at construction time.
    directive.default_present = options.get("default_present",
                                            self._default_present)

    # Insert the directive where the first enclosed node used to be.
    node_parent.addchild(directive, index=node_position)
1667 '''Provides a transformation to move a node in the tree. For
1670 >>> from psyclone.parse.algorithm import parse
1671 >>> from psyclone.psyGen import PSyFactory
1672 >>> ast,invokeInfo=parse("dynamo.F90")
1673 >>> psy=PSyFactory("dynamo0.3").create(invokeInfo)
1674 >>> schedule=psy.invokes.get('invoke_v3_kernel_type').schedule
1675 >>> # Uncomment the following line to see a text view of the schedule
1676 >>> # print(schedule.view())
1678 >>> from psyclone.transformations import MoveTrans
1679 >>> trans=MoveTrans()
1680 >>> trans.apply(schedule.children[0], schedule.children[2],
...                 options={"position": "after"})
1682 >>> # Uncomment the following line to see a text view of the schedule
1683 >>> # print(schedule.view())
1685 Nodes may only be moved to a new location with the same parent
1686 and must not break any dependencies otherwise an exception is
def __str__(self):
    '''Human-readable description of this transformation.'''
    return "Move a node to a different location"
@property
def name(self):
    ''' Returns the name of this transformation as a string.'''
    return "MoveTrans"
def validate(self, node, location, options=None):
    ''' validity checks for input arguments.

    :param node: the node to be moved.
    :type node: :py:class:`psyclone.psyir.nodes.Node`
    :param location: node before or after which the given node \
        should be moved.
    :type location: :py:class:`psyclone.psyir.nodes.Node`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]
    :param str options["position"]: either 'before' or 'after'.

    :raises TransformationError: if the given node is not an instance \
        of :py:class:`psyclone.psyir.nodes.Node`
    :raises TransformationError: if the location is not valid.
    '''
    # The first argument must be a PSyIR Node.
    if not isinstance(node, Node):
        raise TransformationError(
            "In the Move transformation apply method the first argument "
            "is not a Node")

    # Check the new location conforms to any data dependencies. This
    # also checks the location and position arguments.
    if not options:
        options = {}
    position = options.get("position", "before")
    if not node.is_valid_location(location, position=position):
        # NOTE(review): reconstructed raise - confirm the exception
        # class used here against the original file.
        raise GenerationError(
            "In the Move transformation apply method, data dependencies "
            "forbid the move to the new location")
def apply(self, node, location, options=None):
    '''Move the node represented by :py:obj:`node` before location
    :py:obj:`location` (which is also a node) by default and after
    if the optional `position` argument is set to 'after'.

    :param node: the node to be moved.
    :type node: :py:class:`psyclone.psyir.nodes.Node`
    :param location: node before or after which the given node \
        should be moved.
    :type location: :py:class:`psyclone.psyir.nodes.Node`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]
    :param str options["position"]: either 'before' or 'after'.

    :raises TransformationError: if the given node is not an instance \
        of :py:class:`psyclone.psyir.nodes.Node`
    :raises TransformationError: if the location is not valid.
    '''
    self.validate(node, location, options)

    if not options:
        options = {}
    position = options.get("position", "before")

    # Detach the node from its current position...
    parent = node.parent
    my_node = parent.children.pop(node.position)

    # ...and re-insert it before or after the target location.
    location_index = location.position
    if position == "before":
        location.parent.children.insert(location_index, my_node)
    else:
        location.parent.children.insert(location_index + 1, my_node)
1770 '''This transformation allows the user to modify a loop's bounds so
1771 that redundant computation will be performed. Redundant
1772 computation can result in halo exchanges being modified, new halo
1773 exchanges being added or existing halo exchanges being removed.
1775 * This transformation should be performed before any
1776 parallelisation transformations (e.g. for OpenMP) to the loop in
1777 question and will raise an exception if this is not the case.
1779 * This transformation can not be applied to a loop containing a
1780 reduction and will again raise an exception if this is the case.
1782 * This transformation can only be used to add redundant
1783 computation to a loop, not to remove it.
1785 * This transformation allows a loop that is already performing
1786 redundant computation to be modified, but only if the depth is
def __str__(self):
    '''Human-readable description of this transformation.'''
    return "Change iteration space to perform redundant computation"
def validate(self, node, options=None):
    '''Perform various checks to ensure that it is valid to apply the
    RedundantComputation transformation to the supplied node.

    :param node: the supplied node on which we are performing \
        validity checks.
    :type node: :py:class:`psyclone.psyir.nodes.Node`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]
    :param int options["depth"]: the depth of the stencil if the value \
        is provided and None if not.

    :raises TransformationError: if the parent of the loop is a \
        :py:class:`psyclone.psyir.nodes.Directive`.
    :raises TransformationError: if the parent of the loop is not a \
        :py:class:`psyclone.psyir.nodes.Loop` or a \
        :py:class:`psyclone.psyGen.DynInvokeSchedule`.
    :raises TransformationError: if the parent of the loop is a \
        :py:class:`psyclone.psyir.nodes.Loop` but the original loop does \
        not iterate over 'colour'.
    :raises TransformationError: if the parent of the loop is a \
        :py:class:`psyclone.psyir.nodes.Loop` but the parent does not \
        iterate over 'colours'.
    :raises TransformationError: if the parent of the loop is a \
        :py:class:`psyclone.psyir.nodes.Loop` but the parent's parent is \
        not a :py:class:`psyclone.psyGen.DynInvokeSchedule`.
    :raises TransformationError: if this transformation is applied \
        when distributed memory is not switched on.
    :raises TransformationError: if the loop does not iterate over \
        cells, dofs or colour.
    :raises TransformationError: if the transformation is setting the \
        loop to the maximum halo depth but the loop already computes \
        to the maximum halo depth.
    :raises TransformationError: if the transformation is setting the \
        loop to the maximum halo depth but the loop contains a stencil \
        access (as this would result in the field being accessed \
        beyond the halo depth).
    :raises TransformationError: if the supplied depth value is not an \
        integer.
    :raises TransformationError: if the supplied depth value is less \
        than 1.
    :raises TransformationError: if the supplied depth value is not \
        greater than the existing depth value, as we should not need \
        to undo existing transformations.
    :raises TransformationError: if a depth value has been supplied \
        but the loop has already been set to the maximum halo depth.

    '''
    # Generic loop-transformation checks.
    super().validate(node, options=options)

    # Redundant computation must be applied before any directives are
    # added, otherwise halo-exchange placement might fail.
    dir_node = node.ancestor(Directive)
    if dir_node:
        # Fixed grammar of the original message ("is sits beneath").
        raise TransformationError(
            f"In the Dynamo0p3RedundantComputation transformation apply "
            f"method the supplied loop sits beneath a directive of "
            f"type {type(dir_node)}. Redundant computation must be applied"
            f" before directives are added.")
    if not (isinstance(node.parent, DynInvokeSchedule) or
            isinstance(node.parent.parent, Loop)):
        raise TransformationError(
            f"In the Dynamo0p3RedundantComputation transformation "
            f"apply method the parent of the supplied loop must be the "
            f"DynInvokeSchedule, or a Loop, but found {type(node.parent)}")
    if isinstance(node.parent.parent, Loop):
        # A nested loop is only valid when it iterates over 'colour'
        # within a 'colours' loop directly beneath the schedule.
        if node.loop_type != "colour":
            raise TransformationError(
                f"In the Dynamo0p3RedundantComputation transformation "
                f"apply method, if the parent of the supplied Loop is "
                f"also a Loop then the supplied Loop must iterate over "
                f"'colour', but found '{node.loop_type}'")
        if node.parent.parent.loop_type != "colours":
            raise TransformationError(
                f"In the Dynamo0p3RedundantComputation transformation "
                f"apply method, if the parent of the supplied Loop is "
                f"also a Loop then the parent must iterate over "
                f"'colours', but found '{node.parent.parent.loop_type}'")
        if not isinstance(node.parent.parent.parent, DynInvokeSchedule):
            raise TransformationError(
                f"In the Dynamo0p3RedundantComputation transformation "
                f"apply method, if the parent of the supplied Loop is "
                f"also a Loop then the parent's parent must be the "
                f"DynInvokeSchedule, but found {type(node.parent)}")
    # Halos only exist with distributed memory.
    if not Config.get().distributed_memory:
        raise TransformationError(
            "In the Dynamo0p3RedundantComputation transformation apply "
            "method distributed memory must be switched on")

    # The loop must iterate over cell-columns, dofs or colour.
    if node.loop_type not in ["", "dof", "colour"]:
        raise TransformationError(
            f"In the Dynamo0p3RedundantComputation transformation apply "
            f"method the loop type must be one of '' (cell-columns), 'dof'"
            f" or 'colour', but found '{node.loop_type}'")

    # Support for inter-grid kernels is not yet implemented.
    check_intergrid(node)
    const = LFRicConstants()

    if not options:
        options = {}
    depth = options.get("depth")
    if depth is None:
        # Requesting the maximum halo depth: it must not already be at
        # the maximum and no kernel may have a stencil access (which
        # would read beyond the halo).
        if node.upper_bound_name in const.HALO_ACCESS_LOOP_BOUNDS:
            if not node.upper_bound_halo_depth:
                raise TransformationError(
                    "In the Dynamo0p3RedundantComputation transformation "
                    "apply method the loop is already set to the maximum "
                    "halo depth so this transformation does nothing")
        for call in node.kernels():
            for arg in call.arguments.args:
                if arg.stencil:
                    raise TransformationError(
                        f"In the Dynamo0p3RedundantComputation "
                        f"transformation apply method the loop "
                        f"contains field '{arg.name}' with a stencil "
                        f"access in kernel '{call.name}', so it is "
                        f"invalid to set redundant computation to "
                        f"maximum depth")
    else:
        # Requesting a fixed depth: it must be an integer >= 1 and
        # strictly deepen any existing redundant computation.
        if not isinstance(depth, int):
            raise TransformationError(
                f"In the Dynamo0p3RedundantComputation transformation "
                f"apply method the supplied depth should be an integer but"
                f" found type '{type(depth)}'")
        if depth < 1:
            raise TransformationError(
                "In the Dynamo0p3RedundantComputation transformation "
                "apply method the supplied depth is less than 1")

        if node.upper_bound_name in const.HALO_ACCESS_LOOP_BOUNDS:
            if node.upper_bound_halo_depth:
                if node.upper_bound_halo_depth >= depth:
                    raise TransformationError(
                        f"In the Dynamo0p3RedundantComputation "
                        f"transformation apply method the supplied depth "
                        f"({depth}) must be greater than the existing halo"
                        f" depth ({node.upper_bound_halo_depth})")
            else:
                raise TransformationError(
                    "In the Dynamo0p3RedundantComputation transformation "
                    "apply method the loop is already set to the maximum "
                    "halo depth so can't be set to a fixed value")
def apply(self, loop, options=None):
    '''Apply the redundant computation transformation to the loop
    :py:obj:`loop`. This transformation can be applied to loops iterating
    over 'cells' or 'dofs'. If :py:obj:`depth` is set to a value then the
    value will be the depth of the field's halo over which redundant
    computation will be performed. If :py:obj:`depth` is not set to a
    value then redundant computation will be performed to the full depth
    of the field's halo.

    :param loop: the loop that we are transforming.
    :type loop: :py:class:`psyclone.psyGen.LFRicLoop`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]
    :param int options["depth"]: the depth of the stencil. Defaults \
        to None.

    '''
    self.validate(loop, options=options)
    if not options:
        options = {}
    depth = options.get("depth")
    if loop.loop_type == "":
        # Loop is over cell-columns.
        loop.set_upper_bound("cell_halo", depth)
    elif loop.loop_type == "colour":
        # Loop is over cells of a single colour.
        loop.set_upper_bound("colour_halo", depth)
    elif loop.loop_type == "dof":
        loop.set_upper_bound("dof_halo", depth)
    else:
        # Fixed stray space in the class name of the original message.
        raise TransformationError(
            f"Unsupported loop_type '{loop.loop_type}' found in "
            f"Dynamo0p3RedundantComputationTrans.apply()")
    # The new bounds may require halo exchanges to be added, removed
    # or modified.
    loop.update_halo_exchanges()
1992 '''Splits a synchronous halo exchange into a halo exchange start and
1993 halo exchange end. For example:
1995 >>> from psyclone.parse.algorithm import parse
1996 >>> from psyclone.psyGen import PSyFactory
1997 >>> api = "dynamo0.3"
1998 >>> ast, invokeInfo = parse("file.f90", api=api)
1999 >>> psy=PSyFactory(api).create(invokeInfo)
2000 >>> schedule = psy.invokes.get('invoke_0').schedule
2001 >>> # Uncomment the following line to see a text view of the schedule
2002 >>> # print(schedule.view())
2004 >>> from psyclone.transformations import Dynamo0p3AsyncHaloExchangeTrans
2005 >>> trans = Dynamo0p3AsyncHaloExchangeTrans()
2006 >>> trans.apply(schedule.children[0])
2007 >>> # Uncomment the following line to see a text view of the schedule
2008 >>> # print(schedule.view())
def __str__(self):
    '''Human-readable description of this transformation.'''
    return "Changes a synchronous halo exchange into an asynchronous one."
@property
def name(self):
    '''
    :returns: the name of this transformation as a string.
    :rtype: str
    '''
    return "Dynamo0p3AsyncHaloExchangeTrans"
def apply(self, node, options=None):
    '''Transforms a synchronous halo exchange, represented by a
    HaloExchange node, into an asynchronous halo exchange,
    represented by HaloExchangeStart and HaloExchangeEnd nodes.

    :param node: a synchronous haloexchange node.
    :type node: :py:obj:`psyclone.psygen.HaloExchange`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]

    '''
    self.validate(node, options)

    # Add asynchronous start and end halo exchanges, initialised with
    # the same field, dirty-check flag and vector index as the
    # existing synchronous exchange.
    node.parent.addchild(
        LFRicHaloExchangeStart(
            node.field, check_dirty=node._check_dirty,
            vector_index=node.vector_index, parent=node.parent),
        index=node.position)
    node.parent.addchild(
        LFRicHaloExchangeEnd(
            node.field, check_dirty=node._check_dirty,
            vector_index=node.vector_index, parent=node.parent),
        index=node.position)

    # Remove the original synchronous halo exchange.
    node.detach()
def validate(self, node, options=None):
    '''Internal method to check whether the node is valid for this
    transformation.

    :param node: a synchronous Halo Exchange node.
    :type node: :py:obj:`psyclone.psygen.HaloExchange`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]

    :raises TransformationError: if the node argument is not a
        HaloExchange (or subclass thereof).

    '''
    # The node must be a *synchronous* halo exchange, i.e. a
    # HaloExchange that is not already a Start/End pair.
    if not isinstance(node, LFRicHaloExchange) or \
            isinstance(node, (LFRicHaloExchangeStart, LFRicHaloExchangeEnd)):
        raise TransformationError(
            f"Error in Dynamo0p3AsyncHaloExchange transformation. Supplied"
            f" node must be a synchronous halo exchange but found "
            f"'{type(node)}'.")
2077 '''Modifies a kernel so that the number of dofs, number of layers and
2078 number of quadrature points are fixed in the kernel rather than
2079 being passed in by argument.
2081 >>> from psyclone.parse.algorithm import parse
2082 >>> from psyclone.psyGen import PSyFactory
2083 >>> api = "dynamo0.3"
2084 >>> ast, invokeInfo = parse("file.f90", api=api)
2085 >>> psy=PSyFactory(api).create(invokeInfo)
2086 >>> schedule = psy.invokes.get('invoke_0').schedule
2087 >>> # Uncomment the following line to see a text view of the schedule
2088 >>> # print(schedule.view())
2090 >>> from psyclone.transformations import Dynamo0p3KernelConstTrans
2091 >>> trans = Dynamo0p3KernelConstTrans()
2092 >>> for kernel in schedule.coded_kernels():
2093 >>> trans.apply(kernel, number_of_layers=150)
2094 >>> kernel_schedule = kernel.get_kernel_schedule()
2095 >>> # Uncomment the following line to see a text view of the
2097 >>> # print(kernel_schedule.symbol_table.view())
# Mapping from LFRic function-space name to a callable giving the
# number of dofs on a (quadrilateral) cell for element order n.
space_to_dofs = {
    "w3":       (lambda n: (n+1)**3),
    "w2":       (lambda n: 3*(n+2)*(n+1)**2),
    "w1":       (lambda n: 3*(n+2)**2*(n+1)),
    "w0":       (lambda n: (n+2)**3),
    "wtheta":   (lambda n: (n+2)*(n+1)**2),
    "w2h":      (lambda n: 2*(n+2)*(n+1)**2),
    "w2v":      (lambda n: (n+2)*(n+1)**2),
    "w2broken": (lambda n: 3*(n+1)**2*(n+2)),
    "wchi":     (lambda n: (n+1)**3),
    "w2trace":  (lambda n: 6*(n+1)**2),
    "w2htrace": (lambda n: 4*(n+1)**2),
    "w2vtrace": (lambda n: 2*(n+1)**2)}
def __str__(self):
    '''Human-readable description of this transformation.'''
    # NOTE(review): the final string fragment was lost in extraction -
    # confirm the trailing words against the original file.
    return ("Makes the number of degrees of freedom, the number of "
            "quadrature points and the number of layers constant in "
            "a Kernel.")
@property
def name(self):
    '''
    :returns: the name of this transformation as a string.
    :rtype: str
    '''
    return "Dynamo0p3KernelConstTrans"
def apply(self, node, options=None):
    '''Transforms a kernel so that the values for the number of degrees of
    freedom (if a valid value for the element_order arg is
    provided), the number of quadrature points (if the quadrature
    arg is set to True) and the number of layers (if a valid value
    for the number_of_layers arg is provided) are constant in a
    kernel rather than being passed in by argument.

    The "cellshape", "element_order" and "number_of_layers"
    arguments are provided to mirror the namelist values that are
    input into an LFRic model when it is run.

    Quadrature support is currently limited to XYoZ in this
    transformation. In the case of XYoZ the number of quadrature
    points (for horizontal and vertical) are set to the
    element_order + 3 in the LFRic infrastructure so their value
    is derived.

    :param node: a kernel node.
    :type node: :py:obj:`psyclone.domain.lfric.LFRicKern`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]
    :param str options["cellshape"]: the shape of the cells. This is \
        provided as it helps determine the number of dofs a field has \
        for a particular function space. Currently only "quadrilateral" \
        is supported which is also the default value.
    :param int options["element_order"]: the order of the cell. In \
        combination with cellshape, this determines the number of \
        dofs a field has for a particular function space. If it is set \
        to None (the default) then the dofs values are not set as \
        constants in the kernel, otherwise they are.
    :param int options["number_of_layers"]: the number of vertical \
        layers in the LFRic model mesh used for this particular run. If \
        this is set to None (the default) then the nlayers value is not \
        set as a constant in the kernel, otherwise it is.
    :param bool options["quadrature"]: whether the number of quadrature \
        points values are set as constants in the kernel (True) or not \
        (False). The default is False.

    '''
    # --------------------------------------------------------------
    def make_constant(symbol_table, arg_position, value,
                      function_space=None):
        '''Utility function that modifies the argument at position
        'arg_position' into a compile-time constant with value
        'value'.

        :param symbol_table: the symbol table for the kernel holding
            the argument that is going to be modified.
        :type symbol_table: :py:class:`psyclone.psyir.symbols.SymbolTable`
        :param int arg_position: the argument's position in the
            argument list.
        :param value: the constant value that this argument is going to
            be given. Its type depends on the type of the argument.
        :type value: int, str or bool
        :param str function_space: the name of the function space if
            there is a function space associated with this argument.
            Defaults to None.

        '''
        arg_index = arg_position - 1
        try:
            symbol = symbol_table.argument_list[arg_index]
        except IndexError as err:
            raise TransformationError(
                f"The argument index '{arg_index}' is greater than the "
                f"number of arguments "
                f"'{len(symbol_table.argument_list)}'.") from err
        # The argument being replaced must be a scalar integer that is
        # not already a compile-time constant.
        if not isinstance(symbol.datatype, ScalarType):
            raise TransformationError(
                f"Expected entry to be a scalar argument but found "
                f"'{type(symbol.datatype).__name__}'.")
        if symbol.datatype.intrinsic != ScalarType.Intrinsic.INTEGER:
            raise TransformationError(
                f"Expected entry to be a scalar integer argument "
                f"but found '{symbol.datatype}'.")
        if symbol.is_constant:
            raise TransformationError(
                "Expected entry to be a scalar integer argument "
                "but found a constant.")

        # Create a new constant symbol and swap it with the argument
        # symbol; the (now unused) argument becomes '<name>_dummy'.
        orig_name = symbol.name
        new_name = symbol_table.next_available_name(f"{orig_name}_dummy")
        local_symbol = DataSymbol(new_name, INTEGER_TYPE,
                                  is_constant=True, initial_value=value)
        symbol_table.add(local_symbol)
        symbol_table.swap_symbol_properties(symbol, local_symbol)

        if function_space:
            print(f"    Modified {orig_name}, arg position {arg_position},"
                  f" function space {function_space}, value {value}.")
        else:
            print(f"    Modified {orig_name}, arg position {arg_position},"
                  f" value {value}.")
    # --------------------------------------------------------------

    self.validate(node, options)
    if not options:
        options = {}
    number_of_layers = options.get("number_of_layers", None)
    quadrature = options.get("quadrature", False)
    element_order = options.get("element_order", None)
    kernel = node

    arg_list_info = KernCallArgList(kernel)
    arg_list_info.generate()
    try:
        kernel_schedule = kernel.get_kernel_schedule()
    except NotImplementedError as excinfo:
        raise TransformationError(
            f"Failed to parse kernel '{kernel.name}'. Error reported was "
            f"'{excinfo}'.") from excinfo

    symbol_table = kernel_schedule.symbol_table
    if number_of_layers:
        make_constant(symbol_table, arg_list_info.nlayers_positions[0],
                      number_of_layers)

    if quadrature and arg_list_info.nqp_positions:
        # Only XYoZ quadrature is supported; the number of quadrature
        # points is element_order + 3 (see the docstring above).
        if kernel.eval_shapes == ["gh_quadrature_xyoz"]:
            make_constant(symbol_table,
                          arg_list_info.nqp_positions[0]["horizontal"],
                          element_order+3)
            make_constant(symbol_table,
                          arg_list_info.nqp_positions[0]["vertical"],
                          element_order+3)
        else:
            raise TransformationError(
                f"Error in Dynamo0p3KernelConstTrans transformation. "
                f"Support is currently limited to 'xyoz' quadrature but "
                f"found {kernel.eval_shapes}.")

    const = LFRicConstants()
    if element_order is not None:
        # Modify the symbol table for degrees of freedom here.
        for info in arg_list_info.ndf_positions:
            if (info.function_space.lower() in
                    (const.VALID_ANY_SPACE_NAMES +
                     const.VALID_ANY_DISCONTINUOUS_SPACE_NAMES +
                     ["any_w2"])):
                # Skip any_space_*, any_discontinuous_space_* and
                # any_w2 spaces as their dof counts are not known.
                print(f"    Skipped dofs, arg position {info.position}, "
                      f"function space {info.function_space}")
            else:
                try:
                    ndofs = Dynamo0p3KernelConstTrans. \
                        space_to_dofs[
                            info.function_space](element_order)
                except KeyError as err:
                    # NOTE(review): reconstructed raise - confirm the
                    # exception class against the original file.
                    raise InternalError(
                        f"Error in Dynamo0p3KernelConstTrans "
                        f"transformation. Unsupported function space "
                        f"'{info.function_space}' found. Expecting one of "
                        f"""{Dynamo0p3KernelConstTrans.
                             space_to_dofs.keys()}.""") from err
                make_constant(symbol_table, info.position, ndofs,
                              function_space=info.function_space)

    # Flag that the kernel has been modified.
    kernel.modified = True
def validate(self, node, options=None):
    '''This method checks whether the input arguments are valid for
    this transformation.

    :param node: a dynamo 0.3 kernel node.
    :type node: :py:obj:`psyclone.domain.lfric.LFRicKern`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]
    :param str options["cellshape"]: the shape of the elements/cells.
    :param int options["element_order"]: the order of the elements/cells.
    :param int options["number_of_layers"]: the number of layers to use.
    :param bool options["quadrature"]: whether quadrature dimension sizes \
        should or shouldn't be set as constants in a kernel.

    :raises TransformationError: if the node argument is not a \
        dynamo 0.3 kernel, the cellshape argument is not set to \
        "quadrilateral", the element_order argument is not a 0 or a \
        positive integer, the number of layers argument is not a \
        positive integer, the quadrature argument is not a boolean, \
        neither element order nor number of layers arguments are set \
        (as the transformation would then do nothing), or the \
        quadrature argument is True but the element order is not \
        provided (as the former needs the latter).

    '''
    if not isinstance(node, LFRicKern):
        raise TransformationError(
            f"Error in Dynamo0p3KernelConstTrans transformation. Supplied "
            f"node must be a dynamo kernel but found '{type(node)}'.")

    # Normalise the options dictionary so the get() calls below are safe.
    if not options:
        options = {}
    cellshape = options.get("cellshape", "quadrilateral")
    element_order = options.get("element_order", None)
    number_of_layers = options.get("number_of_layers", None)
    quadrature = options.get("quadrature", False)

    if cellshape.lower() != "quadrilateral":
        # Only quadrilaterals are currently supported.
        raise TransformationError(
            f"Error in Dynamo0p3KernelConstTrans transformation. Supplied "
            f"cellshape must be set to 'quadrilateral' but found "
            f"'{cellshape}'.")

    if element_order is not None and \
            (not isinstance(element_order, int) or element_order < 0):
        # The element order must be 0 or a positive integer.
        raise TransformationError(
            f"Error in Dynamo0p3KernelConstTrans transformation. The "
            f"element_order argument must be >= 0 but found "
            f"'{element_order}'.")

    if number_of_layers is not None and \
            (not isinstance(number_of_layers, int) or number_of_layers < 1):
        # The number of layers must be a positive integer.
        raise TransformationError(
            f"Error in Dynamo0p3KernelConstTrans transformation. The "
            f"number_of_layers argument must be > 0 but found "
            f"'{number_of_layers}'.")

    if quadrature not in [False, True]:
        # Quadrature must be a boolean value.
        raise TransformationError(
            f"Error in Dynamo0p3KernelConstTrans transformation. The "
            f"quadrature argument must be boolean but found "
            f"'{quadrature}'.")

    if element_order is None and not number_of_layers:
        # As a minimum, element order or number of layers must have values
        # otherwise this transformation would be a no-op.
        raise TransformationError(
            "Error in Dynamo0p3KernelConstTrans transformation. At least "
            "one of element_order or number_of_layers must be set "
            "otherwise this transformation does nothing.")

    if quadrature and element_order is None:
        # Quadrature sizes are derived from the element order, so the
        # latter is required when the former is requested.
        raise TransformationError(
            "Error in Dynamo0p3KernelConstTrans transformation. If "
            "quadrature is set then element_order must also be set (as "
            "the values of the former are derived from the latter.")
Adds an OpenACC "enter data" directive to a Schedule.
For example:

>>> from psyclone.parse.algorithm import parse
>>> from psyclone.psyGen import PSyFactory
>>> api = "gocean1.0"
>>> ast, invokeInfo = parse(GOCEAN_SOURCE_FILE, api=api)
>>> psy = PSyFactory(api).create(invokeInfo)
>>>
>>> from psyclone.transformations import \
        ACCEnterDataTrans, ACCLoopTrans, ACCParallelTrans
>>> dtrans = ACCEnterDataTrans()
>>> ltrans = ACCLoopTrans()
>>> ptrans = ACCParallelTrans()
>>>
>>> schedule = psy.invokes.get('invoke_0').schedule
>>> # Uncomment the following line to see a text view of the schedule
>>> # print(schedule.view())
>>>
>>> # Apply the OpenACC Loop transformation to *every* loop in the schedule
>>> for child in schedule.children[:]:
...     ltrans.apply(child)
>>>
>>> # Enclose all of these loops within a single OpenACC parallel region
>>> ptrans.apply(schedule)
>>>
>>> # Add an enter data directive
>>> dtrans.apply(schedule)
>>>
>>> # Uncomment the following line to see a text view of the schedule
>>> # print(schedule.view())
def __str__(self):
    # Short human-readable description used in transformation listings.
    return "Adds an OpenACC 'enter data' directive"
@property
def name(self):
    '''
    :returns: the name of this transformation.
    :rtype: str
    '''
    return "ACCEnterDataTrans"
def apply(self, sched, options=None):
    '''Adds an OpenACC "enter data" directive to the invoke associated
    with the supplied Schedule. Any fields accessed by OpenACC kernels
    within this schedule will be added to this data region in
    order to ensure they remain on the target device.

    :param sched: schedule to which to add an "enter data" directive.
    :type sched: sub-class of :py:class:`psyclone.psyir.nodes.Schedule`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]

    '''
    # Ensure that the proposed transformation is valid.
    self.validate(sched, options)

    # The directive class is API-specific, so import lazily according to
    # the type of the supplied schedule.
    # pylint: disable=import-outside-toplevel
    if isinstance(sched, DynInvokeSchedule):
        from psyclone.dynamo0p3 import DynACCEnterDataDirective as \
            AccEnterDataDir
    elif isinstance(sched, GOInvokeSchedule):
        from psyclone.gocean1p0 import GOACCEnterDataDirective as \
            AccEnterDataDir
    elif isinstance(sched, NemoInvokeSchedule):
        from psyclone.nemo import NemoACCEnterDataDirective as \
            AccEnterDataDir
    else:
        # Should not get here provided that validate() has done its job.
        raise InternalError(
            f"ACCEnterDataTrans.validate() has not rejected an "
            f"(unsupported) schedule of type {type(sched)}")

    # Find the position of the first child statement of the current
    # schedule which contains an OpenACC compute construct.
    posn = 0
    directive_cls = (ACCParallelDirective, ACCKernelsDirective)
    directive = sched.walk(directive_cls, stop_type=directive_cls)
    if directive:
        current = directive[0]
        while current not in sched.children:
            current = current.parent
        posn = sched.children.index(current)

    # Add the directive at the position determined above, i.e. just
    # before any compute construct.
    data_dir = AccEnterDataDir(parent=sched, children=[])
    sched.addchild(data_dir, index=posn)
def validate(self, sched, options=None):
    '''
    Check that we can safely apply the OpenACC enter-data transformation
    to the supplied Schedule.

    :param sched: Schedule to which to add an "enter data" directive.
    :type sched: sub-class of :py:class:`psyclone.psyir.nodes.Schedule`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]

    :raises TransformationError: if passed something that is not a \
        (subclass of) :py:class:`psyclone.psyir.nodes.Schedule`.

    '''
    super().validate(sched, options)

    if not isinstance(sched, Schedule):
        raise TransformationError("Cannot apply an 'enter data' "
                                  "directive to something that is not a "
                                  "Schedule")

    # Check that we don't already have a data region of any sort.
    directive_cls = (ACCDataDirective, ACCEnterDataDirective)
    if sched.walk(directive_cls, stop_type=directive_cls):
        raise TransformationError("Schedule already contains an OpenACC "
                                  "region - cannot add an enter data.")
Transform a kernel or routine by adding a "!$acc routine" directive
(causing it to be compiled for the OpenACC accelerator device).
For example:

>>> from psyclone.parse.algorithm import parse
>>> from psyclone.psyGen import PSyFactory
>>> api = "gocean1.0"
>>> ast, invokeInfo = parse(GOCEAN_SOURCE_FILE, api=api)
>>> psy = PSyFactory(api).create(invokeInfo)
>>>
>>> from psyclone.transformations import ACCRoutineTrans
>>> rtrans = ACCRoutineTrans()
>>>
>>> schedule = psy.invokes.get('invoke_0').schedule
>>> # Uncomment the following line to see a text view of the schedule
>>> # print(schedule.view())
>>> kern = schedule.children[0].children[0].children[0]
>>> # Transform the kernel
>>> rtrans.apply(kern)
@property
def name(self):
    '''
    :returns: the name of this transformation class.
    :rtype: str
    '''
    return "ACCRoutineTrans"
def apply(self, node, options=None):
    '''
    Add the '!$acc routine' OpenACC directive into the code of the
    supplied Kernel (in a PSyKAl API such as GOcean or LFRic) or directly
    in the supplied Routine.

    :param node: the kernel call or routine implementation to transform.
    :type node: :py:class:`psyclone.psyGen.Kern` or \
        :py:class:`psyclone.psyir.nodes.Routine`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]

    '''
    # Check that we can safely apply this transformation.
    self.validate(node, options)

    if isinstance(node, Kern):
        # Flag that the kernel has been modified.
        node.modified = True

        # Get the schedule representing the kernel subroutine.
        routine = node.get_kernel_schedule()
    else:
        routine = node

    # Insert the directive to the routine if it doesn't already exist.
    for child in routine.children:
        if isinstance(child, ACCRoutineDirective):
            return  # The directive is already present.
    routine.children.insert(0, ACCRoutineDirective())
def validate(self, node, options=None):
    '''
    Perform checks that the supplied kernel or routine can be transformed.

    :param node: the kernel or routine which is the target of this
        transformation.
    :type node: :py:class:`psyclone.psyGen.Kern` |
        :py:class:`psyclone.psyir.nodes.Routine`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]
    :param bool options["force"]: whether to allow routines with
        CodeBlocks to run on the GPU.

    :raises TransformationError: if the node is not a kernel or a routine.
    :raises TransformationError: if the target is a built-in kernel.
    :raises TransformationError: if it is a kernel but without an
        associated PSyIR.
    :raises TransformationError: if any of the symbols in the kernel are
        accessed via a module use statement.
    :raises TransformationError: if the kernel contains any calls to other
        routines.
    '''
    # NOTE(review): body reconstructed from a garbled source — the original
    # lines were lost. The checks documented above are implemented by the
    # GPU-offload mixin helper; confirm against the project history.
    super().validate(node, options)
    self.validate_it_can_run_on_gpu(node, options)
Enclose a sub-set of nodes from a Schedule within an OpenACC kernels
region (i.e. within "!$acc kernels" ... "!$acc end kernels" directives).
For example:

>>> from psyclone.parse.algorithm import parse
>>> from psyclone.psyGen import PSyFactory
>>> api = "nemo"
>>> ast, invokeInfo = parse(NEMO_SOURCE_FILE, api=api)
>>> psy = PSyFactory(api).create(invokeInfo)
>>>
>>> from psyclone.transformations import ACCKernelsTrans
>>> ktrans = ACCKernelsTrans()
>>>
>>> schedule = psy.invokes.get('tra_adv').schedule
>>> # Uncomment the following line to see a text view of the schedule
>>> # print(schedule.view())
>>> kernels = schedule.children[9]
>>> # Transform the kernel
>>> ktrans.apply(kernels)
# Node types that may not appear inside an OpenACC kernels region.
# NOTE(review): the tail of this tuple was lost in the source extraction;
# psyGen.HaloExchange is the reconstructed final entry — confirm against
# the project history.
excluded_node_types = (CodeBlock, Return, PSyDataNode,
                       psyGen.HaloExchange)
@property
def name(self):
    '''
    :returns: the name of this transformation class.
    :rtype: str
    '''
    return "ACCKernelsTrans"
def apply(self, node, options=None):
    '''
    Enclose the supplied list of PSyIR nodes within an OpenACC
    kernels region.

    :param node: a node or list of nodes in the PSyIR to enclose.
    :type node: (a list of) :py:class:`psyclone.psyir.nodes.Node`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]
    :param bool options["default_present"]: whether or not the kernels \
        region should have the 'default present' attribute (indicating \
        that data is already on the accelerator). When using managed \
        memory this option should be False.

    '''
    # Ensure we are always working with a list of nodes, even if only
    # one was supplied via the `node` argument.
    node_list = self.get_node_list(node)

    self.validate(node_list, options)

    parent = node_list[0].parent
    start_index = node_list[0].position

    if not options:
        options = {}
    default_present = options.get("default_present", False)

    # Create a directive containing the nodes in node_list and insert it.
    directive = ACCKernelsDirective(
        parent=parent, children=[node.detach() for node in node_list],
        default_present=default_present)

    parent.children.insert(start_index, directive)
def validate(self, nodes, options=None):
    '''
    Check that we can safely enclose the supplied node or list of nodes
    within OpenACC kernels ... end kernels directives.

    :param nodes: the proposed PSyIR node or nodes to enclose in the \
        kernels region.
    :type nodes: (list of) :py:class:`psyclone.psyir.nodes.Node`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]

    :raises NotImplementedError: if the supplied Nodes belong to \
        an unsupported API.
    :raises TransformationError: if there are no Loops within the \
        proposed region.

    '''
    # Ensure we are always working with a list of nodes, even if only
    # one was supplied via the `nodes` argument.
    node_list = self.get_node_list(nodes)

    # Check that the front-end is supported.
    sched = node_list[0].ancestor((NemoInvokeSchedule, DynInvokeSchedule))
    if not sched:
        raise NotImplementedError(
            "OpenACC kernels regions are currently only supported for the "
            "nemo and dynamo0.3 front-ends")
    super().validate(node_list, options)

    # Check that we have at least one loop or array range within the
    # proposed region.
    for node in node_list:
        if (any(assign for assign in node.walk(Assignment)
                if assign.is_array_assignment) or node.walk(Loop)):
            break
    else:
        # Branch executed if loop does not exit with a break.
        raise TransformationError(
            "A kernels transformation must enclose at least one loop or "
            "array range but none were found.")
Add an OpenACC data region around a list of nodes in the PSyIR.
COPYIN, COPYOUT and COPY clauses are added as required.
For example:

>>> from psyclone.parse.algorithm import parse
>>> from psyclone.psyGen import PSyFactory
>>> api = "nemo"
>>> ast, invokeInfo = parse(NEMO_SOURCE_FILE, api=api)
>>> psy = PSyFactory(api).create(invokeInfo)
>>>
>>> from psyclone.transformations import ACCKernelsTrans, ACCDataTrans
>>> ktrans = ACCKernelsTrans()
>>> dtrans = ACCDataTrans()
>>>
>>> schedule = psy.invokes.get('tra_adv').schedule
>>> # Uncomment the following line to see a text view of the schedule
>>> # print(schedule.view())
>>>
>>> # Add a kernels construct for execution on the device
>>> kernels = schedule.children[9]
>>> ktrans.apply(kernels)
>>>
>>> # Enclose the kernels in a data construct
>>> kernels = schedule.children[9]
>>> dtrans.apply(kernels)
# Node types that may not appear inside an OpenACC data region.
excluded_node_types = (CodeBlock, Return, PSyDataNode)
@property
def name(self):
    '''
    :returns: the name of this transformation.
    :rtype: str
    '''
    return "ACCDataTrans"
def apply(self, node, options=None):
    '''
    Put the supplied node or list of nodes within an OpenACC data region.

    :param node: the PSyIR node(s) to enclose in the data region.
    :type node: (list of) :py:class:`psyclone.psyir.nodes.Node`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]

    '''
    # Ensure we are always working with a list of nodes, even if only
    # one was supplied via the `node` argument.
    node_list = self.get_node_list(node)

    self.validate(node_list, options)

    parent = node_list[0].parent
    start_index = node_list[0].position

    # Create a directive containing the nodes in node_list and insert it.
    directive = ACCDataDirective(
        parent=parent, children=[node.detach() for node in node_list])

    parent.children.insert(start_index, directive)
def validate(self, nodes, options=None):
    '''
    Check that we can safely add a data region around the supplied list
    of nodes.

    :param nodes: the proposed node(s) to enclose in a data region.
    :type nodes: List[:py:class:`psyclone.psyir.nodes.Node`] |
        :py:class:`psyclone.psyir.nodes.Node`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]

    :raises TransformationError: if the Schedule to which the nodes
        belong already has an 'enter data' directive.
    :raises TransformationError: if any of the nodes are themselves
        data directives.
    :raises TransformationError: if an array of structures needs to be
        deep copied (this is not currently supported).

    '''
    # Ensure we are always working with a list of nodes, even if only
    # one was supplied via the `nodes` argument.
    node_list = self.get_node_list(nodes)

    super().validate(node_list, options)

    # Check that the proposed region does not sit inside a schedule that
    # already manages device data with an 'enter data' directive.
    schedule = node_list[0].root
    acc_dirs = schedule.walk(ACCEnterDataDirective)
    if acc_dirs:
        raise TransformationError(
            "Cannot add an OpenACC data region to a schedule that "
            "already contains an 'enter data' directive.")

    # Reject any structure access that would require a deep copy of an
    # array of structures (not currently supported).
    # NOTE(review): this loop was reconstructed from a garbled source —
    # confirm the continue conditions against the project history.
    for node in node_list:
        for sref in node.walk(StructureReference):
            # Collect the loop variables of all Loops that enclose this
            # access and lie within the proposed region.
            loop_vars = []
            cursor = sref.ancestor(Loop, limit=node)
            while cursor:
                loop_vars.append(Signature(cursor.variable.name))
                cursor = cursor.ancestor(Loop)

            # Examine every array access within the structure reference.
            array_accesses = sref.walk(ArrayMixin)
            for access in array_accesses:
                # Only an array access on an intermediate (structure)
                # component can require a deep copy.
                if not isinstance(access, StructureMember):
                    continue
                var_accesses = VariablesAccessInfo(access.indices)
                for var in loop_vars:
                    if var not in var_accesses.all_signatures:
                        continue
                    raise TransformationError(
                        f"Data region contains a structure access "
                        f"'{sref.debug_string()}' where component "
                        f"'{access.name}' is an array and is iterated over"
                        f" (variable '{var}'). Deep copying of data for "
                        f"structures is only supported where the deepest "
                        f"component is the one being iterated over.")
Transformation that removes any accesses of imported data from the supplied
kernel and places them in the caller. The values/references are then passed
by argument into the kernel.
@property
def name(self):
    '''
    :returns: the name of this transformation.
    :rtype: str
    '''
    return "KernelImportsToArguments"
def __str__(self):
    # Short human-readable description used in transformation listings.
    return ("Convert the imported variables used inside the kernel "
            "into arguments and modify the InvokeSchedule to pass them"
            " in the kernel call.")
def validate(self, node, options=None):
    '''
    Check that the supplied node is a valid target for this transformation.

    :param node: the PSyIR node to validate.
    :type node: :py:class:`psyclone.psyGen.CodedKern`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]

    :raises TransformationError: if the supplied node is not a CodedKern.
    :raises TransformationError: if this transformation is not applied to \
        a Gocean API Invoke.
    :raises TransformationError: if the supplied kernel contains wildcard \
        imports of symbols from one or more containers (e.g. a USE without\
        an ONLY clause in Fortran).

    '''
    if not isinstance(node, CodedKern):
        raise TransformationError(
            f"The {self.name} transformation can only be applied to "
            f"CodedKern nodes but found '{type(node).__name__}' instead.")

    invoke_schedule = node.ancestor(InvokeSchedule)
    if not isinstance(invoke_schedule, GOInvokeSchedule):
        raise TransformationError(
            f"The {self.name} transformation is currently only supported "
            f"for the GOcean API but got an InvokeSchedule of type: "
            f"'{type(invoke_schedule).__name__}'")

    # Check that the kernel PSyIR is available (all symbols declared).
    try:
        kernel = node.get_kernel_schedule()
    except SymbolError as err:
        raise TransformationError(
            f"Kernel '{node.name}' contains undeclared symbol: "
            f"{err.value}") from err

    # Wildcard imports make it impossible to know which symbols a
    # container provides, so they are rejected.
    symtab = kernel.symbol_table
    for container in symtab.containersymbols:
        if container.wildcard_import:
            raise TransformationError(
                f"Kernel '{node.name}' has a wildcard import of symbols "
                f"from container '{container.name}'. This is not "
                f"supported.")
def apply(self, node, options=None):
    '''
    Convert the imported variables used inside the kernel into arguments
    and modify the InvokeSchedule to pass the same imported variables to
    the kernel call.

    :param node: a kernel call.
    :type node: :py:class:`psyclone.psyGen.CodedKern`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]

    '''
    self.validate(node, options)

    kernel = node.get_kernel_schedule()
    symtab = kernel.symbol_table
    invoke_symtab = node.ancestor(InvokeSchedule).symbol_table
    count_imported_vars_removed = 0

    # Transform each imported variable into an argument.
    # Iterate over a copy of the list because we modify the symbol table
    # as we go.
    for imported_var in kernel.symbol_table.imported_symbols[:]:
        count_imported_vars_removed += 1

        # Resolve the data type information if it is not yet available.
        updated_sym = imported_var
        # pylint: disable=unidiomatic-typecheck
        if (type(updated_sym) is Symbol or
                isinstance(updated_sym.datatype, UnresolvedType)):
            updated_sym = updated_sym.resolve_type()
            # If the resolved symbol is a new object, swap it into the
            # kernel symbol table.
            if updated_sym is not imported_var:
                kernel.symbol_table.swap(imported_var, updated_sym)

        # Copy the imported symbol into the InvokeSchedule SymbolTable.
        invoke_symtab.copy_external_import(
            updated_sym, tag="AlgArgs_" + updated_sym.name)

        # Keep a reference to the original container so that we can tidy
        # it up once the symbol's interface has been changed.
        container = updated_sym.interface.container_symbol

        # Convert the symbol into an argument and append it to the
        # kernel argument list.
        current_arg_list = symtab.argument_list
        # An argument cannot be a constant nor have an initial value.
        was_constant = updated_sym.is_constant
        updated_sym.is_constant = False
        updated_sym.initial_value = None
        if was_constant:
            # Imported constants lose the constant value but are
            # read-only arguments.
            updated_sym.interface = ArgumentInterface(
                ArgumentInterface.Access.READ)
        else:
            updated_sym.interface = ArgumentInterface(
                ArgumentInterface.Access.READWRITE)
        current_arg_list.append(updated_sym)
        symtab.specify_argument_list(current_arg_list)

        # Map the PSyIR scalar type onto the equivalent GOcean space.
        if updated_sym.datatype.intrinsic == ScalarType.Intrinsic.REAL:
            go_space = "go_r_scalar"
        elif (updated_sym.datatype.intrinsic ==
              ScalarType.Intrinsic.INTEGER):
            go_space = "go_i_scalar"
        else:
            raise TypeError(
                f"The imported variable '{updated_sym.name}' could not be "
                f"promoted to an argument because the GOcean "
                f"infrastructure does not have any scalar type equivalent "
                f"to the PSyIR {updated_sym.datatype} type.")

        # Add the new argument to the kernel call argument list.
        node.arguments.append(updated_sym.name, go_space)

        # Check whether we still need the Container symbol from which
        # this import was originally accessed.
        if not kernel.symbol_table.symbols_imported_from(container) and \
                not container.wildcard_import:
            kernel.symbol_table.remove(container)

    if count_imported_vars_removed > 0:
        node.modified = True
3013 "ACCEnterDataTrans",
3020 "Dynamo0p3AsyncHaloExchangeTrans",
3021 "Dynamo0p3ColourTrans",
3022 "Dynamo0p3KernelConstTrans",
3023 "Dynamo0p3OMPLoopTrans",
3024 "Dynamo0p3RedundantComputationTrans",
3025 "DynamoOMPParallelLoopTrans",
3026 "GOceanOMPLoopTrans",
3027 "GOceanOMPParallelLoopTrans",
3028 "KernelImportsToArguments",
3032 "OMPParallelLoopTrans",
3035 "ParallelRegionTrans",